...
Run Format

Text file src/runtime/memmove_arm.s

Documentation: runtime

     1	// Inferno's libkern/memmove-arm.s
     2	// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-arm.s
     3	//
     4	//         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
     5	//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
     6	//         Portions Copyright 2009 The Go Authors. All rights reserved.
     7	//
     8	// Permission is hereby granted, free of charge, to any person obtaining a copy
     9	// of this software and associated documentation files (the "Software"), to deal
    10	// in the Software without restriction, including without limitation the rights
    11	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    12	// copies of the Software, and to permit persons to whom the Software is
    13	// furnished to do so, subject to the following conditions:
    14	//
    15	// The above copyright notice and this permission notice shall be included in
    16	// all copies or substantial portions of the Software.
    17	//
    18	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    19	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    20	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    21	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    22	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    23	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    24	// THE SOFTWARE.
    25	
    26	#include "textflag.h"
    27	
    28	// TE or TS are spilled to the stack during bulk register moves.
      	// TS and TE bound the part of the destination that is still to be written:
      	// memmove loads TS with `to` and TE with `to+n`; forward copies advance TS,
      	// backward copies decrement TE, and every copy loop ends once they meet.
      	// R0 (TS) is part of the backward block-register sets and R8 (TE) is part of
      	// the forward ones, so the one that would be clobbered is saved in the
      	// function's frame slot before those block loops and reloaded afterwards.
    29	#define TS	R0
    30	#define TE	R8
    31	
    32	// Warning: the linker will use R11 to synthesize certain instructions. Please
    33	// take care and double check with objdump.
      	// FROM is the source pointer (start of source for forward copies; advanced
      	// to the end of source before backward copies).
      	// N is the byte count. TMP reuses the same register: N is read for the last
      	// time by the "CMP $4, N" size checks, and TMP is only written after that.
      	// TMP1 is the word scratch register of the aligned word loops; it shares R5
      	// with RSHIFT, which is only set up on the unaligned-source paths.
    34	#define FROM	R11
    35	#define N	R12
    36	#define TMP	R12				/* N and TMP don't overlap */
    37	#define TMP1	R5
    38	
      	// Unaligned-source paths only: RSHIFT/LSHIFT are the two complementary shift
      	// amounts (they sum to 32) used to merge neighbouring source words, and
      	// OFFSET is the byte correction applied to FROM when leaving the block loop.
    39	#define RSHIFT	R5
    40	#define LSHIFT	R6
    41	#define OFFSET	R7
    42	
      	// Backward unaligned block loop (_bu16loop): BR0-BR3 receive four source
      	// words, BW0-BW3 are the four merged words that get stored.
      	// BW0/BW1/BW2 occupy the same registers as BR1/BR2/BR3, so each merged word
      	// overwrites a source word that has already been consumed; BR0 survives the
      	// store and carries over into the next iteration.
    43	#define BR0	R0					/* shared with TS */
    44	#define BW0	R1
    45	#define BR1	R1
    46	#define BW1	R2
    47	#define BR2	R2
    48	#define BW2	R3
    49	#define BR3	R3
    50	#define BW3	R4
    51	
      	// Forward unaligned block loop (_fu16loop): FR0-FR3 receive four source
      	// words, FW0-FW3 are the four merged words that get stored.
      	// FW1/FW2/FW3 occupy the same registers as FR0/FR1/FR2; FR3 survives the
      	// store and carries over into the next iteration.
    52	#define FW0	R1
    53	#define FR0	R2
    54	#define FW1	R2
    55	#define FR1	R3
    56	#define FW2	R3
    57	#define FR2	R4
    58	#define FW3	R4
    59	#define FR3	R8					/* shared with TE */
    60	
    61	TEXT runtime·memmove(SB), NOSPLIT, $4-12
      	// memmove copies n bytes from `from` to `to`.
      	//
      	// Arguments are read from the frame: to+0(FP), from+4(FP), n+8(FP).
      	// The 4-byte local frame ($4) is the single spill slot addressed below as
      	// savedts-4(SP) / savedte-4(SP).
      	// On exit R0 holds the original `to` value (see _return).
      	//
      	// Strategy:
      	//   - to <= from (unsigned compare): copy forward from the start (_forward).
      	//   - otherwise: copy backward from the end (_back).
      	//     NOTE(review): presumably this direction choice is what keeps
      	//     overlapping copies correct; confirm against the Go-side contract.
      	//   - n < 4: byte loop only.
      	//   - otherwise: copy single bytes until the destination is 4-byte aligned,
      	//     then use 32-byte MOVM blocks plus a word loop when the source is
      	//     word-aligned as well, or 16-byte shift-and-merge blocks when it is
      	//     not; whatever is left is finished by the byte loop.
    62	_memmove:
    63		MOVW	to+0(FP), TS
    64		MOVW	from+4(FP), FROM
    65		MOVW	n+8(FP), N
    66	
    67		ADD	N, TS, TE	/* to end pointer */
    68	
      		// Flags reflect TS - FROM; BLS is unsigned "lower or same", so the
      		// forward path is taken when to <= from.
    69		CMP	FROM, TS
    70		BLS	_forward
    71	
      	// ---- Backward copy: FROM and TE are end pointers that move down; done when TE == TS.
    72	_back:
    73		ADD	N, FROM		/* from end pointer */
      		// NOTE(review): BLT is a signed comparison, so a length with the top bit
      		// set would also take the byte-only path; it looks like that is merely
      		// slow rather than wrong, but confirm if such lengths are possible.
    74		CMP	$4, N		/* need at least 4 bytes to copy */
    75		BLT	_b1tail
    76	
      	// At least 4 bytes remain here and at most 3 byte copies are needed to reach
      	// alignment. TMP shares its register with N, so N is dead from this point on.
    77	_b4align:				/* align destination on 4 */
    78		AND.S	$3, TE, TMP
    79		BEQ	_b4aligned
    80	
    81		MOVBU.W	-1(FROM), TMP	/* pre-indexed */
    82		MOVBU.W	TMP, -1(TE)	/* pre-indexed */
    83		B	_b4align
    84	
      	// TE is word-aligned. TMP = FROM & 3 is left in place for _bunaligned, which
      	// uses it to choose the shift amounts.
    85	_b4aligned:				/* is source now aligned? */
    86		AND.S	$3, FROM, TMP
    87		BNE	_bunaligned
    88	
      		// TMP = TS+31 is the loop bound: the block loop runs while TE > TS+31,
      		// i.e. while at least 32 bytes remain. TS (R0) is spilled first because
      		// the block load below overwrites R0-R7.
    89		ADD	$31, TS, TMP	/* do 32-byte chunks if possible */
    90		MOVW	TS, savedts-4(SP)
    91	_b32loop:
    92		CMP	TMP, TE
    93		BLS	_b4tail
    94	
      		// Load eight words below FROM and store them below TE; .DB.W is
      		// decrement-before with write back, so both pointers drop by 32.
    95		MOVM.DB.W (FROM), [R0-R7]
    96		MOVM.DB.W [R0-R7], (TE)
    97		B	_b32loop
    98	
      	// Reload the spilled TS, then copy whole words while TE > TS+3
      	// (at least 4 bytes remain).
    99	_b4tail:				/* do remaining words if possible */
   100		MOVW	savedts-4(SP), TS
   101		ADD	$3, TS, TMP
   102	_b4loop:
   103		CMP	TMP, TE
   104		BLS	_b1tail
   105	
   106		MOVW.W	-4(FROM), TMP1	/* pre-indexed */
   107		MOVW.W	TMP1, -4(TE)	/* pre-indexed */
   108		B	_b4loop
   109	
      	// Shared backward epilogue: copy one byte at a time until TE meets TS.
      	// Also the whole copy when n < 4, and the exit path of the unaligned loop.
   110	_b1tail:				/* remaining bytes */
   111		CMP	TE, TS
   112		BEQ	_return
   113	
   114		MOVBU.W	-1(FROM), TMP	/* pre-indexed */
   115		MOVBU.W	TMP, -1(TE)	/* pre-indexed */
   116		B	_b1tail
   117	
      	// ---- Forward copy: FROM and TS are start pointers that move up; done when TS == TE.
      	// Mirrors the backward path above, using post-incrementing (.P) accesses.
   118	_forward:
   119		CMP	$4, N		/* need at least 4 bytes to copy */
   120		BLT	_f1tail
   121	
      	// At least 4 bytes remain here and at most 3 byte copies are needed to reach
      	// alignment. TMP shares its register with N, so N is dead from this point on.
   122	_f4align:				/* align destination on 4 */
   123		AND.S	$3, TS, TMP
   124		BEQ	_f4aligned
   125	
   126		MOVBU.P	1(FROM), TMP	/* implicit write back */
   127		MOVBU.P	TMP, 1(TS)	/* implicit write back */
   128		B	_f4align
   129	
      	// TS is word-aligned. TMP = FROM & 3 is left in place for _funaligned.
   130	_f4aligned:				/* is source now aligned? */
   131		AND.S	$3, FROM, TMP
   132		BNE	_funaligned
   133	
      		// TMP = TE-31 is the loop bound: the block loop runs while TS < TE-31,
      		// i.e. while at least 32 bytes remain. TE (R8) is spilled first because
      		// the block load below overwrites R1-R8.
   134		SUB	$31, TE, TMP	/* do 32-byte chunks if possible */
   135		MOVW	TE, savedte-4(SP)
   136	_f32loop:
   137		CMP	TMP, TS
   138		BHS	_f4tail
   139	
      		// Load eight words at FROM and store them at TS; .IA.W is
      		// increment-after with write back, so both pointers rise by 32.
   140		MOVM.IA.W (FROM), [R1-R8] 
   141		MOVM.IA.W [R1-R8], (TS)
   142		B	_f32loop
   143	
      	// Reload the spilled TE, then copy whole words while TS < TE-3
      	// (at least 4 bytes remain).
   144	_f4tail:
   145		MOVW	savedte-4(SP), TE
   146		SUB	$3, TE, TMP	/* do remaining words if possible */
   147	_f4loop:
   148		CMP	TMP, TS
   149		BHS	_f1tail
   150	
   151		MOVW.P	4(FROM), TMP1	/* implicit write back */
   152		MOVW.P	TMP1, 4(TS)	/* implicit write back */
   153		B	_f4loop
   154	
      	// Shared forward epilogue: copy one byte at a time until TS meets TE.
      	// Also the whole copy when n < 4, and the exit path of the unaligned loop.
   155	_f1tail:
   156		CMP	TS, TE
   157		BEQ	_return
   158	
   159		MOVBU.P	1(FROM), TMP	/* implicit write back */
   160		MOVBU.P	TMP, 1(TS)	/* implicit write back */
   161		B	_f1tail
   162	
      	// Common exit for both directions. R0 (TS) may have been advanced or reused
      	// as a block register, so the original `to` is reloaded from the frame.
      	// NOTE(review): presumably some caller relies on R0 == to on return; confirm.
   163	_return:
   164		MOVW	to+0(FP), R0
   165		RET
   166	
      	// ---- Backward copy, destination word-aligned but source not.
      	// Entered from _b4aligned with TMP = FROM & 3 (1, 2 or 3; 0 went the aligned
      	// way). The single CMP below drives all nine conditional moves, since
      	// MOVW without .S leaves the flags alone. Resulting table:
      	//   TMP=1: RSHIFT=8,  LSHIFT=24, OFFSET=1
      	//   TMP=2: RSHIFT=16, LSHIFT=16, OFFSET=2
      	//   TMP=3: RSHIFT=24, LSHIFT=8,  OFFSET=3
      	// i.e. RSHIFT = 8*TMP, LSHIFT = 32-8*TMP, OFFSET = TMP.
   167	_bunaligned:
   168		CMP	$2, TMP		/* is TMP < 2 ? */
   169	
   170		MOVW.LT	$8, RSHIFT		/* (R(n)<<24)|(R(n-1)>>8) */
   171		MOVW.LT	$24, LSHIFT
   172		MOVW.LT	$1, OFFSET
   173	
   174		MOVW.EQ	$16, RSHIFT		/* (R(n)<<16)|(R(n-1)>>16) */
   175		MOVW.EQ	$16, LSHIFT
   176		MOVW.EQ	$2, OFFSET
   177	
   178		MOVW.GT	$24, RSHIFT		/* (R(n)<<8)|(R(n-1)>>24) */
   179		MOVW.GT	$8, LSHIFT
   180		MOVW.GT	$3, OFFSET
   181	
      		// The block loop is used only while TE > TS+16. When it is skipped,
      		// FROM has not been rounded down yet, so the byte tail can use it as is.
   182		ADD	$16, TS, TMP	/* do 16-byte chunks if possible */
   183		CMP	TMP, TE
   184		BLS	_b1tail
   185	
      		// Round FROM down to a word boundary, spill TS (BR0 is the same
      		// register), and preload the word at the rounded-down address.
   186		BIC	$3, FROM		/* align source */
   187		MOVW	TS, savedts-4(SP)
   188		MOVW	(FROM), BR0	/* prime first block register */
   189	
      	// Each pass produces four destination words. Every output word combines the
      	// left-shifted higher-addressed source word with the right-shifted
      	// lower-addressed one. BW3's high part must be taken from the carried-over
      	// BR0 before the block load replaces BR0.
   190	_bu16loop:
   191		CMP	TMP, TE
   192		BLS	_bu1tail
   193	
   194		MOVW	BR0<<LSHIFT, BW3
   195		MOVM.DB.W (FROM), [BR0-BR3]
   196		ORR	BR3>>RSHIFT, BW3
   197	
      		// BW2/BW1/BW0 overwrite BR3/BR2/BR1 in place, so the order of the
      		// following pairs matters: each source word is consumed before it
      		// is overwritten.
   198		MOVW	BR3<<LSHIFT, BW2
   199		ORR	BR2>>RSHIFT, BW2
   200	
   201		MOVW	BR2<<LSHIFT, BW1
   202		ORR	BR1>>RSHIFT, BW1
   203	
   204		MOVW	BR1<<LSHIFT, BW0
   205		ORR	BR0>>RSHIFT, BW0
   206	
   207		MOVM.DB.W [BW0-BW3], (TE)
   208		B	_bu16loop
   209	
      	// Leave the block loop: reload TS and add OFFSET so FROM is once more the
      	// exact byte position matching TE, then finish with the byte loop.
   210	_bu1tail:
   211		MOVW	savedts-4(SP), TS
   212		ADD	OFFSET, FROM
   213		B	_b1tail
   214	
      	// ---- Forward copy, destination word-aligned but source not.
      	// Entered from _f4aligned with TMP = FROM & 3 (1, 2 or 3). As in
      	// _bunaligned, one CMP drives all nine conditional moves. Resulting table:
      	//   TMP=1: RSHIFT=8,  LSHIFT=24, OFFSET=3
      	//   TMP=2: RSHIFT=16, LSHIFT=16, OFFSET=2
      	//   TMP=3: RSHIFT=24, LSHIFT=8,  OFFSET=1
      	// i.e. RSHIFT = 8*TMP, LSHIFT = 32-8*TMP, OFFSET = 4-TMP.
   215	_funaligned:
   216		CMP	$2, TMP
   217	
   218		MOVW.LT	$8, RSHIFT		/* (R(n+1)<<24)|(R(n)>>8) */
   219		MOVW.LT	$24, LSHIFT
   220		MOVW.LT	$3, OFFSET
   221	
   222		MOVW.EQ	$16, RSHIFT		/* (R(n+1)<<16)|(R(n)>>16) */
   223		MOVW.EQ	$16, LSHIFT
   224		MOVW.EQ	$2, OFFSET
   225	
   226		MOVW.GT	$24, RSHIFT		/* (R(n+1)<<8)|(R(n)>>24) */
   227		MOVW.GT	$8, LSHIFT
   228		MOVW.GT	$1, OFFSET
   229	
      		// The block loop is used only while TS < TE-16. When it is skipped,
      		// FROM has not been rounded down yet, so the byte tail can use it as is.
   230		SUB	$16, TE, TMP	/* do 16-byte chunks if possible */
   231		CMP	TMP, TS
   232		BHS	_f1tail
   233	
      		// Round FROM down to a word boundary, spill TE (FR3 is the same
      		// register), and preload the first word; FROM then points at the
      		// following word.
   234		BIC	$3, FROM		/* align source */
   235		MOVW	TE, savedte-4(SP)
   236		MOVW.P	4(FROM), FR3	/* prime last block register, implicit write back */
   237	
      	// Each pass produces four destination words. Every output word combines the
      	// right-shifted lower-addressed source word with the left-shifted
      	// higher-addressed one. FW0's low part must be taken from the carried-over
      	// FR3 before the block load replaces FR3.
   238	_fu16loop:
   239		CMP	TMP, TS
   240		BHS	_fu1tail
   241	
   242		MOVW	FR3>>RSHIFT, FW0
   243		MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3]
   244		ORR	FR0<<LSHIFT, FW0
   245	
      		// FW1/FW2/FW3 overwrite FR0/FR1/FR2 in place, so the order of the
      		// following pairs matters: each source word is consumed before it
      		// is overwritten.
   246		MOVW	FR0>>RSHIFT, FW1
   247		ORR	FR1<<LSHIFT, FW1
   248	
   249		MOVW	FR1>>RSHIFT, FW2
   250		ORR	FR2<<LSHIFT, FW2
   251	
   252		MOVW	FR2>>RSHIFT, FW3
   253		ORR	FR3<<LSHIFT, FW3
   254	
   255		MOVM.IA.W [FW0,FW1,FW2,FW3], (TS)
   256		B	_fu16loop
   257	
      	// Leave the block loop: reload TE and subtract OFFSET so FROM is once more
      	// the exact byte position matching TS, then finish with the byte loop.
   258	_fu1tail:
   259		MOVW	savedte-4(SP), TE
   260		SUB	OFFSET, FROM
   261		B	_f1tail

View as plain text