/*
 * arch/xtensa/lib/hal/memcopy.S -- Core HAL library functions
 * xthal_memcpy and xthal_bcopy
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2002 - 2012 Tensilica Inc.
 */
#include <variant/core.h>
	.macro	src_b	r, w0, w1	# endianness-aware SRC: funnel-shift
					# a word out of the w0:w1 pair
/*
 * void *memcpy(void *dst, const void *src, size_t len);
 *
 * This function is intended to do the same thing as the standard
 * library function memcpy() for most cases.
 * However, where the source and/or destination references
 * an instruction RAM or ROM or a data RAM or ROM, that
 * source and/or destination will always be accessed with
 * 32-bit load and store instructions (as required for these
 * types of devices).
 *
 * !!!!!!!  Handling of IRAM/IROM has not yet
 * !!!!!!!  been implemented.
 *
 * The (general case) algorithm is as follows:
 *   If destination is unaligned, align it by conditionally
 *     copying 1 and 2 bytes.
 *   If source is aligned,
 *     do 16 bytes with a loop, and then finish up with
 *     8, 4, 2, and 1 byte copies conditional on the length;
 *   else (if source is unaligned),
 *     do the same, but use SRC to align the source data.
 * This code tries to use fall-through branches for the common
 * case of aligned source and destination and multiple
 * blocks of 16 bytes.
 */
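/*
 * For orientation only, a hedged C sketch of the strategy above.
 * The name memcpy_sketch and the simplified tail are illustrative
 * assumptions, not the code assembled below (the unaligned-source
 * SRC path is also reduced to a byte loop here):
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	void *memcpy_sketch(void *dst, const void *src, size_t len)
 *	{
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *
 *		if (((uintptr_t)d & 1) && len) {	// dst is 1 mod 2
 *			*d++ = *s++; len--;
 *		}
 *		if (((uintptr_t)d & 2) && len >= 2) {	// dst is 2 mod 4
 *			*d++ = *s++; *d++ = *s++; len -= 2;
 *		}
 *		if (((uintptr_t)s & 3) == 0) {		// source aligned
 *			while (len >= 16) {		// 16B per iteration
 *				((uint32_t *)d)[0] = ((const uint32_t *)s)[0];
 *				((uint32_t *)d)[1] = ((const uint32_t *)s)[1];
 *				((uint32_t *)d)[2] = ((const uint32_t *)s)[2];
 *				((uint32_t *)d)[3] = ((const uint32_t *)s)[3];
 *				d += 16; s += 16; len -= 16;
 *			}
 *		}
 *		while (len--)		// 8/4/2/1-byte finish, simplified
 *			*d++ = *s++;
 *		return dst;
 *	}
 */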
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4	# a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
	l8ui	a6, a3, 0
	addi	a3, a3, 1
	s8i	a6, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	bne	a3, a7, .Lnextbyte	# continue loop if $a3:src != $a7:src_end
#endif /* !XCHAL_HAVE_LOOPS */
/*
 * Destination is unaligned
 */

.Ldst1mod2:	# dst is only byte aligned
	_bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

	_bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm
.Ldst2mod4:	# dst 16-bit aligned
	_bltui	a4, 6, .Lbytecopy	# do short copies byte by byte

	j	.Ldstaligned	# dst is now aligned, return to main algorithm
	.global	memcpy
	.type	memcpy,@function
memcpy:
	entry	sp, 16		# minimal stack frame
	# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
.Lcommon:
	_bbsi.l	a2, 0, .Ldst1mod2	# if dst is 1 mod 2
	_bbsi.l	a2, 1, .Ldst2mod4	# if dst is 2 mod 4
.Ldstaligned:	# return here from .Ldst?mod? once dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is not aligned,
	_bany	a3, a8, .Lsrcunaligned	#   then use shifting copy
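	/*
	 * Hedged C equivalent of the dispatch so far (illustrative):
	 *
	 *	if ((uintptr_t)dst & 1) goto dst1mod2;	// _bbsi.l a2, 0
	 *	if ((uintptr_t)dst & 2) goto dst2mod4;	// _bbsi.l a2, 1
	 *	n16 = len >> 4;			// srli: 16B iterations
	 *	if ((uintptr_t)src & 3) goto srcunaligned;	// _bany
	 */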
/*
 * Destination and source are word-aligned, use word copy.
 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3	# a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
#if !XCHAL_HAVE_LOOPS
	bne	a3, a8, .Loop1	# continue loop if a3:src != a8:src_end
#endif /* !XCHAL_HAVE_LOOPS */
/*
 * Destination is aligned, Source is unaligned
 */

.Lsrcunaligned:
	_beqz	a4, .Ldone	# avoid loading anything for zero-length copies
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	ssa8	a3		# set shift amount from byte offset

/* set to 1 when running on ISS (simulator) with the
   lint or ferret client, or 0 to save a few cycles */
#define SIM_CHECKS_ALIGNMENT	1
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	and	a11, a3, a8	# save unalignment offset for below
	sub	a3, a3, a11	# align a3
#endif
	l32i	a6, a3, 0	# load first word
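	/*
	 * Hedged C sketch of the SSA8/SRC technique used by the loop
	 * below (little-endian case, nonzero misalignment assumed;
	 * variable names are hypothetical).  Only aligned words are
	 * loaded; each output word is funnelled out of two adjacent
	 * source words, which is what SRC does with the shift amount
	 * recorded in SAR by ssa8:
	 *
	 *	const uint32_t *s = (const uint32_t *)((uintptr_t)src & ~3ul);
	 *	uint32_t *d = dst;			// already word-aligned
	 *	unsigned sh = ((uintptr_t)src & 3) * 8;	// ssa8 equivalent
	 *	uint32_t w0 = *s++;			// first aligned word
	 *	while (n16--) {				// 4 such steps per 16B
	 *		uint32_t w1 = *s++;
	 *		*d++ = (w0 >> sh) | (w1 << (32 - sh));	// like src_b
	 *		w0 = w1;
	 *	}
	 */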
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop2done
	slli	a10, a7, 4
	add	a10, a10, a3	# a10 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
#if !XCHAL_HAVE_LOOPS
	bne	a3, a10, .Loop2	# continue loop if a3:src != a10:src_end
#endif /* !XCHAL_HAVE_LOOPS */
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	add	a3, a3, a11	# readjust a3 with correct misalignment
#endif
/*
 * void bcopy(const void *src, void *dest, size_t n);
 */
	.global	bcopy
	.type	bcopy,@function
bcopy:
	entry	sp, 16		# minimal stack frame
	# a2=src, a3=dst, a4=len
	mov	a5, a3		# swap src and dst into memmove order
	mov	a3, a2
	mov	a2, a5
	j	.Lmovecommon	# go to common code for memmove+bcopy
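	/*
	 * Hedged note: bcopy() is the legacy BSD interface with swapped
	 * pointer arguments and no return value; the wrapper above is
	 * equivalent to
	 *
	 *	void bcopy(const void *src, void *dst, size_t n)
	 *	{
	 *		(void)memmove(dst, src, n);
	 *	}
	 */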
/*
 * void *memmove(void *dst, const void *src, size_t len);
 *
 * This function is intended to do the same thing as the standard
 * library function memmove() for most cases.
 * However, where the source and/or destination references
 * an instruction RAM or ROM or a data RAM or ROM, that
 * source and/or destination will always be accessed with
 * 32-bit load and store instructions (as required for these
 * types of devices).
 *
 * !!!!!!!  Handling of IRAM/IROM has not yet
 * !!!!!!!  been implemented.
 *
 * The (general case) algorithm is as follows:
 *   If end of source doesn't overlap destination then use memcpy.
 *   Otherwise do memcpy backwards.
 */
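/*
 * A hedged C sketch of that overlap dispatch (illustrative only;
 * memmove_sketch is a hypothetical name).  The unsigned difference
 * dst - src wraps around for dst < src, so a single compare covers
 * both the disjoint and the harmless-overlap cases:
 *
 *	void *memmove_sketch(void *dst, const void *src, size_t len)
 *	{
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *
 *		if ((uintptr_t)d - (uintptr_t)s >= len) {
 *			while (len--)		// forward: memcpy path
 *				*d++ = *s++;
 *		} else {			// dst overlaps src's tail:
 *			d += len; s += len;	// copy backwards
 *			while (len--)
 *				*--d = *--s;
 *		}
 *		return dst;
 *	}
 */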
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbackbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbackbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbackbytecopydone
	sub	a7, a3, a4	# a7 = start address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lbacknextbyte:
	addi	a3, a3, -1
	l8ui	a6, a3, 0
	addi	a5, a5, -1
	s8i	a6, a5, 0
#if !XCHAL_HAVE_LOOPS
	bne	a3, a7, .Lbacknextbyte	# continue loop if
					# $a3:src != $a7:src_start
#endif /* !XCHAL_HAVE_LOOPS */
/*
 * Destination is unaligned
 */

.Lbackdst1mod2:	# dst is only byte aligned
	_bltui	a4, 7, .Lbackbytecopy	# do short copies byte by byte

	_bbci.l	a5, 1, .Lbackdstaligned	# if dst is now aligned, then
					# return to main algorithm
.Lbackdst2mod4:	# dst 16-bit aligned
	_bltui	a4, 6, .Lbackbytecopy	# do short copies byte by byte

	j	.Lbackdstaligned	# dst is now aligned,
					# return to main algorithm
	.global	memmove
	.type	memmove,@function
memmove:
	entry	sp, 16		# minimal stack frame
	# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
.Lmovecommon:	# start of common code for memmove+bcopy
	sub	a6, a5, a3	# a6 = dst - src
	bgeu	a6, a4, .Lcommon	# if (dst - src) >= len, no harmful
					# overlap: use the forward memcpy path

	add	a5, a5, a4	# a5 = end of dst
	add	a3, a3, a4	# a3 = end of src

	_bbsi.l	a5, 0, .Lbackdst1mod2	# if dst is 1 mod 2
	_bbsi.l	a5, 1, .Lbackdst2mod4	# if dst is 2 mod 4
.Lbackdstaligned:	# return here from .Lbackdst?mod? once dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is not aligned,
	_bany	a3, a8, .Lbacksrcunaligned	#   then use shifting copy
/*
 * Destination and source are word-aligned, use word copy.
 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .backLoop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .backLoop1done
	slli	a8, a7, 4
	sub	a8, a3, a8	# a8 = start of first 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
#if !XCHAL_HAVE_LOOPS
	bne	a3, a8, .backLoop1	# continue loop if a3:src != a8:src_start
#endif /* !XCHAL_HAVE_LOOPS */
	bbci.l	a4, 3, .Lback2	# skip 8-byte chunk if bit 3 of len is clear
	bbsi.l	a4, 2, .Lback3	# copy 4 more bytes if bit 2 of len is set
	bbsi.l	a4, 1, .Lback4	# copy 2 more bytes if bit 1 of len is set
	bbsi.l	a4, 0, .Lback5	# copy last byte if bit 0 of len is set
	bbsi.l	a4, 1, .Lback4
	bbsi.l	a4, 0, .Lback5
	bbsi.l	a4, 0, .Lback5
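	/*
	 * Hedged C equivalent of these tail tests, with hypothetical
	 * helpers (illustrative): the low bits of the remaining length
	 * select the final copies:
	 *
	 *	if (len & 8) copy8();	// bit 3
	 *	if (len & 4) copy4();	// bit 2
	 *	if (len & 2) copy2();	// bit 1
	 *	if (len & 1) copy1();	// bit 0
	 */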
/*
 * Destination is aligned, Source is unaligned
 */

.Lbacksrcunaligned:
	_beqz	a4, .Lbackdone	# avoid loading anything for zero-length copies
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	ssa8	a3		# set shift amount from byte offset
#define SIM_CHECKS_ALIGNMENT	1	/* set to 1 when running on ISS with
					 * the lint or ferret client, or 0
					 * to save a few cycles */
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	and	a11, a3, a8	# save unalignment offset for below
	sub	a3, a3, a11	# align a3
#endif
	l32i	a6, a3, 0	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .backLoop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .backLoop2done
	slli	a10, a7, 4
	sub	a10, a3, a10	# a10 = start of first 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
#if !XCHAL_HAVE_LOOPS
	bne	a3, a10, .backLoop2	# continue loop if a3:src != a10:src_start
#endif /* !XCHAL_HAVE_LOOPS */
	bbci.l	a4, 3, .Lback12	# skip 8-byte chunk if bit 3 of len is clear
	bbci.l	a4, 2, .Lback13	# skip 4-byte chunk if bit 2 of len is clear
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	add	a3, a3, a11	# readjust a3 with correct misalignment
#endif
	bbsi.l	a4, 1, .Lback14	# copy 2 more bytes if bit 1 of len is set
	bbsi.l	a4, 0, .Lback15	# copy last byte if bit 0 of len is set
	bbsi.l	a4, 0, .Lback15
/*
 * Local Variables:
 * comment-start: "# "
 * comment-start-skip: "# *"
 * End:
 */