1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * Copyright (C) 2013 Regents of the University of California
6 #include <linux/linkage.h>
9 /* void *memcpy(void *, const void *, size_t) */
/*
 * NOTE(review): this is a gap-sampled view of the routine; many
 * instructions between the numbered lines are not visible here.
 * Comments below describe only what the visible lines show —
 * TODO confirm against the full source before relying on them.
 */
13 /* Save for return value */
17 * Register allocation for code below:
18 * a0 - start of uncopied dst
19 * a1 - start of uncopied src
20 * t0 - end of uncopied dst
25 * Use bytewise copy if too small.
27 * This threshold must be at least 2*SZREG to ensure at least one
28 * wordwise copy is performed. It is chosen to be 16 because it will
29 * save at least 7 iterations of bytewise copy, which pays off the
/* If the remaining length (a2) is below the threshold (presumably held
 * in a3 here — verify against the lines not visible in this view),
 * fall through is skipped and the byte-at-a-time tail loop runs. */
33 bltu a2, a3, .Lbyte_copy_tail
36 * Bytewise copy first to align a0 to word boundary.
/* Round the count down to a multiple of the register size so the
 * word-wise loop below never overruns; the remainder is handled by
 * the byte-copy tail. */
39 andi a2, a2, ~(SZREG-1)
50 * Now a0 is word-aligned. If a1 is also word aligned, we could perform
51 * aligned word-wise copy. Otherwise we need to perform misaligned
/* a3 holds the src/dst alignment difference at this point (computed on
 * lines not visible here — TODO confirm); nonzero means src and dst
 * are mutually misaligned, so take the shift-based copy path. */
55 bnez a3, .Lmisaligned_word_copy
57 /* Unrolled wordwise copy */
/* Bias the loop-end pointer so a single unsigned compare (not visible
 * in this view) can guard a 16-words-per-iteration unrolled body. */
58 addi t0, t0, -(16*SZREG-1)
/* Unrolled body: load up to 16 words ahead, then store them. Only a
 * subset of the load/store pairs is visible in this fragment. */
71 REG_L t5, 10*SZREG(a1)
82 REG_S t5, 10*SZREG(a0)
/* a2/a3/a4/a5/a6 are reused as scratch for the last five words of the
 * 16-word group; their earlier roles (count, alignment) are no longer
 * needed inside the unrolled body. */
83 REG_L a2, 11*SZREG(a1)
84 REG_L a3, 12*SZREG(a1)
85 REG_L a4, 13*SZREG(a1)
86 REG_L a5, 14*SZREG(a1)
87 REG_L a6, 15*SZREG(a1)
89 REG_S a2, 11*SZREG(a0)
90 REG_S a3, 12*SZREG(a0)
91 REG_S a4, 13*SZREG(a0)
92 REG_S a5, 14*SZREG(a0)
93 REG_S a6, 15*SZREG(a0)
97 /* Post-loop increment by 16*SZREG-1 and pre-loop decrement by SZREG-1 */
113 * Bytewise copy anything left.
128 .Lmisaligned_word_copy:
130 * Misaligned word-wise copy.
131 * For misaligned copy we still perform word-wise copy, but we need to
132 * use the value fetched from the previous iteration and do some shifts.
133 * This is safe because we wouldn't access more words than necessary.
136 /* Calculate shifts */
/* t4 = -t3: RISC-V sll/srl use only the low log2(XLEN) bits of the
 * shift amount, so the negated bit-offset acts as (XLEN - t3) for the
 * complementary shift of the two partial words. */
138 sub t4, x0, t3 /* negate is okay as shift will only look at LSBs */
140 /* Load the initial value and align a1 */
/* Round src pointer down to a word boundary; the partial leading word
 * was (presumably) loaded just above — not visible in this view. */
141 andi a1, a1, ~(SZREG-1)
/* Bias loop-end pointer by one word so the loop condition (not visible
 * here) can use a simple unsigned compare. */
144 addi t0, t0, -(SZREG-1)
145 /* At least one iteration will be executed here, no check */
156 /* Update pointers to correct value */