1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * Copyright (C) 2013 Regents of the University of California
6 #include <linux/linkage.h>
9 /* void *memcpy(void *, const void *, size_t) */
12 /* Save for return value */
16 * Register allocation for code below:
17 * a0 - start of uncopied dst
18 * a1 - start of uncopied src
19 * t0 - end of uncopied dst
24 * Use bytewise copy if too small.
26 * This threshold must be at least 2*SZREG to ensure at least one
27 * wordwise copy is performed. It is chosen to be 16 because it will
28 * save at least 7 iterations of bytewise copy, which pays off the
32 bltu a2, a3, .Lbyte_copy_tail
35 * Bytewise copy first to align a0 to word boundary.
38 andi a2, a2, ~(SZREG-1)
49 * Now a0 is word-aligned. If a1 is also word-aligned, we can perform
50 * aligned word-wise copy. Otherwise we need to perform misaligned
54 bnez a3, .Lmisaligned_word_copy
56 /* Unrolled wordwise copy */
57 addi t0, t0, -(16*SZREG-1)
70 REG_L t5, 10*SZREG(a1)
81 REG_S t5, 10*SZREG(a0)
82 REG_L a2, 11*SZREG(a1)
83 REG_L a3, 12*SZREG(a1)
84 REG_L a4, 13*SZREG(a1)
85 REG_L a5, 14*SZREG(a1)
86 REG_L a6, 15*SZREG(a1)
88 REG_S a2, 11*SZREG(a0)
89 REG_S a3, 12*SZREG(a0)
90 REG_S a4, 13*SZREG(a0)
91 REG_S a5, 14*SZREG(a0)
92 REG_S a6, 15*SZREG(a0)
96 /* Post-loop increment by 16*SZREG-1 and pre-loop decrement by SZREG-1 */
112 * Bytewise copy anything left.
126 .Lmisaligned_word_copy:
128 * Misaligned word-wise copy.
129 * For misaligned copy we still perform word-wise copy, but we need to
130 * use the value fetched from the previous iteration and do some shifts.
131 * This is safe because we never access more words than necessary.
134 /* Calculate shifts */
136 sub t4, x0, t3 /* negate is okay as shift will only look at LSBs */
138 /* Load the initial value and align a1 */
139 andi a1, a1, ~(SZREG-1)
142 addi t0, t0, -(SZREG-1)
143 /* At least one iteration will be executed here, so no check is needed */
154 /* Update pointers to correct value */