1 /* SPDX-License-Identifier: MIT */
3 * memset - fill memory with a constant byte
5 * Copyright (c) 2012-2021, Arm Limited.
10 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
14 #include <asm/macro.h>
30 * The optimized memset uses the dc opcode, which causes problems
31 * when the cache is disabled. Let's check if the cache is disabled
32 * and use a very simple memset implementation in this case. Otherwise
33 * jump to the optimized version.
35 switch_el x6, 3f, 2f, 1f
46 * A very "simple" memset implementation without the use of the
47 * dc opcode. Can be run with caches disabled.
50 cmp count, x3 /* check for zero length */
52 4: strb valw, [dstin, x3]
59 /* Here the optimized memset version starts */
61 add dstend, dstin, count
69 /* Set 0..15 bytes. */
77 str valw, [dstend, -4]
82 strh valw, [dstend, -2]
85 /* Set 16..96 bytes. */
88 tbnz count, 6, L(set96)
96 /* Set 64..96 bytes. Write 64 bytes from the start and
97 32 bytes from the end. */
100 stp q0, q0, [dstin, 32]
101 stp q0, q0, [dstend, -32]
113 #ifndef SKIP_ZVA_CHECK
114 mrs zva_val, dczid_el0
115 and zva_val, zva_val, 31
116 cmp zva_val, 4 /* ZVA size is 64 bytes. */
120 stp q0, q0, [dst, 32]
122 sub count, dstend, dst /* Count is now 64 too large. */
123 sub count, count, 128 /* Adjust count and bias for loop. */
129 subs count, count, 64
131 stp q0, q0, [dstend, -64]
132 stp q0, q0, [dstend, -32]
136 sub count, dstend, dst /* Count is 16 too large. */
137 sub dst, dst, 16 /* Dst is biased by -32. */
138 sub count, count, 64 + 16 /* Adjust count and bias for loop. */
140 stp q0, q0, [dst, 32]
141 stp q0, q0, [dst, 64]!
142 subs count, count, 64
144 stp q0, q0, [dstend, -64]
145 stp q0, q0, [dstend, -32]