1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library. If not, see
16 <http://www.gnu.org/licenses/>. */
/* Platform configuration: each build environment (Android bionic, newlib,
   and -- presumably, in the truncated branch -- glibc) pulls in its own
   asm.h/regdef.h and selects the prefetch hint used for stores.
   NOTE(review): this listing is truncated.  Original lines 22-25 are
   missing, so the conditional branch that original line 26 belongs to
   (and any #else/#endif pairing) is not visible here -- do not assume
   the duplicate #define below is a bug; it sits in a different branch
   of the original #if chain.  */
18 #ifdef ANDROID_CHANGES
19 # include "machine/asm.h"
20 # include "machine/regdef.h"
21 # define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
26 # define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
27 #elif _COMPILING_NEWLIB
28 # include "machine/asm.h"
29 # include "machine/regdef.h"
30 # define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
36 /* Check to see if the MIPS architecture we are compiling for supports
/* ... prefetching (the `pref' instruction exists on MIPS IV, V, 32 and 64).
   NOTE(review): the lines that actually #define USE_PREFETCH and
   USE_DOUBLE are missing from this truncated listing; the guards below
   (DISABLE_PREFETCH / DISABLE_DOUBLE / DISABLE_DOUBLE_ALIGN) presumably
   wrap those definitions -- confirm against the full file.  */
39 #if (__mips == 4) || (__mips == 5) || (__mips == 32) || (__mips == 64)
40 # ifndef DISABLE_PREFETCH
/* 64-bit loads/stores are only usable under the 64-bit register ABIs
   (N64 and N32).  */
45 #if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABI64) || (_MIPS_SIM == _ABIN32))
46 # ifndef DISABLE_DOUBLE
52 # ifndef DISABLE_DOUBLE_ALIGN
/* MIPS R6 dropped the PrepareForStore prefetch hint, so fall back to the
   streamed-store hint there.  */
57 #if __mips_isa_rev > 5
58 # if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
59 # undef PREFETCH_STORE_HINT
60 # define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED
65 /* Some asm.h files do not have the L macro definition. */
/* O32 assemblers use `$L'-prefixed local labels; the other ABIs use
   the ELF-style `.L' prefix.  (The matching #ifndef/#else/#endif lines
   are missing from this truncated listing.)  */
67 # if _MIPS_SIM == _ABIO32
68 # define L(label) $L ## label
70 # define L(label) .L ## label
74 /* Some asm.h files do not have the PTR_ADDIU macro definition. */
/* Pointer-sized add-immediate: 64-bit `daddiu' when pointers are 64-bit,
   32-bit `addiu' otherwise.  */
77 # define PTR_ADDIU daddiu
79 # define PTR_ADDIU addiu
83 /* New R6 instructions that may not be in asm.h. */
85 # if _MIPS_SIM == _ABI64
92 /* Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
93 or PREFETCH_STORE_STREAMED offers a large performance advantage
94 but PREPAREFORSTORE has some special restrictions to consider.
96 Prefetch with the 'prepare for store' hint does not copy a memory
97 location into the cache, it just allocates a cache line and zeros
98 it out. This means that if you do not write to the entire cache
99 line before writing it out to memory some data will get zeroed out
100 when the cache line is written back to memory and data will be lost.
102 There are ifdef'ed sections of this memset to make sure that it does not
103 do prefetches on cache lines that are not going to be completely written.
104 This code is only needed and only used when PREFETCH_STORE_HINT is set to
105 PREFETCH_HINT_PREPAREFORSTORE. This code assumes that cache lines are
106 less than MAX_PREFETCH_SIZE bytes and if the cache line is larger it will
107 not work correctly. */
/* Hint-field encodings for the first operand of the MIPS `pref'
   instruction (see PREFETCH_FOR_STORE below).  */
110 # define PREFETCH_HINT_STORE 1
111 # define PREFETCH_HINT_STORE_STREAMED 5
112 # define PREFETCH_HINT_STORE_RETAINED 7
113 # define PREFETCH_HINT_PREPAREFORSTORE 30
115 /* If we have not picked out what hints to use at this point use the
116 standard load and store prefetch hints. */
117 # ifndef PREFETCH_STORE_HINT
118 # define PREFETCH_STORE_HINT PREFETCH_HINT_STORE
121 /* We double everything when USE_DOUBLE is true so we do 2 prefetches to
122 get 64 bytes in that case. The assumption is that each individual
123 prefetch brings in 32 bytes. */
/* NOTE(review): the #ifdef USE_DOUBLE / #else / #endif lines selecting
   between the two PREFETCH_CHUNK/PREFETCH_FOR_STORE definitions below are
   missing from this truncated listing.  */
125 # define PREFETCH_CHUNK 64
126 # define PREFETCH_FOR_STORE(chunk, reg) \
127 pref PREFETCH_STORE_HINT, (chunk)*64(reg); \
128 pref PREFETCH_STORE_HINT, ((chunk)*64)+32(reg)
130 # define PREFETCH_CHUNK 32
131 # define PREFETCH_FOR_STORE(chunk, reg) \
132 pref PREFETCH_STORE_HINT, (chunk)*32(reg)
135 /* MAX_PREFETCH_SIZE is the maximum size of a prefetch, it must not be less
136 than PREFETCH_CHUNK, the assumed size of each prefetch. If the real size
137 of a prefetch is greater than MAX_PREFETCH_SIZE and the PREPAREFORSTORE
138 hint is used, the code will not work correctly. If PREPAREFORSTORE is not
139 used than MAX_PREFETCH_SIZE does not matter. */
140 # define MAX_PREFETCH_SIZE 128
141 /* PREFETCH_LIMIT is set based on the fact that we never use an offset greater
142 than 5 on a STORE prefetch and that a single prefetch can never be larger
143 than MAX_PREFETCH_SIZE. We add the extra 32 when USE_DOUBLE is set because
144 we actually do two prefetches in that case, one 32 bytes after the other. */
146 # define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + 32 + MAX_PREFETCH_SIZE
148 # define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + MAX_PREFETCH_SIZE
/* Sanity check: with PREPAREFORSTORE the early prefetches must not reach
   outside the destination buffer.  */
151 # if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) \
152 && ((PREFETCH_CHUNK * 4) < MAX_PREFETCH_SIZE)
153 /* We cannot handle this because the initial prefetches may fetch bytes that
154 are before the buffer being copied. We start copies with an offset
155 of 4 so avoid this situation when using PREPAREFORSTORE. */
156 # error "PREFETCH_CHUNK is too large and/or MAX_PREFETCH_SIZE is too small."
158 #else /* USE_PREFETCH not defined */
/* Without USE_PREFETCH the prefetch macro expands to nothing.  */
159 # define PREFETCH_FOR_STORE(offset, reg)
162 /* Allow the routine to be named something else if desired. */
164 # define MEMSET_NAME memset
167 /* We load/store 64 bits at a time when USE_DOUBLE is true.
168 The C_ prefix stands for CHUNK and is used to avoid macro name
169 conflicts with system header files. */
/* C_STHI is the unaligned "store high part" instruction used to align the
   destination: sdl/sdr for 64-bit stores, swl/swr for 32-bit stores,
   chosen by endianness.  NOTE(review): the surrounding #if USE_DOUBLE /
   endianness conditionals (and presumably the C_ST and NSIZE definitions)
   are missing from this truncated listing.  */
174 # define C_STHI sdl /* high part is left in big-endian */
176 # define C_STHI sdr /* high part is right in little-endian */
181 # define C_STHI swl /* high part is left in big-endian */
183 # define C_STHI swr /* high part is right in little-endian */
187 /* Bookkeeping values for 32 vs. 64 bit mode. */
/* NSIZEMASK masks the remainder past the 32/64-byte inner chunk;
   NSIZEDMASK masks the remainder past the 64/128-byte unrolled loop.  */
190 # define NSIZEMASK 0x3f
191 # define NSIZEDMASK 0x7f
194 # define NSIZEMASK 0x1f
195 # define NSIZEDMASK 0x3f
197 #define UNIT(unit) ((unit)*NSIZE)
198 #define UNITM1(unit) (((unit)*NSIZE)-1)
/* memset body.  Arguments per the MIPS calling convention as used below:
   a0 = destination pointer, a1 = fill byte, a2 = byte count; the return
   value (the original dst) is placed in v0.
   NOTE(review): this listing is heavily truncated -- the LEAF/ENTRY
   directive, most labels (L(lastb), L(set0), L(aligned), the main loop)
   and many instructions are missing.  Do not attempt to assemble as-is;
   compare against the full upstream file.  */
200 #ifdef ANDROID_CHANGES
208 /* If the size is less than 2*NSIZE (8 or 16), go to L(lastb). Regardless of
209 size, copy dst pointer to v0 for the return value. */
210 slti t2,a2,(2 * NSIZE)
214 /* If memset value is not zero, we copy it to all the bytes in a 32 or 64
216 beq a1,zero,L(set0) /* If memset value is zero no smear */
220 /* smear byte into 32 or 64 bit word */
221 #if ((__mips == 64) || (__mips == 32)) && (__mips_isa_rev >= 2)
/* R2+ has the (d)ins bit-insert instructions for the byte smear.  */
223 dins a1, a1, 8, 8 /* Replicate fill byte into half-word. */
224 dins a1, a1, 16, 16 /* Replicate fill byte into word. */
225 dins a1, a1, 32, 32 /* Replicate fill byte into dbl word. */
227 ins a1, a1, 8, 8 /* Replicate fill byte into half-word. */
228 ins a1, a1, 16, 16 /* Replicate fill byte into word. */
248 /* If the destination address is not aligned do a partial store to get it
249 aligned. If it is already aligned just jump to L(aligned). */
252 andi t2,a3,(NSIZE-1) /* word-unaligned address? */
253 beq t2,zero,L(aligned) /* t2 is the unalignment count */
295 /* If USE_DOUBLE is not set we may still want to align the data on a 16
296 byte boundary instead of an 8 byte boundary to maximize the opportunity
297 of proAptiv chips to do memory bonding (combining two sequential 4
298 byte stores into one 8 byte store). We know there are at least 4 bytes
299 left to store or we would have jumped to L(lastb) earlier in the code. */
302 beq t2,zero,L(double_aligned)
309 /* Now the destination is aligned to (word or double word) aligned address
310 Set a2 to count how many bytes we have to copy after all the 64/128 byte
311 chunks are copied and a3 to the dest pointer after all the 64/128 byte
312 chunks have been copied. We will loop, incrementing a0 until it equals
314 andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
315 beq a2,t8,L(chkw) /* if a2==t8, no 64-byte/128-byte chunks */
316 PTR_SUBU a3,a2,t8 /* subtract from a2 the remainder */
317 PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */
319 /* When in the loop we may prefetch with the 'prepare to store' hint,
320 in this case the a0+x should not be past the "t0-32" address. This
321 means: for x=128 the last "safe" a0 address is "t0-160". Alternatively,
322 for x=64 the last "safe" a0 address is "t0-96" In the current version we
323 will use "prefetch hint,128(a0)", so "t0-160" is the limit. */
324 #if defined(USE_PREFETCH) \
325 && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
326 PTR_ADDU t0,a0,a2 /* t0 is the "past the end" address */
327 PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */
329 #if defined(USE_PREFETCH) \
330 && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
/* Non-PREPAREFORSTORE hints are harmless past the buffer end, so prime
   several chunks ahead unconditionally.  */
331 PREFETCH_FOR_STORE (1, a0)
332 PREFETCH_FOR_STORE (2, a0)
333 PREFETCH_FOR_STORE (3, a0)
337 #if defined(USE_PREFETCH) \
338 && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
339 sltu v1,t9,a0 /* If a0 > t9 don't use next prefetch */
344 PREFETCH_FOR_STORE (2, a0)
346 PREFETCH_FOR_STORE (4, a0)
347 PREFETCH_FOR_STORE (5, a0)
366 PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */
371 /* Here we have dest word-aligned but less than 64-bytes or 128 bytes to go.
372 Check for a 32(64) byte chunk and copy it if there is one. Otherwise
373 jump down to L(chk1w) to handle the tail end of the copy. */
375 andi t8,a2,NSIZEMASK /* is there a 32-byte/64-byte chunk. */
376 /* the t8 is the remainder count past 32-bytes */
377 beq a2,t8,L(chk1w)/* when a2==t8, no 32-byte chunk */
387 PTR_ADDIU a0,a0,UNIT(8)
389 /* Here we have less than 32(64) bytes to set. Set up for a loop to
390 copy one word (or double word) at a time. Set a2 to count how many
391 bytes we have to copy after all the word (or double word) chunks are
392 copied and a3 to the dest pointer after all the (d)word chunks have
393 been copied. We will loop, incrementing a0 until a0 equals a3. */
395 andi a2,t8,(NSIZE-1) /* a2 is the remainder past one (d)word chunks */
397 PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
398 PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
400 /* copying in words (4-byte or 8 byte chunks) */
402 PTR_ADDIU a0,a0,UNIT(1)
403 bne a0,a3,L(wordCopy_loop)
406 /* Copy the last 8 (or 16) bytes */
409 PTR_ADDU a3,a0,a2 /* a3 is the last dst address */
412 bne a0,a3,L(lastbloop)
421 #ifndef ANDROID_CHANGES
/* glibc-only: export the hidden builtin alias for internal callers.  */
423 libc_hidden_builtin_def (MEMSET_NAME)