1 /* Optimized memset implementation for PowerPC.
2 Copyright (C) 1997 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
23 /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
26 The memset is done in three sizes: byte (8 bits), word (32 bits),
27 cache line (256 bits). There is a special case for setting cache lines
28 to 0, to take advantage of the dcbz instruction.
29 r6: current address we are storing at
30 r7: number of bytes we are setting now (when aligning) */
32 /* take care of case for size <= 4 */
37 /* align to word boundary */
39 rlwimi %r4,%r4,8,16,23
40 beq+ L(aligned) # 8th instruction from .align
48 0: sth %r4,-2(%r6) # 16th instruction from .align
49 /* take care of case for size <= 31 */
52 rlwimi %r4,%r4,16,0,15
54 /* align to cache line boundary... */
67 stw %r4,-4(%r8) # 32nd instruction from .align
73 /* now aligned to a cache line. */
77 mtcrf 0x01,%r5 # 40th instruction from .align
78 beq %cr1,L(zloopstart) # special case for clearing memory using dcbz
81 beq L(medium) # we may not actually get to do a full line
85 bdz L(cloopdone) # 48th instruction from .align
92 nop # let 601 fetch last 4 instructions of loop
94 stw %r4,-24(%r6) # 56th instruction from .align
95 nop # let 601 fetch first 8 instructions of loop
103 stw %r4,-16(%r6) # 64th instruction from .align
111 b L(medium_tail2) # 72nd instruction from .align
115 /* Clear lines of memory in 128-byte chunks. */
123 cmplwi %cr1,%r5,16 # 8
131 addi %r6,%r6,0x40 # 16
146 /* Memset of 4 bytes or less. */
161 /* Memset of 0-31 bytes. */
173 bge- %cr1,L(medium_27t)
175 stw %r4,-4(%r6) # 8th instruction from .align
187 blt- %cr1,L(medium_27f) # 16th instruction from .align