1 /* Pentium optimized __mpn_rshift --
2 Copyright (C) 1992-2013 Free Software Foundation, Inc.
3 This file is part of the GNU MP Library.
5 The GNU MP Library is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or (at your
8 option) any later version.
10 The GNU MP Library is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
13 License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with the GNU MP Library; see the file COPYING.LIB. If not,
17 see <http://www.gnu.org/licenses/>. */
20 #include "asm-syntax.h"
/* Stack offsets of the incoming arguments, relative to %esp after the
   prologue has pushed the four callee-saved registers (hence the +16).
   NOTE(review): RES (which S builds on) and the LINKAGE/PTR_SIZE macros
   are not defined anywhere in this view -- presumably they come from
   asm-syntax.h or from lines missing in this listing; confirm against
   the full file.  */
24 #define PARMS LINKAGE+16 /* space for 4 saved regs */
26 #define S RES+PTR_SIZE
27 #define SIZE S+PTR_SIZE
/* __mpn_rshift -- shift a limb (word) vector right by a bit count held in
   %cl, storing the result at the destination pointer (%edi) and returning
   the bits shifted out of the low end as the "carry limb" (pushed early,
   popped into %eax -- the i386 return register -- in the epilogue).
   Pentium (i586) tuned, AT&T syntax.

   NOTE(review): this listing is incomplete.  The stale numbering embedded
   at the start of every line has large gaps (e.g. 78 -> 115, 181 -> 212),
   and several cfi_adjust_cfa_offset/cfi_rel_offset directives appear with
   no matching pushl/popl visible.  The comments below describe only what
   is visible; recover the full source before assembling -- the embedded
   numbers alone make this text non-assemblable as-is.  */
31 ENTRY (BP_SYM (__mpn_rshift))
/* Prologue: the CFI bookkeeping below tracks four callee-saved register
   pushes (ebp, edi, esi, ebx, per the cfi_rel_offset operands); the pushl
   instructions themselves are among the lines missing from this view.  */
34 cfi_adjust_cfa_offset (4)
36 cfi_adjust_cfa_offset (4)
38 cfi_adjust_cfa_offset (4)
39 cfi_rel_offset (ebp, 0)
41 cfi_adjust_cfa_offset (4)
44 cfi_rel_offset (edi, 12)
46 cfi_rel_offset (esi, 8)
48 cfi_rel_offset (ebx, 0)
51 /* We can use faster code for shift-by-1 under certain conditions. */
/* Overlap tests: two jnc's send non-overlapping (or suitably ordered)
   operands to the L(special) shift-by-1 path; the comparisons that set
   the carry flag for them are not visible here.  */
56 jnc L(special) /* jump if res_ptr + 1 >= s_ptr */
57 leal (%edi,%ebx,4),%eax
59 jnc L(special) /* jump if s_ptr >= res_ptr + size */
/* Return value: shrdl with count in %cl extracts the bits that fall off
   the least significant limb; it is parked on the stack until the
   epilogue pops it into %eax.  */
65 shrdl %cl,%edx,%eax /* compute carry limb */
66 pushl %eax /* push carry limb onto stack */
67 cfi_adjust_cfa_offset (4)
71 cfi_adjust_cfa_offset (4)
75 movl (%edi),%eax /* fetch destination cache line */
/* Main unrolled loop.  The 28(%edi) load only touches the destination to
   pre-fetch its cache line (Pentium write-allocate optimization, per the
   original comments); the shrdl below shifts each limb taking its missing
   high bits from the neighbouring limb.  Most of the unrolled body is
   missing from this view.  */
78 L(oop): movl 28(%edi),%eax /* fetch destination cache line */
115 cfi_adjust_cfa_offset (-4)
120 shrdl %cl,%eax,%edx /* compute result limb */
/* Most significant limb has no neighbour to shift bits in from, so it
   gets a plain logical shift.  */
129 shrl %cl,%edx /* compute most significant limb */
130 movl %edx,(%edi) /* store it */
/* Epilogue: retrieve the carry limb as the return value; the four -4
   adjustments below correspond to popping the callee-saved registers
   (popl instructions not visible in this view).  */
132 popl %eax /* pop carry limb */
133 cfi_adjust_cfa_offset (-4)
136 cfi_adjust_cfa_offset (-4)
139 cfi_adjust_cfa_offset (-4)
142 cfi_adjust_cfa_offset (-4)
145 cfi_adjust_cfa_offset (-4)
/* Shift-by-1 special case.  NOTE(review): the block comment opened on the
   next line is never closed in this view (its closing marker sits on a
   missing line), so everything down to the next closing marker currently
   parses as comment text -- another sign the listing is truncated.  */
150 /* We loop from least significant end of the arrays, which is only
151 permissible if the source and destination don't overlap, since the
152 function is documented to work for overlapping source and destination.
155 cfi_adjust_cfa_offset (16)
156 cfi_rel_offset (edi, 12)
157 cfi_rel_offset (esi, 8)
158 cfi_rel_offset (ebp, 4)
159 cfi_rel_offset (ebx, 0)
161 leal -4(%edi,%ebx,4),%edi
162 leal -4(%esi,%ebx,4),%esi
169 cfi_adjust_cfa_offset (4)
177 movl (%edi),%eax /* fetch destination cache line */
181 movl -28(%edi),%eax /* fetch destination cache line */
/* Carry save/restore idiom around the pointer bump: sbbl %eax,%eax
   materializes CF into %eax (0 or -1); leal adjusts the pointer without
   touching flags; addl %eax,%eax later re-creates CF for the next
   rotate/shift-with-carry step.  */
212 leal -32(%esi),%esi /* use leal not to clobber carry */
219 cfi_adjust_cfa_offset (-4)
220 sbbl %eax,%eax /* save carry in %eax */
223 addl %eax,%eax /* restore carry from eax */
230 leal -4(%esi),%esi /* use leal not to clobber carry */
237 addl %eax,%eax /* restore carry from eax */
/* Store the final limb of the shift-by-1 path, then unwind the four
   saved registers (pop instructions not visible) and return.  */
238 L(L1): movl %edx,(%edi) /* store last limb */
244 cfi_adjust_cfa_offset (-4)
247 cfi_adjust_cfa_offset (-4)
250 cfi_adjust_cfa_offset (-4)
253 cfi_adjust_cfa_offset (-4)
257 END (BP_SYM (__mpn_rshift))