1 dnl x86 mpn_divexact_1 -- mpn by limb exact division.
3 dnl Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software; you can redistribute it and/or
8 dnl modify it under the terms of the GNU Lesser General Public License as
9 dnl published by the Free Software Foundation; either version 3 of the
10 dnl License, or (at your option) any later version.
12 dnl The GNU MP Library is distributed in the hope that it will be useful,
13 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 dnl Lesser General Public License for more details.
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
20 include(`../config.m4')
26 C P6 13.0 odd divisor, 12.0 even (strangely)
32 C mp_limb_t mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor);
dnl Stack-frame names for the four incoming arguments. The offsets step
dnl by 4 from 4 (dst) through 8 (src) and 12 (size) to 16 (divisor).
dnl NOTE(review): defframe is not defined in this file (presumably it
dnl comes from the included config.m4); the offsets look like distances
dnl from the stack pointer at function entry -- confirm there.
36 defframe(PARAM_DIVISOR,16)
37 defframe(PARAM_SIZE, 12)
38 defframe(PARAM_SRC, 8)
39 defframe(PARAM_DST, 4)
41 dnl re-use parameter space
dnl VAR_INVERSE is an alias for the PARAM_SRC slot. The function stores
dnl the computed inverse of the divisor into it and reloads it from
dnl there later, so once that store happens the slot no longer holds
dnl the src pointer.
42 define(VAR_INVERSE,`PARAM_SRC')
47 PROLOGUE(mpn_divexact_1)
50 movl PARAM_DIVISOR, %eax
51 pushl %ebp FRAME_pushl()
54 pushl %edi FRAME_pushl()
56 pushl %ebx FRAME_pushl()
57 movl $-1, %ecx C shift count
59 pushl %esi FRAME_pushl()
67 leal 1(%eax,%eax), %ebx C d without twos
68 andl $127, %eax C d/2, 7 bits
71 LEA( binvert_limb_table, %edx)
72 movzbl (%eax,%edx), %eax C inv 8 bits
74 movzbl binvert_limb_table(%eax), %eax C inv 8 bits
77 leal (%eax,%eax), %edx C 2*inv
78 movl %ebx, PARAM_DIVISOR C d without twos
80 imull %eax, %eax C inv*inv
85 imull %ebx, %eax C inv*inv*d
87 subl %eax, %edx C inv = 2*inv - inv*inv*d
88 leal (%edx,%edx), %eax C 2*inv
90 imull %edx, %edx C inv*inv
92 leal (%esi,%ebp,4), %esi C src end
93 leal (%edi,%ebp,4), %edi C dst end
96 imull %ebx, %edx C inv*inv*d
98 subl %edx, %eax C inv = 2*inv - inv*inv*d
100 ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
101 pushl %eax FRAME_pushl()
102 imull PARAM_DIVISOR, %eax
104 popl %eax FRAME_popl()')
106 movl %eax, VAR_INVERSE
107 movl (%esi,%ebp,4), %eax C src[0]
115 movl (%esi,%ebp,4), %edx C src[1]
117 shrdl( %cl, %edx, %eax)
119 movl VAR_INVERSE, %edx
124 nop C k6 code alignment
128 C ebx carry bit, 0 or -1
133 C ebp counter, limbs, negative
135 movl -4(%esi,%ebp,4), %eax
136 subl %ebx, %edx C accumulate carry bit
138 movl (%esi,%ebp,4), %ebx
140 shrdl( %cl, %ebx, %eax)
142 subl %edx, %eax C apply carry limb
143 movl VAR_INVERSE, %edx
150 movl %eax, -4(%edi,%ebp,4)
151 movl PARAM_DIVISOR, %edx
159 movl -4(%esi), %eax C src high limb
162 popl %esi FRAME_popl()
164 addl %ebx, %eax C apply carry bit
165 popl %ebx FRAME_popl()
167 subl %edx, %eax C apply carry limb
169 imull VAR_INVERSE, %eax