1 dnl AMD K6 mpn_preinv_mod_1 -- mpn by 1 remainder, with pre-inverted divisor.
3 dnl Copyright 2000, 2002, 2003 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software; you can redistribute it and/or
8 dnl modify it under the terms of the GNU Lesser General Public License as
9 dnl published by the Free Software Foundation; either version 3 of the
10 dnl License, or (at your option) any later version.
12 dnl The GNU MP Library is distributed in the hope that it will be useful,
13 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 dnl Lesser General Public License for more details.
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
20 include(`../config.m4')
23 C K6: 18.0 cycles/limb
26 C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor, mp_limb_t inverse);
29 C This code is only 2 c/l faster than a simple divl, but that's 10% so it's
30 C considered worthwhile (just).
C Named stack slots for the four incoming arguments, highest offset first.
C The offsets step by 4 from src (4) up to inverse (16), one slot per
C argument in prototype order; the code reads them with 32-bit movl/andl.
C NOTE(review): defframe is not defined in this file (presumably it comes in
C via config.m4); confirm how it combines these offsets with the FRAME
C adjustments made by FRAME_pushl().
32 defframe(PARAM_INVERSE,16)
33 defframe(PARAM_DIVISOR,12)
34 defframe(PARAM_SIZE, 8)
35 defframe(PARAM_SRC, 4)
39 PROLOGUE(mpn_preinv_mod_1)
42 ASSERT(ae,`cmpl $1, PARAM_SIZE')
43 ASSERT(nz,`testl $0x80000000, PARAM_DIVISOR')
46 pushl %ebp FRAME_pushl()
49 pushl %edi FRAME_pushl()
51 movl PARAM_DIVISOR, %eax
52 pushl %esi FRAME_pushl()
54 movl -4(%ebp,%ecx,4), %esi C src high limb
55 pushl %ebx FRAME_pushl()
57 movl %edx, %edi C first n2 to cancel
58 subl %eax, %esi C first n1 = high-divisor
66 C ecx counter, size to 1
69 C edi old high, for underflow test
72 sbbl %edx, %edi C high n-(q1+1)*d, 0 or -1
75 andl PARAM_DIVISOR, %edi
77 movl -4(%ebp,%ecx,4), %ebx
79 addl %esi, %edi C possible addback
82 sarl $31, %ebx C -n1 = 0 or -1
85 movl PARAM_INVERSE, %edx
86 subl %ebx, %eax C n2+n1
90 andl PARAM_DIVISOR, %ebx C -n1 & d
91 addl %esi, %ebx C nadj = n10 + (-n1&d), ignoring overflow
93 addl %ebx, %eax C low m*(n2+n1) + nadj, giving carry flag
94 leal 1(%edi), %ebx C n2+1
96 adcl %ebx, %edx C 1+high(n2<<32+m*(n2+n1)+nadj) = q1+1
98 movl PARAM_DIVISOR, %eax C d
103 subl %eax, %esi C low n-(q1+1)*d
109 sbbl %edx, %edi C high n-(q1+1)*d, 0 or -1
111 andl PARAM_DIVISOR, %edi
115 leal (%esi,%edi), %eax
124 C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
125 C of q*d is simply -d and the remainder n-q*d = n10+d. This is rarely reached.
129 movl PARAM_DIVISOR, %edi