1 dnl AMD64 mpn_addlsh_n and mpn_rsblsh_n. R = V2^k +- U.
2 dnl ("rsb" means reversed subtract, name mandated by mpn_sublsh1_n which
3 dnl subtracts the shifted operand from the unshifted operand.)
5 dnl Copyright 2006 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of the GNU Lesser General Public License as published
11 dnl by the Free Software Foundation; either version 3 of the License, or (at
12 dnl your option) any later version.
14 dnl The GNU MP Library is distributed in the hope that it will be useful, but
15 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
17 dnl License for more details.
19 dnl You should have received a copy of the GNU Lesser General Public License
20 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
22 include(`../config.m4')
26 C K8,K9: 3.25 (mpn_lshift + mpn_add_n costs about 4.1 c/l)
27 C K10: 3.25 (mpn_lshift + mpn_add_n costs about 4.1 c/l)
31 C This was written quickly and not optimized at all. Surely one could get
32 C closer to 3 c/l or perhaps even under 3 c/l. Ideas:
33 C 1) Use indexing to save the 3 LEA
34 C 2) Write reasonable feed-in code
35 C 3) Be more clever about register usage
36 C 4) Unroll more, handling CL negation, carry save/restore cost much now
dnl Variant selection: each ifdef below binds ADDSUBC and func for one build.
dnl OPERATION_addlsh_n pairs adc with mpn_addlsh_n; OPERATION_rsblsh_n pairs
dnl sbb with mpn_rsblsh_n.
46 ifdef(`OPERATION_addlsh_n',`
47 define(ADDSUBC, `adc')
48 define(func, mpn_addlsh_n)
50 ifdef(`OPERATION_rsblsh_n',`
51 define(ADDSUBC, `sbb')
52 define(func, mpn_rsblsh_n)
55 MULFUNC_PROLOGUE(mpn_addlsh_n mpn_rsblsh_n)
C Setup and carry handling. The carry flag is parked in ebx between uses:
C the sbb below leaves ebx = 0 when CF is clear and ebx = -1 when CF is set.
C Starting from ebx = 0 therefore means no carry has been saved yet.
69 xor %ebx, %ebx C clear carry save register
C NOTE(review): r8 presumably holds the shift count argument; the parameter
C list is not visible here, so confirm against the function prologue.
70 mov %r8d, %ecx C shift count
C Writing the 32-bit register zeroes all 64 bits of r15.
71 xor %r15d, %r15d C limb carry
C Shift r15 right by the count currently held in cl.
122 shr %cl, %r15 C used next loop
C Doubling ebx sets CF exactly when ebx is -1, assuming ebx has only been
C written by the xor above and the sbb below (intervening code not visible).
130 add %ebx, %ebx C restore carry flag
C ebx = ebx - ebx - CF, which yields 0 for CF clear and -1 for CF set.
142 sbb %ebx, %ebx C save carry flag