1 dnl AMD64 mpn_lshsub_n. R = 2^k(U - V).
3 dnl Copyright 2006 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of the GNU Lesser General Public License as published
9 dnl by the Free Software Foundation; either version 3 of the License, or (at
10 dnl your option) any later version.
12 dnl The GNU MP Library is distributed in the hope that it will be useful, but
13 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 dnl License for more details.
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
20 include(`../config.m4')
24 C K8,K9: 3.15 c/l (mpn_sub_n + mpn_lshift costs about 4 c/l)
25 C K10: 3.15 c/l (mpn_sub_n + mpn_lshift costs about 4 c/l)
29 C This was written quickly and not optimized at all, yet it runs very well on
30 C K8. Still, one could perhaps get under 3 c/l. Ideas:
31 C 1) Use indexing to save the 3 LEAs
32 C 2) Write reasonable feed-in code
33 C 3) Be more clever about register usage
34 C 4) Unroll more; CL negation and carry save/restore are costly now
47 PROLOGUE(mpn_lshsub_n)
56 xor %ebx, %ebx C clear carry save register
57 mov %r8d, %ecx C shift count
58 xor %r15d, %r15d C limb carry
66 add %ebx, %ebx C restore carry flag
71 sbb %ebx, %ebx C save carry flag
90 add %ebx, %ebx C restore carry flag
107 sbb %ebx, %ebx C save carry flag
123 shr %cl, %r15 C used next loop