1 dnl x86-32 mpn_addmul_1 and mpn_submul_1 optimised for Intel Atom.
3 dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
5 dnl Copyright 2011 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any later version.
22 dnl or both in parallel, as here.
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
33 include(`../config.m4')
38 C P6 model 0-8,10-12 -
40 C P6 model 13 (Dothan)
41 C P4 model 0 (Willamette)
43 C P4 model 2 (Northwood)
44 C P4 model 3 (Prescott)
dnl Select the entry-point names used by this build of the file.
dnl With OPERATION_addmul_1 defined, func_1 expands to mpn_addmul_1 and
dnl func_1c expands to mpn_addmul_1c.  With OPERATION_submul_1 defined,
dnl they expand to mpn_submul_1 and mpn_submul_1c instead.
dnl NOTE(review): if neither symbol is defined, func_1 and func_1c stay
dnl undefined as far as this excerpt shows -- confirm the build always
dnl defines exactly one of them.
56 ifdef(`OPERATION_addmul_1',`
58 define(func_1, mpn_addmul_1)
59 define(func_1c, mpn_addmul_1c)')
60 ifdef(`OPERATION_submul_1',`
62 define(func_1, mpn_submul_1)
63 define(func_1c, mpn_submul_1c)')
dnl NOTE(review): MULFUNC_PROLOGUE is defined outside this file.  It is
dnl given all four names that func_1 and func_1c can expand to; presumably
dnl it announces which functions this one source can provide -- confirm
dnl against the build scripts.
65 MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
85 L(fi3): lea -8(up), up
90 add $1, n C increment and clear carry
93 L(fi1): movd %mm0, %ebx
111 L(fi2): lea 4(up), up
112 add $1, n C increment and clear carry
117 C ALIGN(16) C alignment seems irrelevant
118 L(top): movd 4(up), %mm1
124 L(lo1): psrlq $32, %mm0
132 L(lo0): psrlq $32, %mm1
140 L(lo3): psrlq $32, %mm0
148 L(lo2): psrlq $32, %mm1
155 L(end): adc n, %edx C n is zero here
159 L(wd1): psrlq $32, %mm0
172 mov 20(%esp), %edx C carry