1 dnl Alpha ev6 nails mpn_addmul_2.
3 dnl Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software; you can redistribute it and/or
8 dnl modify it under the terms of the GNU Lesser General Public License as
9 dnl published by the Free Software Foundation; either version 3 of the
10 dnl License, or (at your option) any later version.
12 dnl The GNU MP Library is distributed in the hope that it will be useful,
13 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 dnl Lesser General Public License for more details.
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
20 include(`../config.m4')
22 C Runs at 4.0 cycles/limb.
24 C We could either go for 2-way unrolling over 11 cycles, for 2.75 c/l,
25 C or for 4-way unrolling over 20 cycles, for 2.5 c/l.
34 C Useful register aliases
C numb_mask names register r24.  The visible code shifts it right by
C NAIL_BITS once at entry and afterwards ANDs sums with it to keep only
C the numb part of a limb.
C NOTE review: the instruction that gives r24 its initial value is not
C visible in this excerpt -- confirm what it holds before the shift.
35 define(`numb_mask',`r24')
50 C Used for temps: r8 r19 r28
C Short local names for the nail and numb bit counts.  GMP_NAIL_BITS and
C GMP_NUMB_BITS are not set in this file -- presumably they come from the
C included config.m4 -- TODO confirm.
C NAIL_BITS is the shift count applied to the mulq results m0a and m1a
C and to numb_mask.  NUMB_BITS is the shift count used to extract the
C nail part of a sum.
52 define(`NAIL_BITS',`GMP_NAIL_BITS')
53 define(`NUMB_BITS',`GMP_NUMB_BITS')
55 C This declaration is munged by configure
59 PROLOGUE(mpn_addmul_2)
61 srl numb_mask,NAIL_BITS,numb_mask
66 bis r31, r31, acc0 C zero acc0
68 bis r31, r31, acc1 C zero acc1
74 mulq v0, ulimb, m0a C U1
75 umulh v0, ulimb, m0b C U1
76 mulq v1, ulimb, m1a C U1
77 umulh v1, ulimb, m1b C U1
82 L(top): bis r31, r31, r31 C U1 nop
83 addq r19, acc0, acc0 C U0 propagate nail
88 srl m0a,NAIL_BITS, r8 C U0
90 mulq v0, ulimb, m0a C U1
92 addq r8, acc0, r19 C U0
93 addq m0b, acc1, acc0 C L1
94 umulh v0, ulimb, m0b C U1
95 bis r31, r31, r31 C L0 nop
97 addq rlimb, r19, r19 C L1 FINAL PROD-SUM
98 srl m1a,NAIL_BITS, r8 C U0
100 mulq v1, ulimb, m1a C U1
102 addq r8, acc0, acc0 C U0
103 bis r31, m1b, acc1 C L1
104 umulh v1, ulimb, m1b C U1
105 and r19,numb_mask, r28 C L0 extract numb part
108 srl r19,NUMB_BITS, r19 C U1 extract nail part
112 L(end): ldq rlimb, 0(rp)
113 addq r19, acc0, acc0 C propagate nail
115 srl m0a,NAIL_BITS, r8 C U0
119 srl m1a,NAIL_BITS, r8 C U0
122 and r19,numb_mask, r28 C extract limb
124 srl r19,NUMB_BITS, r19 C extract nail
127 addq r19, acc0, acc0 C propagate nail
128 and acc0,numb_mask, r28
130 srl acc0,NUMB_BITS, r19