1 dnl Alpha ev6 nails mpn_addmul_3.
3 dnl Copyright 2002, 2006 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software; you can redistribute it and/or
8 dnl modify it under the terms of the GNU Lesser General Public License as
9 dnl published by the Free Software Foundation; either version 3 of the
10 dnl License, or (at your option) any later version.
12 dnl The GNU MP Library is distributed in the hope that it will be useful,
13 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 dnl Lesser General Public License for more details.
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
20 include(`../config.m4')
22 C Runs at 3.0 cycles/limb.
24 C With 2-way unrolling, we could probably reach 2.25 c/l (3.33 i/c).
33 C Useful register aliases
C numb_mask is r24.  The function code shifts it right by NAIL_BITS and then
C uses it as the AND mask that extracts the numb part of a sum.
34 define(`numb_mask',`r24')
53 C Used for temps: r8 r19 r28
C These three are referenced by register number, not through an alias:
C   r8   receives a low product word shifted right by NAIL_BITS
C   r19  holds the running sum, then its nail part after a shift by NUMB_BITS
C   r28  receives the numb part of a sum after masking with numb_mask
C Short names for the nail and numb bit counts, used as shift counts below.
55 define(`NAIL_BITS',`GMP_NAIL_BITS')
56 define(`NUMB_BITS',`GMP_NUMB_BITS')
58 C This declaration is munged by configure
62 PROLOGUE(mpn_addmul_3)
64 srl numb_mask,NAIL_BITS,numb_mask
70 bis r31, r31, acc0 C zero acc0
72 bis r31, r31, acc1 C zero acc1
74 bis r31, r31, acc2 C zero acc2
80 mulq v0, ulimb, m0a C U1
81 umulh v0, ulimb, m0b C U1
82 mulq v1, ulimb, m1a C U1
83 umulh v1, ulimb, m1b C U1
85 mulq v2, ulimb, m2a C U1
86 umulh v2, ulimb, m2b C U1
90 L(top): ldq rlimb, 0(rp) C L1
92 bis r31, r31, r31 C U0 nop
93 addq r19, acc0, acc0 C U1 propagate nail
96 srl m0a,NAIL_BITS, r8 C U0
98 mulq v0, ulimb, m0a C U1
100 addq r8, acc0, r19 C U0
101 addq m0b, acc1, acc0 C L1
102 umulh v0, ulimb, m0b C U1
103 bis r31, r31, r31 C L0 nop
105 addq rlimb, r19, r19 C L1
106 srl m1a,NAIL_BITS, r8 C U0
107 bis r31, r31, r31 C L0 nop
108 mulq v1, ulimb, m1a C U1
110 addq r8, acc0, acc0 C U0
111 addq m1b, acc2, acc1 C L1
112 umulh v1, ulimb, m1b C U1
113 and r19,numb_mask, r28 C L0 extract numb part
115 bis r31, r31, r31 C L1 nop
116 srl m2a,NAIL_BITS, r8 C U0
118 mulq v2, ulimb, m2a C U1
120 addq r8, acc1, acc1 C L0
121 bis r31, m2b, acc2 C L1
122 umulh v2, ulimb, m2b C U1
123 srl r19,NUMB_BITS, r19 C U0 extract nail part
128 L(end): ldq rlimb, 0(rp)
129 addq r19, acc0, acc0 C propagate nail
131 srl m0a,NAIL_BITS, r8 C U0
135 srl m1a,NAIL_BITS, r8 C U0
138 and r19,numb_mask, r28 C extract limb
139 srl m2a,NAIL_BITS, r8 C U0
142 srl r19,NUMB_BITS, r19 C extract nail
145 addq r19, acc0, acc0 C propagate nail
146 and acc0,numb_mask, r28
148 srl acc0,NUMB_BITS, r19
151 and acc1,numb_mask, r28
153 srl acc1,NUMB_BITS, r19