1 dnl Alpha ev6 nails mpn_addmul_4.
3 dnl Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software; you can redistribute it and/or
8 dnl modify it under the terms of the GNU Lesser General Public License as
9 dnl published by the Free Software Foundation; either version 3 of the
10 dnl License, or (at your option) any later version.
12 dnl The GNU MP Library is distributed in the hope that it will be useful,
13 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
14 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 dnl Lesser General Public License for more details.
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
20 include(`../config.m4')
22 C Runs at 2.5 cycles/limb.
24 C We should go for 2-way unrolling over 17 cycles, for 2.125 c/l corresponding
34 C Useful register aliases
C numb_mask names r24.  The entry code shifts it right by NAIL_BITS, and the
C loop ANDs intermediate sums with it to extract the numb part of a limb.
C NOTE(review): its initial value is set outside the lines seen here --
C presumably all ones; confirm.
35 define(`numb_mask',`r24')
58 C Used for temps: r8 r19 r28
C Short spellings of the GMP_NAIL_BITS and GMP_NUMB_BITS macros.  NAIL_BITS is
C the srl count applied to the low product halves and to numb_mask; NUMB_BITS
C is the srl count that extracts the nail part of a sum.
60 define(`NAIL_BITS',`GMP_NAIL_BITS')
61 define(`NUMB_BITS',`GMP_NUMB_BITS')
63 C This declaration is munged by configure
67 PROLOGUE(mpn_addmul_4)
75 srl numb_mask,NAIL_BITS,numb_mask
82 bis r31, r31, acc0 C zero acc0
84 bis r31, r31, acc1 C zero acc1
86 bis r31, r31, acc2 C zero acc2
88 bis r31, r31, acc3 C zero acc3
94 mulq v0, ulimb, m0a C U1
95 umulh v0, ulimb, m0b C U1
96 mulq v1, ulimb, m1a C U1
97 umulh v1, ulimb, m1b C U1
99 mulq v2, ulimb, m2a C U1
100 umulh v2, ulimb, m2b C U1
101 mulq v3, ulimb, m3a C U1
102 umulh v3, ulimb, m3b C U1
106 L(top): bis r31, r31, r31 C U1 nop
107 ldq rlimb, 0(rp) C L0
108 ldq ulimb, 0(up) C L1
109 addq r19, acc0, acc0 C U0 propagate nail
111 bis r31, r31, r31 C L0 nop
112 bis r31, r31, r31 C U1 nop
113 bis r31, r31, r31 C L1 nop
114 bis r31, r31, r31 C U0 nop
117 srl m0a,NAIL_BITS, r8 C U0
119 mulq v0, ulimb, m0a C U1
121 addq r8, acc0, r19 C U0
122 addq m0b, acc1, acc0 C L0
123 umulh v0, ulimb, m0b C U1
124 bis r31, r31, r31 C L1 nop
126 addq rlimb, r19, r19 C L0
127 srl m1a,NAIL_BITS, r8 C U0
128 bis r31, r31, r31 C L1 nop
129 mulq v1, ulimb, m1a C U1
131 addq r8, acc0, acc0 C U0
132 addq m1b, acc2, acc1 C L0
133 umulh v1, ulimb, m1b C U1
134 and r19,numb_mask, r28 C L1 extract numb part
136 bis r31, r31, r31 C L0 nop
137 srl m2a,NAIL_BITS, r8 C U0
139 mulq v2, ulimb, m2a C U1
141 addq r8, acc1, acc1 C L1
142 addq m2b, acc3, acc2 C L0
143 umulh v2, ulimb, m2b C U1
144 srl r19,NUMB_BITS, r19 C U0 extract nail part
146 bis r31, r31, r31 C L0 nop
147 srl m3a,NAIL_BITS, r8 C U0
149 mulq v3, ulimb, m3a C U1
151 addq r8, acc2, acc2 C L0
152 bis r31, m3b, acc3 C L1
153 umulh v3, ulimb, m3b C U1
156 L(end): ldq rlimb, 0(rp)
157 addq r19, acc0, acc0 C propagate nail
158 lda rp, 8(rp) C FIXME: DELETE
159 srl m0a,NAIL_BITS, r8 C U0
163 srl m1a,NAIL_BITS, r8 C U0
166 and r19,numb_mask, r28 C extract limb
167 srl m2a,NAIL_BITS, r8 C U0
170 srl r19,NUMB_BITS, r19 C extract nail
171 srl m3a,NAIL_BITS, r8 C U0
176 addq r19, acc0, acc0 C propagate nail
177 and acc0,numb_mask, r28
179 srl acc0,NUMB_BITS, r19
182 and acc1,numb_mask, r28
184 srl acc1,NUMB_BITS, r19
187 and acc2,numb_mask, r28
189 srl acc2,NUMB_BITS, r19