1 dnl Alpha ev6 nails mpn_addmul_4.
3 dnl Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
20 dnl or both in parallel, as here.
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C Runs at 2.5 cycles/limb.
35 C We should go for 2-way unrolling over 17 cycles, for 2.125 c/l corresponding
45 C Useful register aliases
C numb_mask is the m4 name for register r24.  The function body shifts it
C right by NAIL_BITS and later ANDs partial sums with it to extract the
C numb part of a limb.
46 define(`numb_mask',`r24')
69 C Used for temps: r8 r19 r28
C NAIL_BITS and NUMB_BITS are short aliases for GMP_NAIL_BITS and
C GMP_NUMB_BITS, which are defined outside this view.  The body uses them as
C srl shift counts: NAIL_BITS on the low product words, NUMB_BITS to extract
C the nail part of a sum.
71 define(`NAIL_BITS',`GMP_NAIL_BITS')
72 define(`NUMB_BITS',`GMP_NUMB_BITS')
74 C This declaration is munged by configure
78 PROLOGUE(mpn_addmul_4)
86 srl numb_mask,NAIL_BITS,numb_mask
93 bis r31, r31, acc0 C zero acc0
95 bis r31, r31, acc1 C zero acc1
97 bis r31, r31, acc2 C zero acc2
99 bis r31, r31, acc3 C zero acc3
105 mulq v0, ulimb, m0a C U1
106 umulh v0, ulimb, m0b C U1
107 mulq v1, ulimb, m1a C U1
108 umulh v1, ulimb, m1b C U1
110 mulq v2, ulimb, m2a C U1
111 umulh v2, ulimb, m2b C U1
112 mulq v3, ulimb, m3a C U1
113 umulh v3, ulimb, m3b C U1
117 L(top): bis r31, r31, r31 C U1 nop
118 ldq rlimb, 0(rp) C L0
119 ldq ulimb, 0(up) C L1
120 addq r19, acc0, acc0 C U0 propagate nail
122 bis r31, r31, r31 C L0 nop
123 bis r31, r31, r31 C U1 nop
124 bis r31, r31, r31 C L1 nop
125 bis r31, r31, r31 C U0 nop
128 srl m0a,NAIL_BITS, r8 C U0
130 mulq v0, ulimb, m0a C U1
132 addq r8, acc0, r19 C U0
133 addq m0b, acc1, acc0 C L0
134 umulh v0, ulimb, m0b C U1
135 bis r31, r31, r31 C L1 nop
137 addq rlimb, r19, r19 C L0
138 srl m1a,NAIL_BITS, r8 C U0
139 bis r31, r31, r31 C L1 nop
140 mulq v1, ulimb, m1a C U1
142 addq r8, acc0, acc0 C U0
143 addq m1b, acc2, acc1 C L0
144 umulh v1, ulimb, m1b C U1
145 and r19,numb_mask, r28 C L1 extract numb part
147 bis r31, r31, r31 C L0 nop
148 srl m2a,NAIL_BITS, r8 C U0
150 mulq v2, ulimb, m2a C U1
152 addq r8, acc1, acc1 C L1
153 addq m2b, acc3, acc2 C L0
154 umulh v2, ulimb, m2b C U1
155 srl r19,NUMB_BITS, r19 C U0 extract nail part
157 bis r31, r31, r31 C L0 nop
158 srl m3a,NAIL_BITS, r8 C U0
160 mulq v3, ulimb, m3a C U1
162 addq r8, acc2, acc2 C L0
163 bis r31, m3b, acc3 C L1
164 umulh v3, ulimb, m3b C U1
167 L(end): ldq rlimb, 0(rp)
168 addq r19, acc0, acc0 C propagate nail
169 lda rp, 8(rp) C FIXME: DELETE
170 srl m0a,NAIL_BITS, r8 C U0
174 srl m1a,NAIL_BITS, r8 C U0
177 and r19,numb_mask, r28 C extract limb
178 srl m2a,NAIL_BITS, r8 C U0
181 srl r19,NUMB_BITS, r19 C extract nail
182 srl m3a,NAIL_BITS, r8 C U0
187 addq r19, acc0, acc0 C propagate nail
188 and acc0,numb_mask, r28
190 srl acc0,NUMB_BITS, r19
193 and acc1,numb_mask, r28
195 srl acc1,NUMB_BITS, r19
198 and acc2,numb_mask, r28
200 srl acc2,NUMB_BITS, r19