1 dnl HP-PA 7100/7200 mpn_addmul_1 -- Multiply a limb vector with a limb and
2 dnl add the result to a second limb vector.
4 dnl Copyright 1995, 2000-2003 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of either:
11 dnl * the GNU Lesser General Public License as published by the Free
12 dnl Software Foundation; either version 3 of the License, or (at your
13 dnl option) any later version.
17 dnl * the GNU General Public License as published by the Free Software
18 dnl Foundation; either version 2 of the License, or (at your option) any later version.
21 dnl or both in parallel, as here.
23 dnl The GNU MP Library is distributed in the hope that it will be useful, but
24 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
28 dnl You should have received copies of the GNU General Public License and the
29 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
30 dnl see https://www.gnu.org/licenses/.
32 include(`../config.m4')
C Register aliases for the operands of mpn_addmul_1.
C NOTE(review): the use of %r26/%r25/%r24/%r23 for the inputs and %r28 for
C cylimb presumably follows the HP-PA 32-bit calling convention (arg0-arg3 and
C ret0) -- confirm against the ABI.  Other aliases used by the routine (for
C example s0-s3 and hi3) are not defined in the lines shown here.
35 define(`res_ptr',`%r26') C pointer into the result vector; advanced by stws,ma stores
36 define(`s1_ptr',`%r25') C pointer into the source vector; advanced by fldws,ma and fldds,ma loads
37 define(`size_param',`%r24') C limb counter; counted down by the addib instructions
38 define(`s2_limb',`%r23') C multiplier limb; spilled to the stack and reloaded into %fr31R on entry
40 define(`cylimb',`%r28') C running carry limb (presumably also the return value -- confirm)
50 define(`hi1',`%r23') C safe to reuse: same register as s2_limb, which the visible code reads only once, at entry
C mpn_addmul_1: per the file header, multiplies a limb vector by a single limb
C and adds the result to a second limb vector.  Operands are named through the
C register aliases res_ptr, s1_ptr, size_param, s2_limb and cylimb.
C
C Visible structure: entry and multiplier setup, an optional one-limb step, a
C software-pipelined section that handles four limbs per pass, the pipeline
C drain, and a one-limb-at-a-time loop for leftover limbs.
C
C NOTE(review): this view of the routine is incomplete.  The labels targeted by
C the branches, the definitions of s0-s3 and hi3, and the integer add/carry
C instructions between the multiplies and the stores are not shown here.  The
C comments below describe only the instructions that are visible.
55 PROLOGUE(mpn_addmul_1)
56 C .callinfo frame=128,no_calls
59 stws s2_limb,-16(%r30) C spill the multiplier to memory so it can be loaded into an FP register
60 add %r0,%r0,cylimb C clear cy and cylimb
61 addib,< -4,size_param,L(few_limbs) C size_param -= 4; fewer than 4 limbs takes the short path
62 fldws -16(%r30),%fr31R C reload the multiplier into %fr31R (sits in the delay slot of the branch above)
71 bb,>=,n s1_ptr,29,L(0) C branch on bit 29 of s1_ptr; presumably an 8-byte alignment test for the fldds loads below -- confirm
73 fldws,ma 4(s1_ptr),%fr4 C load one source limb, then advance s1_ptr by 4
75 xmpyu %fr4,%fr31R,%fr5 C %fr5 = limb * multiplier (unsigned)
80 addib,< -1,size_param,L(few_limbs) C account for the single limb; take the short path if the count goes negative
83 C start software pipeline ----------------------------------------------------
85 fldds,ma 8(s1_ptr),%fr4 C load source limbs 0-1 as one doubleword, advance s1_ptr by 8
86 fldds,ma 8(s1_ptr),%fr8 C load source limbs 2-3 as one doubleword, advance s1_ptr by 8
88 xmpyu %fr4L,%fr31R,%fr5 C product of limb 0 and the multiplier
89 xmpyu %fr4R,%fr31R,%fr6 C product of limb 1 and the multiplier
90 xmpyu %fr8L,%fr31R,%fr9 C product of limb 2 and the multiplier
91 xmpyu %fr8R,%fr31R,%fr10 C product of limb 3 and the multiplier
112 addib,< -4,size_param,L(end) C size_param -= 4; if no further full group of 4 remains, go drain the pipeline
113 addc %r0,hi3,cylimb C propagate carry into cylimb
114 C main loop ------------------------------------------------------------------
116 fldds,ma 8(s1_ptr),%fr4 C fetch the next two source limbs
117 fldds,ma 8(s1_ptr),%fr8 C fetch the two source limbs after those
120 xmpyu %fr4L,%fr31R,%fr5 C start the four multiplies for the next group
122 xmpyu %fr4R,%fr31R,%fr6
124 xmpyu %fr8L,%fr31R,%fr9
126 xmpyu %fr8R,%fr31R,%fr10
147 stws,ma s0,4(res_ptr) C store four finished result limbs, advancing res_ptr by 4 each time
149 stws,ma s1,4(res_ptr)
151 stws,ma s2,4(res_ptr)
153 stws,ma s3,4(res_ptr)
155 addib,>= -4,size_param,L(loop) C size_param -= 4; repeat while a full group of 4 limbs remains
156 addc %r0,hi3,cylimb C propagate carry into cylimb
157 C finish software pipeline ---------------------------------------------------
165 stws,ma s0,4(res_ptr) C store the last four limbs still in flight
167 stws,ma s1,4(res_ptr)
169 stws,ma s2,4(res_ptr)
171 stws,ma s3,4(res_ptr)
173 C restore callee-saves registers ---------------------------------------------
181 addib,=,n 4,size_param,L(ret) C undo the -4 bias on size_param; if zero limbs are left, go to L(ret)
184 fldws,ma 4(s1_ptr),%fr4 C leftover limbs: load one source limb, advance s1_ptr by 4
186 xmpyu %fr4,%fr31R,%fr5 C %fr5 = limb * multiplier (unsigned)
193 stws,ma s0,4(res_ptr) C store one result limb, advance res_ptr by 4
194 addib,<> -1,size_param,L(loop2) C size_param -= 1; loop until it reaches zero
198 addc %r0,cylimb,cylimb C fold the pending carry bit into cylimb
201 EPILOGUE(mpn_addmul_1)