1 dnl HP-PA 7100/7200 mpn_submul_1 -- Multiply a limb vector with a limb and
2 dnl subtract the result from a second limb vector.
4 dnl Copyright 1995, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of the GNU Lesser General Public License as published
10 dnl by the Free Software Foundation; either version 3 of the License, or (at
11 dnl your option) any later version.
13 dnl The GNU MP Library is distributed in the hope that it will be useful, but
14 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 dnl License for more details.
18 dnl You should have received a copy of the GNU Lesser General Public License
19 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
21 include(`../config.m4')
C Symbolic names for the integer registers used by mpn_submul_1.
C NOTE(review): %r26..%r23 and %r28 look like the PA-RISC 32-bit argument
C and return-value registers; confirm against the runtime ABI document.
C
C res_ptr    -- limb vector that is stored to through stws,ma (written)
C s1_ptr     -- limb vector that is loaded through fldws/fldds (read)
C size_param -- limb counter, adjusted by the addib instructions
C s2_limb    -- the single multiplier limb
24 define(`res_ptr',`%r26')
25 define(`s1_ptr',`%r25')
26 define(`size_param',`%r24')
27 define(`s2_limb',`%r23')
C cylimb accumulates the carry limb; it is cleared at function entry.
29 define(`cylimb',`%r28')
C hi1 shares %r23 with s2_limb.  The function copies s2_limb out to the
C FP register %fr31R through memory at entry, so %r23 is free afterwards.
39 define(`hi1',`%r23') C safe to reuse
C mpn_submul_1 -- multiply the limb vector at s1_ptr by the single limb
C s2_limb and subtract the products from the limb vector at res_ptr (see
C the file header).  size_param counts limbs; cylimb collects the carry limb.
C NOTE(review): cylimb is %r28, presumably the function return value --
C confirm against the ABI.
C
C Outline: move the multiplier into %fr31R, optionally handle one leading
C limb depending on bit 29 of s1_ptr, run a 4-limbs-per-iteration software
C pipeline, then finish leftover limbs one at a time.
44 PROLOGUE(mpn_submul_1)
45 C .callinfo frame=128,no_calls
C xmpyu takes FP register operands, so the multiplier is passed through
C memory at -16(%r30) and reloaded into %fr31R below.
48 stws s2_limb,-16(%r30)
49 add %r0,%r0,cylimb C clear cy and cylimb
C size_param -= 4; branch to the short path when the result is negative.
C The fldws that follows sits in the delay slot of this branch.
50 addib,< -4,size_param,L(few_limbs)
51 fldws -16(%r30),%fr31R
C Test bit 29 of s1_ptr (weight 4 in PA-RISC bit numbering, where bit 0 is
C the most significant).  If it is clear, branch to L(0); otherwise one limb
C is handled on its own first.
C NOTE(review): presumably this brings s1_ptr to the 8-byte alignment
C needed by the fldds doubleword loads below -- confirm.
60 bb,>=,n s1_ptr,29,L(0)
C Single leading limb: load one word, post-incrementing s1_ptr by 4, and
C form its product with the multiplier in %fr5.
62 fldws,ma 4(s1_ptr),%fr4
64 xmpyu %fr4,%fr31R,%fr5
C Result goes to %r0 (discarded); only the carry bit matters here.
69 add s0,lo0,%r0 C invert cy
C One limb consumed; fall back to the short path if fewer than 4 remain.
70 addib,< -1,size_param,L(few_limbs)
73 C start software pipeline ----------------------------------------------------
C Load four limbs as two doublewords (s1_ptr advances by 8 each time) and
C start their four products with the multiplier in %fr31R.
75 fldds,ma 8(s1_ptr),%fr4
76 fldds,ma 8(s1_ptr),%fr8
78 xmpyu %fr4L,%fr31R,%fr5
79 xmpyu %fr4R,%fr31R,%fr6
80 xmpyu %fr8L,%fr31R,%fr9
81 xmpyu %fr8R,%fr31R,%fr10
C size_param -= 4; if negative there is no further full group of four, so
C skip the main loop.  The addc below is in the delay slot of this branch.
102 addib,< -4,size_param,L(end)
103 addc %r0,hi3,cylimb C propagate carry into cylimb
104 C main loop ------------------------------------------------------------------
C Each iteration loads and multiplies the next four limbs while the
C results of the previous group are completed and stored.
106 fldds,ma 8(s1_ptr),%fr4
107 fldds,ma 8(s1_ptr),%fr8
110 xmpyu %fr4L,%fr31R,%fr5
112 xmpyu %fr4R,%fr31R,%fr6
114 xmpyu %fr8L,%fr31R,%fr9
116 xmpyu %fr8R,%fr31R,%fr10
C subb of zero from zero leaves 0 or -1 in lo0 according to the incoming
C carry; adding lo0 to itself then produces the opposite carry.
126 subb %r0,%r0,lo0 C these two insns ...
127 add lo0,lo0,%r0 C ... just invert cy
C Store four result limbs, post-incrementing res_ptr by 4 each time.
139 stws,ma s0,4(res_ptr)
141 stws,ma s1,4(res_ptr)
143 stws,ma s2,4(res_ptr)
145 stws,ma s3,4(res_ptr)
C size_param -= 4; loop again while the result is still non-negative.
C The addc below is in the delay slot of this branch.
147 addib,>= -4,size_param,L(loop)
148 addc %r0,hi3,cylimb C propagate carry into cylimb
149 C finish software pipeline ---------------------------------------------------
C Drain: store the last four result limbs produced by the pipeline.
157 stws,ma s0,4(res_ptr)
159 stws,ma s1,4(res_ptr)
161 stws,ma s2,4(res_ptr)
163 stws,ma s3,4(res_ptr)
C Same carry-inversion idiom as in the main loop.
164 subb %r0,%r0,lo0 C these two insns ...
165 add lo0,lo0,%r0 C ... invert cy
167 C restore callee-saves registers ---------------------------------------------
C Add 4 back to size_param to undo the loop bias; if the result is zero
C there are no leftover limbs, so branch to L(ret).
175 addib,=,n 4,size_param,L(ret)
C Leftover limbs, one per iteration: load a word, multiply it by the
C multiplier, store one result limb, and count size_param down.
178 fldws,ma 4(s1_ptr),%fr4
180 xmpyu %fr4,%fr31R,%fr5
C Result goes to %r0 (discarded); only the carry bit matters here.
187 add s0,lo0,%r0 C invert cy
188 stws,ma s0,4(res_ptr)
C size_param -= 1; repeat at L(loop2) while the result is nonzero.
189 addib,<> -1,size_param,L(loop2)
C Fold the pending carry bit into cylimb.
193 addc %r0,cylimb,cylimb
196 EPILOGUE(mpn_submul_1)