1 dnl Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
2 dnl store sum in a third limb vector.
4 dnl Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of the GNU Lesser General Public License as published
10 dnl by the Free Software Foundation; either version 3 of the License, or (at
11 dnl your option) any later version.
13 dnl The GNU MP Library is distributed in the hope that it will be useful, but
14 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 dnl License for more details.
18 dnl You should have received a copy of the GNU Lesser General Public License
19 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
21 include(`../config.m4')
C Body of mpn_add_n: add two limb vectors of equal length and leave the
C carry-out in r0.
C
C Register roles, as named by the per-line comments below:
C   r16       res_ptr
C   r17       s1_ptr
C   r18       s2_ptr
C   r19       loop count; pre-decremented by 4 so that its sign tells
C             whether a full group of 4 limbs is still available
C   r0-r7     operand limbs, added pairwise: r0+r4, r1+r5, r2+r6, r3+r7
C   r20-r23   the four sums of one group, after the carry-in is added
C   r28       a sum before the carry-in is added
C   r8, r25   carry flags; r25 holds the running carry between limbs
C
C Carry idiom used throughout: after an add, cmpult compares the sum with
C one of its inputs; the result is the carry out of that add.  Each limb
C needs two adds (operands, then carry-in), so the two carry flags are
C combined with bis.
C
C NOTE(review): no instruction in this excerpt loads r0-r7, stores r20-r23,
C branches back to $Loop or $Loop0, or returns.  Those are presumably on
C lines not included here -- confirm against the full file before changing
C the instruction order.
36 bis r31,r31,r25 C clear cy
37 subq r19,4,r19 C decr loop cnt
38 blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop
39 C Start software pipeline for 1st loop
44 addq r17,32,r17 C update s1_ptr
C The very first add has no carry-in add: cy was cleared on entry.
46 addq r0,r4,r20 C 1st main add
48 subq r19,4,r19 C decr loop cnt
50 cmpult r20,r0,r25 C compute cy from last add
52 addq r1,r5,r28 C 2nd main add
53 addq r18,32,r18 C update s2_ptr
54 addq r28,r25,r21 C 2nd carry add
55 cmpult r28,r5,r8 C compute cy from last add
56 blt r19,$Lend1 C if less than 4 limbs remain, jump
57 C 1st loop handles groups of 4 limbs in a software pipeline
C Each pass finishes limbs 3 and 4 of the current group, then starts
C limbs 1 and 2 of the next group.
59 $Loop: cmpult r21,r28,r25 C compute cy from last add
61 bis r8,r25,r25 C combine cy from the two adds
63 addq r2,r6,r28 C 3rd main add
65 addq r28,r25,r22 C 3rd carry add
67 cmpult r28,r6,r8 C compute cy from last add
68 cmpult r22,r28,r25 C compute cy from last add
70 bis r8,r25,r25 C combine cy from the two adds
72 addq r3,r7,r28 C 4th main add
73 addq r28,r25,r23 C 4th carry add
74 cmpult r28,r7,r8 C compute cy from last add
75 cmpult r23,r28,r25 C compute cy from last add
76 addq r17,32,r17 C update s1_ptr
77 bis r8,r25,r25 C combine cy from the two adds
78 addq r16,32,r16 C update res_ptr
79 addq r0,r4,r28 C 1st main add
81 addq r25,r28,r20 C 1st carry add
83 cmpult r28,r4,r8 C compute cy from last add
85 cmpult r20,r28,r25 C compute cy from last add
87 bis r8,r25,r25 C combine cy from the two adds
88 subq r19,4,r19 C decr loop cnt
90 addq r1,r5,r28 C 2nd main add
92 addq r25,r28,r21 C 2nd carry add
93 addq r18,32,r18 C update s2_ptr
94 cmpult r28,r5,r8 C compute cy from last add
96 C Finish software pipeline for 1st loop
C Drain: complete limbs 3 and 4 of the last group without starting
C another group.  Same sequence as the first half of the loop body.
97 $Lend1: cmpult r21,r28,r25 C compute cy from last add
98 bis r8,r25,r25 C combine cy from the two adds
99 addq r2,r6,r28 C 3rd main add
100 addq r28,r25,r22 C 3rd carry add
101 cmpult r28,r6,r8 C compute cy from last add
102 cmpult r22,r28,r25 C compute cy from last add
104 bis r8,r25,r25 C combine cy from the two adds
106 addq r3,r7,r28 C 4th main add
107 addq r28,r25,r23 C 4th carry add
108 cmpult r28,r7,r8 C compute cy from last add
109 cmpult r23,r28,r25 C compute cy from last add
110 bis r8,r25,r25 C combine cy from the two adds
111 addq r16,32,r16 C update res_ptr
C Undo the bias of 4 applied to the loop count, leaving the number of
C limbs not handled by the 1st loop.
114 $Lend2: addq r19,4,r19 C restore loop cnt
116 C Start software pipeline for 2nd loop
121 C 2nd loop handles remaining 1-3 limbs
C One limb per pass; r25 carries in and is rewritten with the carry out.
123 $Loop0: addq r0,r4,r28 C main add
125 cmpult r28,r4,r8 C compute cy from last add
127 addq r28,r25,r20 C carry add
131 cmpult r20,r28,r25 C compute cy from last add
132 subq r19,1,r19 C decr loop cnt
133 bis r8,r25,r25 C combine cy from the two adds
C Same one-limb add sequence once more, outside the loop.
136 $Lend0: addq r0,r4,r28 C main add
137 addq r28,r25,r20 C carry add
138 cmpult r28,r4,r8 C compute cy from last add
139 cmpult r20,r28,r25 C compute cy from last add
141 bis r8,r25,r25 C combine cy from the two adds
C Copy the final carry into r0, the return value.
143 $Lret: bis r25,r31,r0 C return cy