Upload Tizen:Base source
[external/gmp.git] / mpn / ia64 / aors_n.asm
1 dnl  IA-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
2
3 dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
4
5 dnl  This file is part of the GNU MP Library.
6
7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl  it under the terms of the GNU Lesser General Public License as published
9 dnl  by the Free Software Foundation; either version 3 of the License, or (at
10 dnl  your option) any later version.
11
12 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15 dnl  License for more details.
16
17 dnl  You should have received a copy of the GNU Lesser General Public License
18 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20 include(`../config.m4')
21
22 C           cycles/limb
23 C Itanium:      2.67
24 C Itanium 2:    1.25
25
26 C TODO
27 C  * Consider using special code for small n, using something like
28 C    "switch (8 * (n >= 8) + (n mod 8))" to enter it and feed-in code.
29
30 C INPUT PARAMETERS
C   rp (r32)  destination pointer; result limbs are written with st8 through
C             rp or through its companion pointer rpx
C   up (r33)  first source pointer; its limbs are loaded into r10 and u0..u7
C   vp (r34)  second source pointer; its limbs are loaded into r11 and v0..v7
C   n  (r35)  limb count; compared against 8 and reduced mod 8 at entry
31 define(`rp',`r32')
32 define(`up',`r33')
33 define(`vp',`r34')
34 define(`n',`r35')
35
C Build-time selection of the operation.  Nothing in this file defines
C OPERATION_add_n or OPERATION_sub_n, so one of them is expected to come
C from the build (TODO confirm where).
C   ADDSUB  arithmetic instruction, used as  w = u ADDSUB v
C   PRED    unsigned relation for  cmp.PRED p = w, u : ltu flags a carry out
C           of an addition, gtu flags a borrow out of a subtraction
C   INCR    value added to a result limb to apply an incoming carry or borrow
C   LIM     result value for which applying INCR wraps around, so that an
C           incoming carry or borrow has to be passed on to the next limb
C   func    function name handed to PROLOGUE
36 ifdef(`OPERATION_add_n',`
37   define(ADDSUB,        add)
38   define(PRED,          ltu)
39   define(INCR,          1)
40   define(LIM,           -1)
41   define(func, mpn_add_n)
42 ')
43 ifdef(`OPERATION_sub_n',`
44   define(ADDSUB,        sub)
45   define(PRED,          gtu)
46   define(INCR,          -1)
47   define(LIM,           0)
48   define(func, mpn_sub_n)
49 ')
50
51 C Some useful aliases for registers we use
C   u0..u7  limbs loaded from up
C   v0..v7  limbs loaded from vp
C   w0..w7  result limbs.  w4..w7 name the same four registers as w0..w3
C           (r22, r9, r8, r23), so each result register is reused four
C           limbs later.
C   rpx     second store pointer, used for the w2 and w6 stores
C Note that u0 is r14, which the entry code also uses to hold n mod 8, and
C that w2/w6 is r8, which receives the return value at the very end.
52 define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
53 define(`u4',`r18') define(`u5',`r19') define(`u6',`r20') define(`u7',`r21')
54 define(`v0',`r24') define(`v1',`r25') define(`v2',`r26') define(`v3',`r27')
55 define(`v4',`r28') define(`v5',`r29') define(`v6',`r30') define(`v7',`r31')
56 define(`w0',`r22') define(`w1',`r9') define(`w2',`r8') define(`w3',`r23')
57 define(`w4',`r22') define(`w5',`r9') define(`w6',`r8') define(`w7',`r23')
58 define(`rpx',`r3')
59
C Function entry and dispatch.
C  * MULFUNC_PROLOGUE names the two functions this file can be built as
C    (the macro is defined outside this file; presumably read by the build
C    system, TODO confirm).
C  * ar.lc is copied to r2 here and copied back just before the return.
C  * Under HAVE_ABI_32 the three pointers are passed through addp4 and n is
C    zero extended from 32 bits.
C  * The first limb of each operand is loaded into r10 (from up) and r11
C    (from vp); both pointers post-increment by 8.
C  * r14 = n mod 8 selects one of eight feed-in sequences, .Lb001 to .Lb111
C    by branch and .Lb000 by falling through.
C  * p15 is set when n > 8 and p14 when n <= 8.  The feed-in code uses them
C    to choose between entering the unrolled loop and joining the wind-down
C    code directly.
C  * n is reduced by 8 here, and by 1 more in the final bundle, which is
C    only reached when n mod 8 is 0 or 7.  The feed-in code later shifts
C    this value right by 3 to form the loop count.
60 MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
61
62 ASM_START()
63 PROLOGUE(func)
64         .prologue
65         .save   ar.lc, r2
66         .body
67 ifdef(`HAVE_ABI_32',`
68         addp4           rp = 0, rp              C                       M I
69         addp4           up = 0, up              C                       M I
70         addp4           vp = 0, vp              C                       M I
71         zxt4            n = n                   C                       I
72         ;;
73 ')
74 {.mmi           C 00
75         ld8             r11 = [vp], 8           C                       M01
76         ld8             r10 = [up], 8           C                       M01
77         mov.i           r2 = ar.lc              C                       I0
78 }
79 {.mmi
80         and             r14 = 7, n              C                       M I
81         cmp.lt          p15, p14 = 8, n         C                       M I
82         add             n = -8, n               C                       M I
83         ;;
84 }
85 {.mmi           C 01
86         cmp.eq          p6, p0 = 1, r14         C                       M I
87         cmp.eq          p7, p0 = 2, r14         C                       M I
88         cmp.eq          p8, p0 = 3, r14         C                       M I
89 }
90 {.bbb
91    (p6) br.dptk         .Lb001                  C                       B
92    (p7) br.dptk         .Lb010                  C                       B
93    (p8) br.dptk         .Lb011                  C                       B
94         ;;
95 }
96 {.mmi           C 02
97         cmp.eq          p9, p0 = 4, r14         C                       M I
98         cmp.eq          p10, p0 = 5, r14        C                       M I
99         cmp.eq          p11, p0 = 6, r14        C                       M I
100 }
101 {.bbb
102    (p9) br.dptk         .Lb100                  C                       B
103   (p10) br.dptk         .Lb101                  C                       B
104   (p11) br.dptk         .Lb110                  C                       B
105         ;;
106 }               C 03
107 {.mmb
108         cmp.eq          p12, p0 = 7, r14        C                       M I
109         add             n = -1, n               C loop count            M I
110   (p12) br.dptk         .Lb111                  C                       B
111 }
112
113
C Feed-in for n mod 8 = 0.
C Loads seven more limb pairs and forms the first three results: w1 from the
C preloaded r10/r11 pair, then w2 and w3, with carry predicates p7, p8, p9.
C rpx is set one limb past rp.  When n <= 8 (p14) control joins the wind-down
C code at .Lcj8.  Otherwise .grt8 loads further limbs, sets r10 = up + 512
C and r11 = vp + 512 for the lfetch instructions in the loop, writes n >> 3
C to ar.lc and enters the loop at .LL000.
114 .Lb000: ld8             v2 = [vp], 8            C                       M01
115         ld8             u2 = [up], 8            C                       M01
116         add             rpx = 8, rp             C                       M I
117         ;;
118         ld8             v3 = [vp], 8            C                       M01
119         ld8             u3 = [up], 8            C                       M01
120         ADDSUB          w1 = r10, r11           C                       M I
121         ;;
122         ld8             v4 = [vp], 8            C                       M01
123         ld8             u4 = [up], 8            C                       M01
124         cmp.PRED        p7, p0 = w1, r10        C                       M I
125         ;;
126         ld8             v5 = [vp], 8            C                       M01
127         ld8             u5 = [up], 8            C                       M01
128         ADDSUB          w2 = u2, v2             C                       M I
129         ;;
130         ld8             v6 = [vp], 8            C                       M01
131         ld8             u6 = [up], 8            C                       M01
132         cmp.PRED        p8, p0 = w2, u2         C                       M I
133         ;;
134         ld8             v7 = [vp], 8            C                       M01
135         ld8             u7 = [up], 8            C                       M01
136         ADDSUB          w3 = u3, v3             C                       M I
137         ;;
138         ld8             v0 = [vp], 8            C                       M01
139         ld8             u0 = [up], 8            C                       M01
140         cmp.PRED        p9, p0 = w3, u3         C                       M I
141    (p7) cmp.eq.or       p8, p0 = LIM, w2        C                       M I
142    (p7) add             w2 = INCR, w2           C                       M I
143   (p14) br.cond.dptk    .Lcj8                   C                       B
144         ;;
145
146 .grt8:  ld8             v1 = [vp], 8            C                       M01
147         ld8             u1 = [up], 8            C                       M01
148         shr.u           n = n, 3                C                       I0
149         ;;
150         add             r11 = 512, vp
151         ld8             v2 = [vp], 8            C                       M01
152         add             r10 = 512, up
153         ld8             u2 = [up], 8            C                       M01
154         nop.i           0
155         nop.b           0
156         ;;
157         ld8             v3 = [vp], 8            C                       M01
158         ld8             u3 = [up], 8            C                       M01
159         mov.i           ar.lc = n               C                       I0
160         br              .LL000                  C                       B
161
C Feed-in for n mod 8 = 1.
C w0 is formed from the preloaded r10/r11 pair.  When p15 is clear (n <= 8,
C which with this residue means a single limb, assuming n >= 1) the carry
C goes to p6, r8 is preset to 0 and .Lcj1 stores w0 and returns.
C Otherwise .grt1 loads eight more limb pairs and clears p9 (cmp.ne of r0
C with itself is never true) so that the first group of the loop applies no
C carry to w0.  It then sets up r10, r11 and ar.lc and enters the loop at
C its top through br.cloop, or goes to .Lcj9 when ar.lc is already zero.
162 .Lb001: add             rpx = 16, rp            C                       M I
163         ADDSUB          w0 = r10, r11           C                       M I
164   (p15) br.cond.dpnt    .grt1                   C                       B
165         ;;
166         cmp.PRED        p6, p0 = w0, r10        C                       M I
167         mov             r8 = 0                  C                       M I
168         br              .Lcj1                   C                       B
169
170 .grt1:  ld8             v1 = [vp], 8            C                       M01
171         ld8             u1 = [up], 8            C                       M01
172         shr.u           n = n, 3                C                       I0
173         ;;
174         ld8             v2 = [vp], 8            C                       M01
175         ld8             u2 = [up], 8            C                       M01
176         cmp.ne          p9, p0 = r0, r0         C read near Loop
177         ;;
178         ld8             v3 = [vp], 8            C                       M01
179         ld8             u3 = [up], 8            C                       M01
180         mov.i           ar.lc = n               C                       I0
181         ;;
182         ld8             v4 = [vp], 8            C                       M01
183         ld8             u4 = [up], 8            C                       M01
184         cmp.PRED        p6, p0 = w0, r10        C                       M I
185         ;;
186         ld8             v5 = [vp], 8            C                       M01
187         ld8             u5 = [up], 8            C                       M01
188         ADDSUB          w1 = u1, v1             C                       M I
189         ;;
190         ld8             v6 = [vp], 8            C                       M01
191         ld8             u6 = [up], 8            C                       M01
192         cmp.PRED        p7, p0 = w1, u1         C                       M I
193         ;;
194         ld8             v7 = [vp], 8            C                       M01
195         ld8             u7 = [up], 8            C                       M01
196         ADDSUB          w2 = u2, v2             C                       M I
197         ;;
198         add             r11 = 512, vp
199         ld8             v0 = [vp], 8            C                       M01
200         add             r10 = 512, up
201         ld8             u0 = [up], 8            C                       M01
202         br.cloop.dptk   .Loop                   C                       B
203         br              .Lcj9                   C                       B
204
C Feed-in for n mod 8 = 2.
C The second limb pair is loaded into v0/u0 and w7 is formed from the
C preloaded r10/r11 pair.  When p15 is clear (n <= 8) the carry out of w7
C goes to p9, w0 is formed and control joins the wind-down code at .Lcj2.
C Otherwise .grt2 loads seven more limb pairs, sets up ar.lc, r10 and r11,
C and enters the loop at .LL01x.
205 .Lb010: ld8             v0 = [vp], 8            C                       M01
206         ld8             u0 = [up], 8            C                       M01
207         add             rpx = 24, rp            C                       M I
208         ADDSUB          w7 = r10, r11           C                       M I
209   (p15) br.cond.dpnt    .grt2                   C                       B
210         ;;
211         cmp.PRED        p9, p0 = w7, r10        C                       M I
212         ADDSUB          w0 = u0, v0             C                       M I
213         br              .Lcj2                   C                       B
214
215 .grt2:  ld8             v1 = [vp], 8            C                       M01
216         ld8             u1 = [up], 8            C                       M01
217         shr.u           n = n, 3                C                       I0
218         ;;
219         ld8             v2 = [vp], 8            C                       M01
220         ld8             u2 = [up], 8            C                       M01
221         ;;
222         ld8             v3 = [vp], 8            C                       M01
223         ld8             u3 = [up], 8            C                       M01
224         mov.i           ar.lc = n               C                       I0
225         ;;
226         ld8             v4 = [vp], 8            C                       M01
227         ld8             u4 = [up], 8            C                       M01
228         ;;
229         ld8             v5 = [vp], 8            C                       M01
230         ld8             u5 = [up], 8            C                       M01
231         cmp.PRED        p9, p0 = w7, r10        C                       M I
232         ;;
233         ld8             v6 = [vp], 8            C                       M01
234         ld8             u6 = [up], 8            C                       M01
235         ADDSUB          w0 = u0, v0             C                       M I
236         ;;
237         add             r11 = 512, vp
238         ld8             v7 = [vp], 8            C                       M01
239         add             r10 = 512, up
240         ld8             u7 = [up], 8            C                       M01
241         br              .LL01x                  C                       B
242
C Feed-in for n mod 8 = 3.
C Loads two more limb pairs (into v7/u7 and v0/u0) and forms w6 from the
C preloaded r10/r11 pair.  When p15 is clear (n <= 8) it forms w7, stores w6
C and joins the wind-down code at .Lcj3; rpx is not set on that path and
C the code from .Lcj3 onward stores only through rp.
C Otherwise .grt3 sets rpx, loads further limbs, applies the carry p8 to w7,
C stores w6 and enters the loop at .LL01x.
243 .Lb011: ld8             v7 = [vp], 8            C                       M01
244         ld8             u7 = [up], 8            C                       M01
245         ADDSUB          w6 = r10, r11           C                       M I
246         ;;
247         ld8             v0 = [vp], 8            C                       M01
248         ld8             u0 = [up], 8            C                       M01
249   (p15) br.cond.dpnt    .grt3                   C                       B
250         ;;
251         cmp.PRED        p8, p0 = w6, r10        C                       M I
252         ADDSUB          w7 = u7, v7             C                       M I
253         ;;
254         st8             [rp] = w6, 8            C                       M23
255         cmp.PRED        p9, p0 = w7, u7         C                       M I
256         br              .Lcj3                   C                       B
257
258 .grt3:  ld8             v1 = [vp], 8            C                       M01
259         ld8             u1 = [up], 8            C                       M01
260         add             rpx = 32, rp            C                       M I
261         ;;
262         ld8             v2 = [vp], 8            C                       M01
263         ld8             u2 = [up], 8            C                       M01
264         shr.u           n = n, 3                C                       I0
265         ;;
266         ld8             v3 = [vp], 8            C                       M01
267         ld8             u3 = [up], 8            C                       M01
268         cmp.PRED        p8, p0 = w6, r10        C                       M I
269         ;;
270         ld8             v4 = [vp], 8            C                       M01
271         ld8             u4 = [up], 8            C                       M01
272         mov.i           ar.lc = n               C                       I0
273         ADDSUB          w7 = u7, v7             C                       M I
274         nop.i           0
275         nop.b           0
276         ;;
277         ld8             v5 = [vp], 8            C                       M01
278         ld8             u5 = [up], 8            C                       M01
279         cmp.PRED        p9, p0 = w7, u7         C                       M I
280         ;;
281         add             r11 = 512, vp
282         ld8             v6 = [vp], 8            C                       M01
283         add             r10 = 512, up
284         ld8             u6 = [up], 8            C                       M01
285    (p8) cmp.eq.or       p9, p0 = LIM, w7        C                       M I
286         ;;
287         ld8             v7 = [vp], 8            C                       M01
288         ld8             u7 = [up], 8            C                       M01
289    (p8) add             w7 = INCR, w7           C                       M I
290         st8             [rp] = w6, 8            C                       M23
291         ADDSUB          w0 = u0, v0             C                       M I
292         br              .LL01x                  C                       B
293
C Feed-in for n mod 8 = 4.
C Loads three more limb pairs (v6/u6, v7/u7, v0/u0), sets rpx one limb past
C rp and forms w5 from the preloaded r10/r11 pair.  When p15 is clear
C (n <= 8) it forms w6 and w7 with predicates p7 and p8 and joins the
C wind-down code at .Lcj4.  Otherwise .grt4 loads further limbs, applies the
C carry p7 to w6, sets up ar.lc, r10 and r11, and enters the loop at .LL100.
294 .Lb100: ld8             v6 = [vp], 8            C                       M01
295         ld8             u6 = [up], 8            C                       M01
296         add             rpx = 8, rp             C                       M I
297         ;;
298         ld8             v7 = [vp], 8            C                       M01
299         ld8             u7 = [up], 8            C                       M01
300         ADDSUB          w5 = r10, r11           C                       M I
301         ;;
302         ld8             v0 = [vp], 8            C                       M01
303         ld8             u0 = [up], 8            C                       M01
304   (p15) br.cond.dpnt    .grt4                   C                       B
305         ;;
306         cmp.PRED        p7, p0 = w5, r10        C                       M I
307         ADDSUB          w6 = u6, v6             C                       M I
308         ;;
309         cmp.PRED        p8, p0 = w6, u6         C                       M I
310         ADDSUB          w7 = u7, v7             C                       M I
311         br              .Lcj4                   C                       B
312
313 .grt4:  ld8             v1 = [vp], 8            C                       M01
314         ld8             u1 = [up], 8            C                       M01
315         shr.u           n = n, 3                C                       I0
316         cmp.PRED        p7, p0 = w5, r10        C                       M I
317         ;;
318         ld8             v2 = [vp], 8            C                       M01
319         ld8             u2 = [up], 8            C                       M01
320         ADDSUB          w6 = u6, v6             C                       M I
321         ;;
322         ld8             v3 = [vp], 8            C                       M01
323         ld8             u3 = [up], 8            C                       M01
324         cmp.PRED        p8, p0 = w6, u6         C                       M I
325         ;;
326         ld8             v4 = [vp], 8            C                       M01
327         ld8             u4 = [up], 8            C                       M01
328         mov.i           ar.lc = n               C                       I0
329         ;;
330         ld8             v5 = [vp], 8            C                       M01
331         ld8             u5 = [up], 8            C                       M01
332         ADDSUB          w7 = u7, v7             C                       M I
333         ;;
334         add             r11 = 512, vp
335         ld8             v6 = [vp], 8            C                       M01
336         add             r10 = 512, up
337         ld8             u6 = [up], 8            C                       M01
338         cmp.PRED        p9, p0 = w7, u7         C                       M I
339         ;;
340         ld8             v7 = [vp], 8            C                       M01
341         ld8             u7 = [up], 8            C                       M01
342    (p7) cmp.eq.or       p8, p0 = LIM, w6        C                       M I
343    (p7) add             w6 = INCR, w6           C                       M I
344         br              .LL100                  C                       B
345
C Feed-in for n mod 8 = 5.
C Loads four more limb pairs, sets rpx two limbs past rp, forms w4 from the
C preloaded r10/r11 pair with carry p6, and forms w5.  n is shifted right by
C 3 before the branch; the short path does not read n again.  When p15 is
C clear (n <= 8) it forms p7 and w6 and joins the wind-down code at .Lcj5.
C Otherwise .grt5 loads further limbs, sets up ar.lc, r10 and r11, and
C enters the loop at .LL101.
346 .Lb101: ld8             v5 = [vp], 8            C                       M01
347         ld8             u5 = [up], 8            C                       M01
348         add             rpx = 16, rp            C                       M I
349         ;;
350         ld8             v6 = [vp], 8            C                       M01
351         ld8             u6 = [up], 8            C                       M01
352         ADDSUB          w4 = r10, r11           C                       M I
353         ;;
354         ld8             v7 = [vp], 8            C                       M01
355         ld8             u7 = [up], 8            C                       M01
356         cmp.PRED        p6, p0 = w4, r10        C                       M I
357         ;;
358         ld8             v0 = [vp], 8            C                       M01
359         ld8             u0 = [up], 8            C                       M01
360         ADDSUB          w5 = u5, v5             C                       M I
361         shr.u           n = n, 3                C                       I0
362   (p15) br.cond.dpnt    .grt5                   C                       B
363         ;;
364         cmp.PRED        p7, p0 = w5, u5         C                       M I
365         ADDSUB          w6 = u6, v6             C                       M I
366         br              .Lcj5                   C                       B
367
368 .grt5:  ld8             v1 = [vp], 8            C                       M01
369         ld8             u1 = [up], 8            C                       M01
370         ;;
371         ld8             v2 = [vp], 8            C                       M01
372         ld8             u2 = [up], 8            C                       M01
373         mov.i           ar.lc = n               C                       I0
374         ;;
375         ld8             v3 = [vp], 8            C                       M01
376         ld8             u3 = [up], 8            C                       M01
377         cmp.PRED        p7, p0 = w5, u5         C                       M I
378         ;;
379         ld8             v4 = [vp], 8            C                       M01
380         ld8             u4 = [up], 8            C                       M01
381         ADDSUB          w6 = u6, v6             C                       M I
382         ;;
383         add             r11 = 512, vp
384         ld8             v5 = [vp], 8            C                       M01
385         add             r10 = 512, up
386         ld8             u5 = [up], 8            C                       M01
387         br              .LL101                  C                       B
388
C Feed-in for n mod 8 = 6.
C Loads five more limb pairs, sets rpx three limbs past rp, forms w3 from
C the preloaded r10/r11 pair with carry p9, and forms w4.  When p14 is set
C (n <= 8) control joins the wind-down code at .Lcj67.  Otherwise .grt6
C loads further limbs, sets up ar.lc, r10 and r11, and enters the loop at
C .LL11x.  The cmp.PRED and ADDSUB in .grt6 repeat, with unchanged inputs,
C the two operations already done just before the branch.
389 .Lb110: ld8             v4 = [vp], 8            C                       M01
390         ld8             u4 = [up], 8            C                       M01
391         add             rpx = 24, rp            C                       M I
392         ;;
393         ld8             v5 = [vp], 8            C                       M01
394         ld8             u5 = [up], 8            C                       M01
395         ADDSUB          w3 = r10, r11           C                       M I
396         ;;
397         ld8             v6 = [vp], 8            C                       M01
398         ld8             u6 = [up], 8            C                       M01
399         shr.u           n = n, 3                C                       I0
400         ;;
401         ld8             v7 = [vp], 8            C                       M01
402         ld8             u7 = [up], 8            C                       M01
403         cmp.PRED        p9, p0 = w3, r10        C                       M I
404         ;;
405         ld8             v0 = [vp], 8            C                       M01
406         ld8             u0 = [up], 8            C                       M01
407         ADDSUB          w4 = u4, v4             C                       M I
408   (p14) br.cond.dptk    .Lcj67                  C                       B
409         ;;
410
411 .grt6:  ld8             v1 = [vp], 8            C                       M01
412         ld8             u1 = [up], 8            C                       M01
413         mov.i           ar.lc = n               C                       I0
414         cmp.PRED        p9, p0 = w3, r10        C                       M I
415         nop.i           0
416         nop.b           0
417         ;;
418         ld8             v2 = [vp], 8            C                       M01
419         ld8             u2 = [up], 8            C                       M01
420         ADDSUB          w4 = u4, v4             C                       M I
421         ;;
422         add             r11 = 512, vp
423         ld8             v3 = [vp], 8            C                       M01
424         add             r10 = 512, up
425         ld8             u3 = [up], 8            C                       M01
426         br              .LL11x                  C                       B
427
C Feed-in for n mod 8 = 7.
C Loads six more limb pairs, sets rpx four limbs past rp, forms w2 from the
C preloaded r10/r11 pair with carry p8, and forms w3 with carry p9.  When
C p15 is clear (n <= 8) it stores w2, applies p8 to w3, forms w4 and joins
C the wind-down code at .Lcj67.  Otherwise .grt7 does the same work while
C loading further limbs and setting up ar.lc, r10 and r11, then enters the
C loop at .LL11x.
428 .Lb111: ld8             v3 = [vp], 8            C                       M01
429         ld8             u3 = [up], 8            C                       M01
430         add             rpx = 32, rp            C                       M I
431         ;;
432         ld8             v4 = [vp], 8            C                       M01
433         ld8             u4 = [up], 8            C                       M01
434         ADDSUB          w2 = r10, r11           C                       M I
435         ;;
436         ld8             v5 = [vp], 8            C                       M01
437         ld8             u5 = [up], 8            C                       M01
438         cmp.PRED        p8, p0 = w2, r10        C                       M I
439         ;;
440         ld8             v6 = [vp], 8            C                       M01
441         ld8             u6 = [up], 8            C                       M01
442         ADDSUB          w3 = u3, v3             C                       M I
443         ;;
444         ld8             v7 = [vp], 8            C                       M01
445         ld8             u7 = [up], 8            C                       M01
446         cmp.PRED        p9, p0 = w3, u3         C                       M I
447         ;;
448         ld8             v0 = [vp], 8            C                       M01
449         ld8             u0 = [up], 8            C                       M01
450   (p15) br.cond.dpnt    .grt7                   C                       B
451         ;;
452         st8             [rp] = w2, 8            C                       M23
453    (p8) cmp.eq.or       p9, p0 = LIM, w3        C                       M I
454    (p8) add             w3 = INCR, w3           C                       M I
455         ADDSUB          w4 = u4, v4             C                       M I
456         br              .Lcj67                  C                       B
457
458 .grt7:  ld8             v1 = [vp], 8            C                       M01
459         ld8             u1 = [up], 8            C                       M01
460         shr.u           n = n, 3                C                       I0
461    (p8) cmp.eq.or       p9, p0 = LIM, w3        C                       M I
462         nop.i           0
463         nop.b           0
464         ;;
465         add             r11 = 512, vp
466         ld8             v2 = [vp], 8            C                       M01
467         add             r10 = 512, up
468         ld8             u2 = [up], 8            C                       M01
469    (p8) add             w3 = INCR, w3           C                       M I
470         nop.b           0
471         ;;
472         ld8             v3 = [vp], 8            C                       M01
473         ld8             u3 = [up], 8            C                       M01
474         mov.i           ar.lc = n               C                       I0
475         st8             [rp] = w2, 8            C                       M23
476         ADDSUB          w4 = u4, v4             C                       M I
477         br              .LL11x                  C                       B
478
479 C *** MAIN LOOP START ***
C Each pass loads eight limb pairs and stores eight result limbs.  For
C limb i the work is spread over consecutive instruction groups as
C       ADDSUB    w_i = u_i, v_i          raw sum or difference
C       cmp.PRED  p = w_i, u_i            carry or borrow out of the raw result
C  (pc) cmp.eq.or p = LIM, w_i            a carry in on a limb equal to LIM
C                                         also carries out
C  (pc) add       w_i = INCR, w_i         apply the carry in
C where pc is the predicate produced for the previous limb.  The predicates
C cycle through p6, p7, p8 and p9.  The cmp.eq.or and the add sit in the
C same instruction group, so the compare tests w_i before the add changes it.
C Results are stored through rp, except w2 and w6 which go through rpx.
C The two lfetch instructions advance r10 and r11 by 64 bytes per pass.
C .LL000, .LL11x, .LL101, .LL100 and .LL01x are the points where the feed-in
C code enters.  br.cloop branches back while ar.lc is nonzero.
480         ALIGN(32)
481 .Loop:  ld8             v1 = [vp], 8            C                       M01
482         cmp.PRED        p7, p0 = w1, u1         C                       M I
483    (p9) cmp.eq.or       p6, p0 = LIM, w0        C                       M I
484         ld8             u1 = [up], 8            C                       M01
485    (p9) add             w0 = INCR, w0           C                       M I
486         ADDSUB          w2 = u2, v2             C                       M I
487         ;;
488         ld8             v2 = [vp], 8            C                       M01
489         cmp.PRED        p8, p0 = w2, u2         C                       M I
490    (p6) cmp.eq.or       p7, p0 = LIM, w1        C                       M I
491         ld8             u2 = [up], 8            C                       M01
492    (p6) add             w1 = INCR, w1           C                       M I
493         ADDSUB          w3 = u3, v3             C                       M I
494         ;;
495         st8             [rp] = w0, 8            C                       M23
496         ld8             v3 = [vp], 8            C                       M01
497         cmp.PRED        p9, p0 = w3, u3         C                       M I
498    (p7) cmp.eq.or       p8, p0 = LIM, w2        C                       M I
499         ld8             u3 = [up], 8            C                       M01
500    (p7) add             w2 = INCR, w2           C                       M I
501         ;;
502 .LL000: st8             [rp] = w1, 16           C                       M23
503         st8             [rpx] = w2, 32          C                       M23
504    (p8) cmp.eq.or       p9, p0 = LIM, w3        C                       M I
505         lfetch          [r10], 64
506    (p8) add             w3 = INCR, w3           C                       M I
507         ADDSUB          w4 = u4, v4             C                       M I
508         ;;
509 .LL11x: st8             [rp] = w3, 8            C                       M23
510         ld8             v4 = [vp], 8            C                       M01
511         cmp.PRED        p6, p0 = w4, u4         C                       M I
512         ld8             u4 = [up], 8            C                       M01
513         ADDSUB          w5 = u5, v5             C                       M I
514         ;;
515         ld8             v5 = [vp], 8            C                       M01
516         cmp.PRED        p7, p0 = w5, u5         C                       M I
517    (p9) cmp.eq.or       p6, p0 = LIM, w4        C                       M I
518         ld8             u5 = [up], 8            C                       M01
519    (p9) add             w4 = INCR, w4           C                       M I
520         ADDSUB          w6 = u6, v6             C                       M I
521         ;;
522 .LL101: ld8             v6 = [vp], 8            C                       M01
523         cmp.PRED        p8, p0 = w6, u6         C                       M I
524    (p6) cmp.eq.or       p7, p0 = LIM, w5        C                       M I
525         ld8             u6 = [up], 8            C                       M01
526    (p6) add             w5 = INCR, w5           C                       M I
527         ADDSUB          w7 = u7, v7             C                       M I
528         ;;
529         st8             [rp] = w4, 8            C                       M23
530         ld8             v7 = [vp], 8            C                       M01
531         cmp.PRED        p9, p0 = w7, u7         C                       M I
532    (p7) cmp.eq.or       p8, p0 = LIM, w6        C                       M I
533         ld8             u7 = [up], 8            C                       M01
534    (p7) add             w6 = INCR, w6           C                       M I
535         ;;
536 .LL100: st8             [rp] = w5, 16           C                       M23
537         st8             [rpx] = w6, 32          C                       M23
538    (p8) cmp.eq.or       p9, p0 = LIM, w7        C                       M I
539         lfetch          [r11], 64
540    (p8) add             w7 = INCR, w7           C                       M I
541         ADDSUB          w0 = u0, v0             C                       M I
542         ;;
543 .LL01x: st8             [rp] = w7, 8            C                       M23
544         ld8             v0 = [vp], 8            C                       M01
545         cmp.PRED        p6, p0 = w0, u0         C                       M I
546         ld8             u0 = [up], 8            C                       M01
547         ADDSUB          w1 = u1, v1             C                       M I
548         br.cloop.dptk   .Loop                   C                       B
549         ;;
550 C *** MAIN LOOP END ***
551
C Wind-down code.
C Repeats the add, compare and carry steps of the loop for the limbs that
C are already in registers, without any further loads or prefetches.  The
C .Lcj labels are also the join points for the short-operand paths of the
C feed-in code.  After the last store of w6 (which is r8), r8 is cleared and
C then set to 1 when the final carry predicate p6 is set, so r8 ends up as
C 0 or 1 (presumably the carry or borrow return value; confirm against the
C calling convention).  ar.lc is restored from r2 and the function returns
C through b0.
552         cmp.PRED        p7, p0 = w1, u1         C                       M I
553    (p9) cmp.eq.or       p6, p0 = LIM, w0        C                       M I
554    (p9) add             w0 = INCR, w0           C                       M I
555         ADDSUB          w2 = u2, v2             C                       M I
556         ;;
557 .Lcj9:  cmp.PRED        p8, p0 = w2, u2         C                       M I
558    (p6) cmp.eq.or       p7, p0 = LIM, w1        C                       M I
559         st8             [rp] = w0, 8            C                       M23
560    (p6) add             w1 = INCR, w1           C                       M I
561         ADDSUB          w3 = u3, v3             C                       M I
562         ;;
563         cmp.PRED        p9, p0 = w3, u3         C                       M I
564    (p7) cmp.eq.or       p8, p0 = LIM, w2        C                       M I
565    (p7) add             w2 = INCR, w2           C                       M I
566         ;;
567 .Lcj8:  st8             [rp] = w1, 16           C                       M23
568         st8             [rpx] = w2, 32          C                       M23
569    (p8) cmp.eq.or       p9, p0 = LIM, w3        C                       M I
570    (p8) add             w3 = INCR, w3           C                       M I
571         ADDSUB          w4 = u4, v4             C                       M I
572         ;;
573 .Lcj67: st8             [rp] = w3, 8            C                       M23
574         cmp.PRED        p6, p0 = w4, u4         C                       M I
575         ADDSUB          w5 = u5, v5             C                       M I
576         ;;
577         cmp.PRED        p7, p0 = w5, u5         C                       M I
578    (p9) cmp.eq.or       p6, p0 = LIM, w4        C                       M I
579    (p9) add             w4 = INCR, w4           C                       M I
580         ADDSUB          w6 = u6, v6             C                       M I
581         ;;
582 .Lcj5:  cmp.PRED        p8, p0 = w6, u6         C                       M I
583    (p6) cmp.eq.or       p7, p0 = LIM, w5        C                       M I
584         st8             [rp] = w4, 8            C                       M23
585    (p6) add             w5 = INCR, w5           C                       M I
586         ADDSUB          w7 = u7, v7             C                       M I
587         ;;
588 .Lcj4:  cmp.PRED        p9, p0 = w7, u7         C                       M I
589    (p7) cmp.eq.or       p8, p0 = LIM, w6        C                       M I
590    (p7) add             w6 = INCR, w6           C                       M I
591         ;;
592         st8             [rp] = w5, 16           C                       M23
593         st8             [rpx] = w6, 32          C                       M23
594 .Lcj3:
595    (p8) cmp.eq.or       p9, p0 = LIM, w7        C                       M I
596    (p8) add             w7 = INCR, w7           C                       M I
597         ADDSUB          w0 = u0, v0             C                       M I
598         ;;
599 .Lcj2:  st8             [rp] = w7, 8            C                       M23
600         cmp.PRED        p6, p0 = w0, u0         C                       M I
601         ;;
602    (p9) cmp.eq.or       p6, p0 = LIM, w0        C                       M I
603    (p9) add             w0 = INCR, w0           C                       M I
604         mov             r8 = 0                  C                       M I
605         ;;
606 .Lcj1:  st8             [rp] = w0, 8            C                       M23
607         mov.i           ar.lc = r2              C                       I0
608    (p6) mov             r8 = 1                  C                       M I
609         br.ret.sptk.many b0                     C                       B
610 EPILOGUE()
611 ASM_END()