Upload Tizen:Base source
[external/gmp.git] / mpn / alpha / sub_n.asm
1 dnl  Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0
2 dnl  and store difference in a third limb vector.
3
4 dnl  Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.
5
6 dnl  This file is part of the GNU MP Library.
7
8 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl  it under the terms of the GNU Lesser General Public License as published
10 dnl  by the Free Software Foundation; either version 3 of the License, or (at
11 dnl  your option) any later version.
12
13 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
14 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 dnl  License for more details.
17
18 dnl  You should have received a copy of the GNU Lesser General Public License
19 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21 include(`../config.m4')
22
23 C      cycles/limb
24 C EV4:     ?
25 C EV5:     4.75
26 C EV6:     3
27
28 dnl  INPUT PARAMETERS
29 dnl  res_ptr    r16
30 dnl  s1_ptr     r17
31 dnl  s2_ptr     r18
32 dnl  size       r19
33
34 ASM_START()
35 PROLOGUE(mpn_sub_n)
C Subtracts the size-limb vector at s2_ptr from the one at s1_ptr, writes the
C size difference limbs to res_ptr, and returns the borrow out of the last
C (highest-addressed) limb, 0 or 1, in r0.
C
C Register use:
C   r16 res_ptr, r17 s1_ptr, r18 s2_ptr, r19 limb count (biased while looping)
C   r0-r3    s2 limbs                r4-r7    s1 limbs
C   r20-r23  difference limbs waiting to be stored
C   r28      difference before the incoming borrow is applied
C   r8       borrow from the main subtract (s1 limb < s2 limb)
C   r25      borrow: incoming before a carry subtract, combined outgoing after
C
C Per limb: r28 = s1 - s2, result = r28 - r25,
C           new r25 = (s1 < s2) | (r28 < old r25)
36         bis     r31,r31,r25             C clear cy
37         subq    r19,4,r19               C decr loop cnt
38         blt     r19,$Lend2              C if less than 4 limbs, goto 2nd loop
39 C Start software pipeline for 1st loop
40         ldq     r0,0(r18)               C s2 limb 0
41         ldq     r4,0(r17)               C s1 limb 0
42         ldq     r1,8(r18)               C s2 limb 1
43         ldq     r5,8(r17)               C s1 limb 1
44         addq    r17,32,r17              C update s1_ptr
45         ldq     r2,16(r18)              C s2 limb 2
46         subq    r4,r0,r20               C 1st main subtract
47         ldq     r3,24(r18)              C s2 limb 3
48         subq    r19,4,r19               C decr loop cnt
49         ldq     r6,-16(r17)             C s1 limb 2 (s1_ptr already advanced)
50         cmpult  r4,r0,r25               C compute cy from last subtract
51         ldq     r7,-8(r17)              C s1 limb 3 (s1_ptr already advanced)
52         subq    r5,r1,r28               C 2nd main subtract
53         addq    r18,32,r18              C update s2_ptr
54         subq    r28,r25,r21             C 2nd carry subtract
55         cmpult  r5,r1,r8                C compute cy from last subtract
56         blt     r19,$Lend1              C if less than 4 limbs remain, jump
57 C 1st loop handles groups of 4 limbs in a software pipeline
58         ALIGN(16)
59 $Loop:  cmpult  r28,r25,r25             C compute cy from last subtract
60         ldq     r0,0(r18)               C s2 limb 0 of next group
61         bis     r8,r25,r25              C combine cy from the two subtracts
62         ldq     r1,8(r18)               C s2 limb 1 of next group
63         subq    r6,r2,r28               C 3rd main subtract
64         ldq     r4,0(r17)               C s1 limb 0 of next group
65         subq    r28,r25,r22             C 3rd carry subtract
66         ldq     r5,8(r17)               C s1 limb 1 of next group
67         cmpult  r6,r2,r8                C compute cy from last subtract
68         cmpult  r28,r25,r25             C compute cy from last subtract
69         stq     r20,0(r16)              C store 1st result limb
70         bis     r8,r25,r25              C combine cy from the two subtracts
71         stq     r21,8(r16)              C store 2nd result limb
72         subq    r7,r3,r28               C 4th main subtract
73         subq    r28,r25,r23             C 4th carry subtract
74         cmpult  r7,r3,r8                C compute cy from last subtract
75         cmpult  r28,r25,r25             C compute cy from last subtract
76                 addq    r17,32,r17              C update s1_ptr
77         bis     r8,r25,r25              C combine cy from the two subtracts
78                 addq    r16,32,r16              C update res_ptr
79         subq    r4,r0,r28               C 1st main subtract
80         ldq     r2,16(r18)              C s2 limb 2 of next group
81         subq    r28,r25,r20             C 1st carry subtract
82         ldq     r3,24(r18)              C s2 limb 3 of next group
83         cmpult  r4,r0,r8                C compute cy from last subtract
84         ldq     r6,-16(r17)             C s1 limb 2 of next group
85         cmpult  r28,r25,r25             C compute cy from last subtract
86         ldq     r7,-8(r17)              C s1 limb 3 of next group
87         bis     r8,r25,r25              C combine cy from the two subtracts
88         subq    r19,4,r19               C decr loop cnt
89         stq     r22,-16(r16)            C store 3rd result limb (res_ptr already advanced)
90         subq    r5,r1,r28               C 2nd main subtract
91         stq     r23,-8(r16)             C store 4th result limb (res_ptr already advanced)
92         subq    r28,r25,r21             C 2nd carry subtract
93                 addq    r18,32,r18              C update s2_ptr
94         cmpult  r5,r1,r8                C compute cy from last subtract
95         bge     r19,$Loop               C loop while another group of 4 limbs remains
96 C Finish software pipeline for 1st loop
97 $Lend1: cmpult  r28,r25,r25             C compute cy from last subtract
98         bis     r8,r25,r25              C combine cy from the two subtracts
99         subq    r6,r2,r28               C 3rd main subtract
100         subq    r28,r25,r22             C 3rd carry subtract
101         cmpult  r6,r2,r8                C compute cy from last subtract
102         cmpult  r28,r25,r25             C compute cy from last subtract
103         stq     r20,0(r16)              C store 1st result limb
104         bis     r8,r25,r25              C combine cy from the two subtracts
105         stq     r21,8(r16)              C store 2nd result limb
106         subq    r7,r3,r28               C 4th main subtract
107         subq    r28,r25,r23             C 4th carry subtract
108         cmpult  r7,r3,r8                C compute cy from last subtract
109         cmpult  r28,r25,r25             C compute cy from last subtract
110         bis     r8,r25,r25              C combine cy from the two subtracts
111         addq    r16,32,r16              C update res_ptr
112         stq     r22,-16(r16)            C store 3rd result limb (res_ptr already advanced)
113         stq     r23,-8(r16)             C store 4th result limb (res_ptr already advanced)
114 $Lend2: addq    r19,4,r19               C restore loop cnt
115         beq     r19,$Lret               C no limbs left, return cy
116 C Start software pipeline for 2nd loop
117         ldq     r0,0(r18)               C s2 limb
118         ldq     r4,0(r17)               C s1 limb
119         subq    r19,1,r19               C decr loop cnt
120         beq     r19,$Lend0              C only one limb left, skip 2nd loop
121 C 2nd loop handles remaining 1-3 limbs
122         ALIGN(16)
123 $Loop0: subq    r4,r0,r28               C main subtract
124         cmpult  r4,r0,r8                C compute cy from last subtract
125         ldq     r0,8(r18)               C next s2 limb
126         ldq     r4,8(r17)               C next s1 limb
127         subq    r28,r25,r20             C carry subtract
128         addq    r18,8,r18               C update s2_ptr
129         addq    r17,8,r17               C update s1_ptr
130         stq     r20,0(r16)              C store result limb
131         cmpult  r28,r25,r25             C compute cy from last subtract
132         subq    r19,1,r19               C decr loop cnt
133         bis     r8,r25,r25              C combine cy from the two subtracts
134         addq    r16,8,r16               C update res_ptr
135         bne     r19,$Loop0              C loop until one loaded limb pair is left
136 $Lend0: subq    r4,r0,r28               C main subtract
137         subq    r28,r25,r20             C carry subtract
138         cmpult  r4,r0,r8                C compute cy from last subtract
139         cmpult  r28,r25,r25             C compute cy from last subtract
140         stq     r20,0(r16)              C store final result limb
141         bis     r8,r25,r25              C combine cy from the two subtracts
142
143 $Lret:  bis     r25,r31,r0              C return cy
144         ret     r31,(r26),1             C return to the address in r26
145 EPILOGUE(mpn_sub_n)
146 ASM_END()