Upload Tizen:Base source
[external/gmp.git] / mpn / alpha / rshift.asm
1 dnl  Alpha mpn_rshift -- Shift a number right.
2
3 dnl  Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc.
4
5 dnl  This file is part of the GNU MP Library.
6
7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl  it under the terms of the GNU Lesser General Public License as published
9 dnl  by the Free Software Foundation; either version 3 of the License, or (at
10 dnl  your option) any later version.
11
12 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15 dnl  License for more details.
16
17 dnl  You should have received a copy of the GNU Lesser General Public License
18 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20 include(`../config.m4')
21
22 C      cycles/limb
23 C EV4:     ?
24 C EV5:     3.25
25 C EV6:     1.75
26
27 C  INPUT PARAMETERS
28 C  rp   r16   destination pointer; output limbs are stored from rp[0] upwards
29 C  up   r17   source pointer; up[0] is loaded first, then ascending addresses
30 C  n    r18   limb count; up[0] is loaded unconditionally, so presumably n >= 1 (TODO confirm)
31 C  cnt  r19   right-shift count in bits; assumes 1 <= cnt <= 63 (TODO confirm), cnt = 0 would OR whole adjacent limbs
32 C  Result (r0): up[0] << (64-cnt), i.e. the bits shifted out at the low end.
33 C  Each output limb is (up[i] >> cnt) | (up[i+1] << (64-cnt)); the top output limb is the last source limb >> cnt.
34 ASM_START()
35 PROLOGUE(mpn_rshift)
36         ldq     r4,0(r17)       C r4 = up[0], the current limb
37         subq    r31,r19,r20     C r20 = 0 - cnt; shifts use only the low 6 bits of the count, so r20 acts as 64-cnt
38         subq    r18,1,r18       C r18 = n-1 = limbs that follow the current one
39         and     r18,4-1,r28     C r28 = (n-1) mod 4 = number of limbs in first loop
40         sll     r4,r20,r0       C function result: r0 = up[0] << (64-cnt)
41
42         beq     r28,L(L0)       C n-1 is already a multiple of 4: skip the single-limb loop
43         subq    r18,r28,r18     C r18 = limbs left for the 4-way code (a multiple of 4)
44
45         ALIGN(8)
46 L(top0):                        C single-limb loop: one output limb per pass, r28 passes
47         ldq     r3,8(r17)       C r3 = next source limb
48         addq    r16,8,r16       C advance rp early; the store below compensates with offset -8
49         srl     r4,r19,r5       C low part: current limb >> cnt
50         addq    r17,8,r17       C advance up
51         subq    r28,1,r28       C one fewer pass to go
52         sll     r3,r20,r6       C high part: next limb << (64-cnt)
53         bis     r3,r3,r4        C register move: next limb becomes the current limb
54         bis     r5,r6,r8        C merge low and high parts
55         stq     r8,-8(r16)      C store the finished output limb
56         bne     r28,L(top0)
57
58 L(L0):  srl     r4,r19,r24      C r24 = current limb >> cnt, the low part of the next output limb
59         beq     r18,L(end)      C no limbs follow: r24 alone is the top output limb
60 C warm up phase 1: load the next four source limbs into r1-r4
61         ldq     r1,8(r17)
62         subq    r18,4,r18       C account for the four limbs being loaded here
63         ldq     r2,16(r17)
64         ldq     r3,24(r17)
65         ldq     r4,32(r17)
66 C warm up phase 2: start shifting; r7,r8,r5,r6 collect output limbs 0-3, r21-r24 hold the low parts
67         sll     r1,r20,r7       C r7 = high part of output limb 0
68         srl     r1,r19,r21      C r21 = low part of output limb 1
69         sll     r2,r20,r8       C r8 = high part of output limb 1
70         beq     r18,L(end1)     C only these four limbs remain: finish without the main loop
71         ldq     r1,40(r17)      C r1 has been shifted, so refill it; next four limbs are at 40..64
72         srl     r2,r19,r22      C r22 = low part of output limb 2
73         ldq     r2,48(r17)
74         sll     r3,r20,r5       C r5 = high part of output limb 2
75         bis     r7,r24,r7       C output limb 0 complete
76         srl     r3,r19,r23      C r23 = low part of output limb 3
77         bis     r8,r21,r8       C output limb 1 complete
78         sll     r4,r20,r6       C r6 = high part of output limb 3
79         ldq     r3,56(r17)
80         srl     r4,r19,r24      C r24 = low part of output limb 4, carried into the next group
81         ldq     r4,64(r17)
82         subq    r18,4,r18       C account for the second group of four
83         beq     r18,L(end2)     C nothing beyond the loaded group: drain the pipeline
84         ALIGN(16)
85 C main loop: each pass stores four output limbs, shifts the four loaded limbs and loads four more
86 L(top): stq     r7,0(r16)       C store finished output limb 0 of the pending group
87         bis     r5,r22,r5       C output limb 2 complete
88         stq     r8,8(r16)       C store finished output limb 1
89         bis     r6,r23,r6       C output limb 3 complete
90
91         sll     r1,r20,r7       C start the next group: high part of its limb 0
92         subq    r18,4,r18       C four more limbs accounted for
93         srl     r1,r19,r21      C low part of its limb 1
94         unop    C no-op slot filler; disabled alternative kept for reference: ldq   r31,-96(r17)
95
96         sll     r2,r20,r8       C high part of its limb 1
97         ldq     r1,72(r17)      C r1 has been shifted, so refill it
98         srl     r2,r19,r22      C low part of its limb 2
99         ldq     r2,80(r17)
100
101         stq     r5,16(r16)     C store output limb 2
102         bis     r7,r24,r7      C r24 still holds the low part taken from the previous r4
103         stq     r6,24(r16)     C store output limb 3
104         bis     r8,r21,r8
105
106         sll     r3,r20,r5
107         unop    C no-op slot filler; disabled alternative kept for reference: ldq   r31,-96(r17)
108         srl     r3,r19,r23
109         addq    r16,32,r16     C rp moves past the four limbs stored in this pass
110
111         sll     r4,r20,r6
112         ldq     r3,88(r17)
113         srl     r4,r19,r24     C new carried low part for the following pass
114         ldq     r4,96(r17)
115
116         addq    r17,32,r17     C up advances by four limbs
117         bne     r18,L(top)     C loop while limbs remain
118 C cool down phase 2/1: store the pending group and shift the last four loaded limbs (no more loads)
119 L(end2):
120         stq     r7,0(r16)
121         bis     r5,r22,r5
122         stq     r8,8(r16)
123         bis     r6,r23,r6
124         sll     r1,r20,r7
125         srl     r1,r19,r21
126         sll     r2,r20,r8
127         srl     r2,r19,r22
128         stq     r5,16(r16)
129         bis     r7,r24,r7
130         stq     r6,24(r16)
131         bis     r8,r21,r8
132         sll     r3,r20,r5
133         srl     r3,r19,r23
134         sll     r4,r20,r6
135         srl     r4,r19,r24     C low part of the last source limb
136 C cool down phase 2/2: store the last four merged limbs
137         stq     r7,32(r16)
138         bis     r5,r22,r5
139         stq     r8,40(r16)
140         bis     r6,r23,r6
141         stq     r5,48(r16)
142         stq     r6,56(r16)
143 C cool down phase 2/3: the top output limb is just the carried low part
144         stq     r24,64(r16)
145         ret     r31,(r26),1    C return through r26; r0 still holds the result
146
147 C cool down phase 1/1: reached when only the four warm-up limbs remain; finish their shifts
148 L(end1):
149         srl     r2,r19,r22
150         sll     r3,r20,r5
151         bis     r7,r24,r7      C output limb 0 complete
152         srl     r3,r19,r23
153         bis     r8,r21,r8      C output limb 1 complete
154         sll     r4,r20,r6
155         srl     r4,r19,r24     C low part of the last source limb
156 C cool down phase 1/2: store the four merged limbs, then the top limb
157         stq     r7,0(r16)
158         bis     r5,r22,r5
159         stq     r8,8(r16)
160         bis     r6,r23,r6
161         stq     r5,16(r16)
162         stq     r6,24(r16)
163         stq     r24,32(r16)    C top output limb: last source limb >> cnt
164         ret     r31,(r26),1    C return through r26; r0 still holds the result
165
166 L(end): stq     r24,0(r16)     C top output limb: last source limb >> cnt, nothing above to merge in
167         ret     r31,(r26),1    C return through r26; r0 still holds the result
168 EPILOGUE(mpn_rshift)
169 ASM_END()