Upload Tizen:Base source
[external/gmp.git] / mpn / alpha / lshift.asm
1 dnl  Alpha mpn_lshift -- Shift a number left.
2
3 dnl  Copyright 1994, 1995, 2000, 2003, 2009 Free Software Foundation, Inc.
4
5 dnl  This file is part of the GNU MP Library.
6
7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl  it under the terms of the GNU Lesser General Public License as published
9 dnl  by the Free Software Foundation; either version 3 of the License, or (at
10 dnl  your option) any later version.
11
12 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15 dnl  License for more details.
16
17 dnl  You should have received a copy of the GNU Lesser General Public License
18 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20 include(`../config.m4')
21
22 C      cycles/limb
23 C EV4:     ?
24 C EV5:     3.25
25 C EV6:     1.75
26
27 C  INPUT PARAMETERS
28 C  rp   r16
29 C  up   r17
30 C  n    r18
31 C  cnt  r19
32
33
34 ASM_START()
C mpn_lshift: shift the n-limb number at up left by cnt bits, store the n
C result limbs at rp, and return in r0 the bits shifted out of the most
C significant limb.  Limbs are 64-bit quadwords.  Processing runs from the
C highest address (most significant limb) down to the lowest.
C
C Register roles, as used below:
C   r16  rp cursor, starts at rp + 8*n and moves downward
C   r17  up cursor, starts at up + 8*n and moves downward
C   r18  limbs left for the 4-way unrolled code (a multiple of 4 once the
C        peeling loop count has been subtracted)
C   r19  cnt, the left-shift count
C   r20  0 - cnt, used as the right-shift count
C   r28  iteration count of the one-limb-at-a-time peeling loop
C   r24  left-shifted part of the lowest limb handled so far, still waiting
C        for the bits of the next lower limb
C   r0   return value
C
C NOTE(review): the top limb is loaded unconditionally and the right shifts
C depend on only the low 6 bits of r20 being used as the shift count, so this
C presumably expects n >= 1 and 1 <= cnt <= 63 -- confirm against the
C documented mpn_lshift contract.
35 PROLOGUE(mpn_lshift)
36         s8addq  r18,r17,r17     C r17 = up + 8*n, just past the top source limb
37         ldq     r4,-8(r17)      C r4 = up[n-1], the most significant limb
38         subq    r31,r19,r20     C r20 = 0 - cnt (r31 reads as zero)
39         s8addq  r18,r16,r16     C r16 = rp + 8*n, just past the top result limb
40         subq    r18,1,r18       C r18 = n - 1
41         and     r18,4-1,r28     C r28 = (n-1) mod 4, limbs for the peeling loop
42         srl     r4,r20,r0       C function result: bits shifted out of the top limb
43
44         beq     r28,L(L0)       C nothing to peel, go to the unrolled code
45         subq    r18,r28,r18     C r18 is now a multiple of 4
46
47         ALIGN(8)
C Peeling loop: produces one result limb per iteration, r28 iterations.
48 L(top0):
49         ldq     r3,-16(r17)     C next lower source limb
50         subq    r16,8,r16       C step rp cursor down one limb
51         sll     r4,r19,r5       C high part, from the current limb
52         subq    r17,8,r17       C step up cursor down one limb
53         subq    r28,1,r28
54         srl     r3,r20,r6       C low part, from the next lower limb
55         bis     r3,r3,r4        C r4 = r3, the lower limb becomes the current one
56         bis     r5,r6,r8        C combine both parts
57         stq     r8,0(r16)       C store the result limb
58         bne     r28,L(top0)
59
C Here r4 holds the last limb loaded and r18 the number of limbs left.
60 L(L0):  sll     r4,r19,r24      C r24 = pending high part of the next result limb
61         beq     r18,L(end)      C no limbs left, r24 is the lowest result limb
62 C warm up phase 1: load the next four source limbs
63         ldq     r1,-16(r17)
64         subq    r18,4,r18
65         ldq     r2,-24(r17)
66         ldq     r3,-32(r17)
67         ldq     r4,-40(r17)
68 C warm up phase 2: start shifting, and load four more limbs if any remain
69         srl     r1,r20,r7
70         sll     r1,r19,r21
71         srl     r2,r20,r8
72         beq     r18,L(end1)     C the four limbs just loaded are the last ones
73         ldq     r1,-48(r17)
74         sll     r2,r19,r22
75         ldq     r2,-56(r17)
76         srl     r3,r20,r5
77         bis     r7,r24,r7
78         sll     r3,r19,r23
79         bis     r8,r21,r8
80         srl     r4,r20,r6
81         ldq     r3,-64(r17)
82         sll     r4,r19,r24
83         ldq     r4,-72(r17)
84         subq    r18,4,r18
85         beq     r18,L(end2)     C the eight limbs loaded so far are the last ones
86         ALIGN(16)
87 C main loop: each pass stores four result limbs and loads four source limbs
C NOTE(review): the two unop lines carry a commented-out ldq into r31,
C presumably a disabled prefetch -- confirm before re-enabling.
88 L(top): stq     r7,-8(r16)
89         bis     r5,r22,r5
90         stq     r8,-16(r16)
91         bis     r6,r23,r6
92
93         srl     r1,r20,r7
94         subq    r18,4,r18       C four fewer limbs left
95         sll     r1,r19,r21
96         unop    C ldq   r31,-96(r17)
97
98         srl     r2,r20,r8
99         ldq     r1,-80(r17)
100         sll     r2,r19,r22
101         ldq     r2,-88(r17)
102
103         stq     r5,-24(r16)
104         bis     r7,r24,r7
105         stq     r6,-32(r16)
106         bis     r8,r21,r8
107
108         srl     r3,r20,r5
109         unop    C ldq   r31,-96(r17)
110         sll     r3,r19,r23
111         subq    r16,32,r16      C step rp cursor down four limbs
112
113         srl     r4,r20,r6
114         ldq     r3,-96(r17)
115         sll     r4,r19,r24
116         ldq     r4,-104(r17)
117
118         subq    r17,32,r17      C step up cursor down four limbs
119         bne     r18,L(top)
120 C cool down phase 2/1: no more loads; store four limbs, shift the last four
121 L(end2):
122         stq     r7,-8(r16)
123         bis     r5,r22,r5
124         stq     r8,-16(r16)
125         bis     r6,r23,r6
126         srl     r1,r20,r7
127         sll     r1,r19,r21
128         srl     r2,r20,r8
129         sll     r2,r19,r22
130         stq     r5,-24(r16)
131         bis     r7,r24,r7
132         stq     r6,-32(r16)
133         bis     r8,r21,r8
134         srl     r3,r20,r5
135         sll     r3,r19,r23
136         srl     r4,r20,r6
137         sll     r4,r19,r24
138 C cool down phase 2/2: store the last four two-part result limbs
139         stq     r7,-40(r16)
140         bis     r5,r22,r5
141         stq     r8,-48(r16)
142         bis     r6,r23,r6
143         stq     r5,-56(r16)
144         stq     r6,-64(r16)
145 C cool down phase 2/3: the lowest result limb has only a left-shifted part
146         stq     r24,-72(r16)
147         ret     r31,(r26),1     C return through r26, result in r0
148
149 C cool down phase 1/1: finish shifting the four limbs from warm up phase 1
150 L(end1):
151         sll     r2,r19,r22
152         srl     r3,r20,r5
153         bis     r7,r24,r7
154         sll     r3,r19,r23
155         bis     r8,r21,r8
156         srl     r4,r20,r6
157         sll     r4,r19,r24
158 C cool down phase 1/2: store four result limbs, then the lowest one
159         stq     r7,-8(r16)
160         bis     r5,r22,r5
161         stq     r8,-16(r16)
162         bis     r6,r23,r6
163         stq     r5,-24(r16)
164         stq     r6,-32(r16)
165         stq     r24,-40(r16)    C lowest result limb, left-shifted part only
166         ret     r31,(r26),1     C return through r26, result in r0
167
C Reached when no limbs remain after the peeling loop.
168 L(end): stq     r24,-8(r16)     C lowest result limb, left-shifted part only
169         ret     r31,(r26),1     C return through r26, result in r0
170 EPILOGUE(mpn_lshift)
171 ASM_END()