6e1e21455894351ae2722ec9b59da68dc7f08f4b
[platform/upstream/gmp.git] / mpn / alpha / rshift.asm
1 dnl  Alpha mpn_rshift -- Shift a number right.
2
3 dnl  Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc.
4
5 dnl  This file is part of the GNU MP Library.
6 dnl
7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl  it under the terms of either:
9 dnl
10 dnl    * the GNU Lesser General Public License as published by the Free
11 dnl      Software Foundation; either version 3 of the License, or (at your
12 dnl      option) any later version.
13 dnl
14 dnl  or
15 dnl
16 dnl    * the GNU General Public License as published by the Free Software
17 dnl      Foundation; either version 2 of the License, or (at your option) any
18 dnl      later version.
19 dnl
20 dnl  or both in parallel, as here.
21 dnl
22 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25 dnl  for more details.
26 dnl
27 dnl  You should have received copies of the GNU General Public License and the
28 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29 dnl  see https://www.gnu.org/licenses/.
30
31 include(`../config.m4')
32
33 C      cycles/limb
34 C EV4:     ?
35 C EV5:     3.25
36 C EV6:     1.75
37
38 C  INPUT PARAMETERS
39 C  rp   r16     destination; n result limbs are stored here
40 C  up   r17     source; n limbs are loaded from here, least significant first
41 C  n    r18     limb count; must be at least 1, n = 0 is not handled
42 C  cnt  r19     right shift count in bits
43 C  Result in r0: the first source limb shifted left by 0-cnt, the bits shifted out.
44 C  NOTE(review): sll by 0-cnt presumably acts as a shift by 64-cnt, so cnt = 1..63; confirm.
45 ASM_START()
46 PROLOGUE(mpn_rshift)
47         ldq     r4,0(r17)       C load first limb
48         subq    r31,r19,r20     C r20 = 0 - cnt, the count used by every sll below
49         subq    r18,1,r18       C r18 = n-1, limbs beyond the first
50         and     r18,4-1,r28     C number of limbs in first loop, (n-1) mod 4
51         sll     r4,r20,r0       C compute function result
52 C Skip the first loop when (n-1) mod 4 is zero.
53         beq     r28,L(L0)
54         subq    r18,r28,r18     C r18 = limbs left for the 4-way code, a multiple of 4
55 C First loop: one result limb per pass; r4 = current limb, r3 = next limb.
56         ALIGN(8)
57 L(top0):
58         ldq     r3,8(r17)       C next source limb
59         addq    r16,8,r16       C advance rp; the store below compensates with -8
60         srl     r4,r19,r5       C low part: current limb >> cnt
61         addq    r17,8,r17       C advance up
62         subq    r28,1,r28       C count down the first-loop limbs
63         sll     r3,r20,r6       C high part: next limb shifted left by r20
64         bis     r3,r3,r4        C r4 = r3, next limb becomes current
65         bis     r5,r6,r8        C merge both parts into the result limb
66         stq     r8,-8(r16)      C store it at the slot rp pointed to on entry to this pass
67         bne     r28,L(top0)
68
69 L(L0):  srl     r4,r19,r24      C r24 = current limb >> cnt, low part still to be stored
70         beq     r18,L(end)      C no further limbs: just store r24
71 C warm up phase 1: load the next four limbs into r1..r4
72         ldq     r1,8(r17)
73         subq    r18,4,r18       C account for those four limbs
74         ldq     r2,16(r17)
75         ldq     r3,24(r17)
76         ldq     r4,32(r17)
77 C warm up phase 2: sll parts go to r7,r8,r5,r6 and srl parts to r21..r24 for r1..r4
78         sll     r1,r20,r7
79         srl     r1,r19,r21
80         sll     r2,r20,r8
81         beq     r18,L(end1)     C those four were the last limbs
82         ldq     r1,40(r17)      C r1..r4 are reloaded with the following four limbs
83         srl     r2,r19,r22
84         ldq     r2,48(r17)
85         sll     r3,r20,r5
86         bis     r7,r24,r7       C result limb = sll part of r1 | pending srl part
87         srl     r3,r19,r23
88         bis     r8,r21,r8
89         sll     r4,r20,r6
90         ldq     r3,56(r17)
91         srl     r4,r19,r24      C new pending low part
92         ldq     r4,64(r17)
93         subq    r18,4,r18       C account for the second group of four
94         beq     r18,L(end2)     C nothing more to load
95         ALIGN(16)
96 C main loop: per pass, store four result limbs, shift the four limbs loaded before, load four more
97 L(top): stq     r7,0(r16)
98         bis     r5,r22,r5
99         stq     r8,8(r16)
100         bis     r6,r23,r6
101
102         sll     r1,r20,r7
103         subq    r18,4,r18       C four more limbs are loaded during this pass
104         srl     r1,r19,r21
105         unop    C ldq   r31,-96(r17)
106
107         sll     r2,r20,r8
108         ldq     r1,72(r17)
109         srl     r2,r19,r22
110         ldq     r2,80(r17)
111
112         stq     r5,16(r16)
113         bis     r7,r24,r7
114         stq     r6,24(r16)
115         bis     r8,r21,r8
116
117         sll     r3,r20,r5
118         unop    C ldq   r31,-96(r17)
119         srl     r3,r19,r23
120         addq    r16,32,r16      C rp += 4 limbs
121
122         sll     r4,r20,r6
123         ldq     r3,88(r17)
124         srl     r4,r19,r24
125         ldq     r4,96(r17)
126
127         addq    r17,32,r17      C up += 4 limbs
128         bne     r18,L(top)
129 C cool down phase 2/1: no more loads; store four results, shift the last four limbs
130 L(end2):
131         stq     r7,0(r16)
132         bis     r5,r22,r5
133         stq     r8,8(r16)
134         bis     r6,r23,r6
135         sll     r1,r20,r7
136         srl     r1,r19,r21
137         sll     r2,r20,r8
138         srl     r2,r19,r22
139         stq     r5,16(r16)
140         bis     r7,r24,r7
141         stq     r6,24(r16)
142         bis     r8,r21,r8
143         sll     r3,r20,r5
144         srl     r3,r19,r23
145         sll     r4,r20,r6
146         srl     r4,r19,r24
147 C cool down phase 2/2: store the remaining four combined results
148         stq     r7,32(r16)
149         bis     r5,r22,r5
150         stq     r8,40(r16)
151         bis     r6,r23,r6
152         stq     r5,48(r16)
153         stq     r6,56(r16)
154 C cool down phase 2/3: the last limb has only its srl part
155         stq     r24,64(r16)
156         ret     r31,(r26),1
157
158 C cool down phase 1/1: entered from warm up with only r1..r4 loaded; finish their shifts
159 L(end1):
160         srl     r2,r19,r22
161         sll     r3,r20,r5
162         bis     r7,r24,r7
163         srl     r3,r19,r23
164         bis     r8,r21,r8
165         sll     r4,r20,r6
166         srl     r4,r19,r24
167 C cool down phase 1/2: store four results, then the srl part of the last limb
168         stq     r7,0(r16)
169         bis     r5,r22,r5
170         stq     r8,8(r16)
171         bis     r6,r23,r6
172         stq     r5,16(r16)
173         stq     r6,24(r16)
174         stq     r24,32(r16)
175         ret     r31,(r26),1
176
177 L(end): stq     r24,0(r16)      C store the srl part of the last limb
178         ret     r31,(r26),1
179 EPILOGUE(mpn_rshift)
180 ASM_END()