Tizen 2.1 base
[external/gmp.git] / mpn / ia64 / logops_n.asm
1 dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
2 dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
3
4 dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
5 dnl
6 dnl  This file is part of the GNU MP Library.
7 dnl
8 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl  it under the terms of the GNU Lesser General Public License as published
10 dnl  by the Free Software Foundation; either version 3 of the License, or (at
11 dnl  your option) any later version.
12 dnl
13 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
14 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 dnl  License for more details.
17 dnl
18 dnl  You should have received a copy of the GNU Lesser General Public License
19 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21 include(`../config.m4')
22
23 C           cycles/limb
24 C Itanium:      2
25 C Itanium 2:    1
26
27 C TODO
28 C  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in
29 C    wind-down code).
30
31 C INPUT PARAMETERS
C Names for the four incoming argument registers.  In the routine in this
C file rp is the address results are stored to, up and vp are the addresses
C the two source operands are loaded from, and n is the number of limbs.
32 define(`rp', `r32')
33 define(`up', `r33')
34 define(`vp', `r34')
35 define(`n', `r35')
36
C Operation table.  Each entry is guarded by one OPERATION_xxx symbol and
C defines three macros:
C   func      the name passed to PROLOGUE for the entry point,
C   logop     the two-input instruction; the routine invokes it as
C             logop(dst, limb from up, limb from vp),
C   notormov  the step applied to the logop result before it is stored,
C             either a plain mov or a subtraction from -1, which yields
C             the bitwise complement.
C In the notes below u is a limb loaded from up and v a limb loaded from vp.
C The andcm instruction computes its first source AND NOT its second source.
C
C and_n: stores u AND v.
37 ifdef(`OPERATION_and_n',
38 `       define(`func',`mpn_and_n')
39         define(`logop',         `and    $1 = $2, $3')
40         define(`notormov',      `mov    $1 = $2')')
C andn_n: stores u AND NOT v.
41 ifdef(`OPERATION_andn_n',
42 `       define(`func',`mpn_andn_n')
43         define(`logop',         `andcm  $1 = $2, $3')
44         define(`notormov',      `mov    $1 = $2')')
C nand_n: stores NOT (u AND v).
45 ifdef(`OPERATION_nand_n',
46 `       define(`func',`mpn_nand_n')
47         define(`logop',         `and    $1 = $2, $3')
48         define(`notormov',      `sub    $1 = -1, $2')')
C ior_n: stores u OR v.
49 ifdef(`OPERATION_ior_n',
50 `       define(`func',`mpn_ior_n')
51         define(`logop',         `or     $1 = $2, $3')
52         define(`notormov',      `mov    $1 = $2')')
C iorn_n: the andcm operands are swapped on purpose.  logop yields
C v AND NOT u, and complementing that gives u OR NOT v.
53 ifdef(`OPERATION_iorn_n',
54 `       define(`func',`mpn_iorn_n')
55         define(`logop',         `andcm  $1 = $3, $2')
56         define(`notormov',      `sub    $1 = -1, $2')')
C nior_n: stores NOT (u OR v).
57 ifdef(`OPERATION_nior_n',
58 `       define(`func',`mpn_nior_n')
59         define(`logop',         `or     $1 = $2, $3')
60         define(`notormov',      `sub    $1 = -1, $2')')
C xor_n: stores u XOR v.
61 ifdef(`OPERATION_xor_n',
62 `       define(`func',`mpn_xor_n')
63         define(`logop',         `xor    $1 = $2, $3')
64         define(`notormov',      `mov    $1 = $2')')
C xnor_n: stores NOT (u XOR v).
65 ifdef(`OPERATION_xnor_n',
66 `       define(`func',`mpn_xnor_n')
67         define(`logop',         `xor    $1 = $2, $3')
68         define(`notormov',      `sub    $1 = -1, $2')')
69
C The next line names all eight entry points this source can be built as.
C NOTE(review): the macro is not defined in this file; presumably the build
C machinery reads this list, confirm against the shared macro definitions.
70 MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
71
72 ASM_START()
C func(rp, up, vp, n)
C
C For each of the n limbs: load one 64-bit limb from up and one from vp
C (ld8, post-increment 8), combine them with logop, pass the result through
C notormov, and store it to rp (st8, post-increment 8).
C
C Register use, as seen in the code below:
C   r10, r11          limb 0 of up and vp, loaded before n is examined
C   r16-r19, r20-r23  further limbs loaded from up and from vp respectively
C   r14, r15          logop results, used alternately
C   r8, r9            notormov results, used alternately, the values stored
C   r2                copy of the incoming ar.lc, written back before return
C   p6, p7, p8        n mod 4 == 1, 2, 3
C   p15               n > 4
C
C Limb 0 is loaded unconditionally and the n mod 4 == 0 path always stores
C at least four limbs, so n = 0 is not handled; the code relies on n >= 1.
73 PROLOGUE(func)
74         .prologue
75         .save   ar.lc, r2
76         .body
C 32-bit ABI only: addp4 turns each 32-bit pointer argument into a 64-bit
C address and zxt4 zero-extends the 32-bit count.
77 ifdef(`HAVE_ABI_32',
78 `       addp4   rp = 0, rp                      C                       M I
79         addp4   up = 0, up                      C                       M I
80         addp4   vp = 0, vp                      C                       M I
81         zxt4    n = n                           C                       I
82         ;;
83 ')
C Start loading limb 0 from both sources and copy ar.lc into r2.
84 {.mmi
85         ld8             r10 = [up], 8           C                       M
86         ld8             r11 = [vp], 8           C                       M
87         mov.i           r2 = ar.lc              C                       I0
88 }
C r14 = n mod 4, p15 = (n > 4), n = n / 4.  The three instructions are in
C one instruction group, so all of them read the original n.
89 {.mmi
90         and             r14 = 3, n              C                       M I
91         cmp.lt          p15, p14 = 4, n         C                       M I
92         shr.u           n = n, 2                C                       I0
93         ;;
94 }
C Dispatch on n mod 4.  If none of the three branches is taken, n mod 4 is 0
C and execution falls into .Lb00.
95 {.mmi
96         cmp.eq          p6, p0 = 1, r14         C                       M I
97         cmp.eq          p7, p0 = 2, r14         C                       M I
98         cmp.eq          p8, p0 = 3, r14         C                       M I
99 }
100 {.bbb
101    (p6) br.dptk         .Lb01                   C                       B
102    (p7) br.dptk         .Lb10                   C                       B
103    (p8) br.dptk         .Lb11                   C                       B
104 }
105
C n mod 4 == 0: load limbs 1 to 3 and set n = n/4 - 2.  When p15 is clear
C n is 4, and the four results are completed in the wind-down from .Lcj4.
106 .Lb00:  ld8             r17 = [up], 8           C                       M
107         ld8             r21 = [vp], 8           C                       M
108         add             n = -2, n               C                       M I
109         ;;
110         ld8             r18 = [up], 8           C                       M
111         ld8             r22 = [vp], 8           C                       M
112         ;;
113         ld8             r19 = [up], 8           C                       M
114         ld8             r23 = [vp], 8           C                       M
115   (p15) br.cond.dpnt    .grt4                   C                       B
116
117         logop(          r14, r10, r11)          C                       M I
118         ;;
119         logop(          r15, r17, r21)          C                       M I
120         notormov(       r8, r14)                C                       M I
121         br              .Lcj4                   C                       B
122
C n mod 4 == 0 and n >= 8: load limbs 4 and 5, set ar.lc = n/4 - 2, and join
C the loop at its third stage, .LL00.
123 .grt4:  logop(          r14, r10, r11)          C                       M I
124         ld8             r16 = [up], 8           C                       M
125         ld8             r20 = [vp], 8           C                       M
126         ;;
127         logop(          r15, r17, r21)          C                       M I
128         ld8             r17 = [up], 8           C                       M
129         mov.i           ar.lc = n               C                       I0
130         notormov(       r8, r14)                C                       M I
131         ld8             r21 = [vp], 8           C                       M
132         br              .LL00                   C                       B
133
C n mod 4 == 1: set n = n/4 - 1.  When p15 is clear n is 1, and the single
C result is stored at .Lcj1.
134 .Lb01:  add             n = -1, n               C                       M I
135         logop(          r15, r10, r11)          C                       M I
136   (p15) br.cond.dpnt    .grt1                   C                       B
137         ;;
138
139         notormov(       r9, r15)                C                       M I
140         br              .Lcj1                   C                       B
141
C n mod 4 == 1 and n >= 5: load limbs 1 to 4 and set ar.lc = n/4 - 1.
C br.cloop branches only while ar.lc is nonzero, so it falls through when
C n is 5 and the remaining work is done in the wind-down from .Lcj5.
142 .grt1:  ld8             r16 = [up], 8           C                       M
143         ld8             r20 = [vp], 8           C                       M
144         ;;
145         ld8             r17 = [up], 8           C                       M
146         ld8             r21 = [vp], 8           C                       M
147         mov.i           ar.lc = n               C                       I0
148         ;;
149         ld8             r18 = [up], 8           C                       M
150         ld8             r22 = [vp], 8           C                       M
151         ;;
152         ld8             r19 = [up], 8           C                       M
153         ld8             r23 = [vp], 8           C                       M
154         br.cloop.dptk   .grt5                   C                       B
155         ;;
156
157         logop(          r14, r16, r20)          C                       M I
158         notormov(       r9, r15)                C                       M I
159         br              .Lcj5                   C                       B
160
C n mod 4 == 1 and n >= 9: load limb 5 and join the loop at its second
C stage, .LL01.
161 .grt5:  logop(          r14, r16, r20)          C                       M I
162         ld8             r16 = [up], 8           C                       M
163         notormov(       r9, r15)                C                       M I
164         ld8             r20 = [vp], 8           C                       M
165         br              .LL01                   C                       B
166
C n mod 4 == 2: load limb 1.  When p15 is clear n is 2, and both results are
C stored in the wind-down from .Lcj2.
167 .Lb10:  ld8             r19 = [up], 8           C                       M
168         ld8             r23 = [vp], 8           C                       M
169   (p15) br.cond.dpnt    .grt2                   C                       B
170
171         logop(          r14, r10, r11)          C                       M I
172         ;;
173         logop(          r15, r19, r23)          C                       M I
174         notormov(       r8, r14)                C                       M I
175         br              .Lcj2                   C                       B
176
C n mod 4 == 2 and n >= 6: load limbs 2 to 5 and set ar.lc = n/4 - 1.
C br.cloop enters the loop at .Loop; it falls through to the branch to .Lcj6
C when the count is 0, that is when n is 6.
177 .grt2:  ld8             r16 = [up], 8           C                       M
178         ld8             r20 = [vp], 8           C                       M
179         add             n = -1, n               C                       M I
180         ;;
181         ld8             r17 = [up], 8           C                       M
182         ld8             r21 = [vp], 8           C                       M
183         logop(          r14, r10, r11)          C                       M I
184         ;;
185         ld8             r18 = [up], 8           C                       M
186         ld8             r22 = [vp], 8           C                       M
187         mov.i           ar.lc = n               C                       I0
188         ;;
189         logop(          r15, r19, r23)          C                       M I
190         ld8             r19 = [up], 8           C                       M
191         notormov(       r8, r14)                C                       M I
192         ld8             r23 = [vp], 8           C                       M
193         br.cloop.dptk   .Loop                   C                       B
194         br              .Lcj6                   C                       B
195
C n mod 4 == 3: load limbs 1 and 2 and set n = n/4 - 1.  When p15 is clear
C n is 3, and the results are completed in the wind-down from .Lcj3.
196 .Lb11:  ld8             r18 = [up], 8           C                       M
197         ld8             r22 = [vp], 8           C                       M
198         add             n = -1, n               C                       M I
199         ;;
200         ld8             r19 = [up], 8           C                       M
201         ld8             r23 = [vp], 8           C                       M
202         logop(          r15, r10, r11)          C                       M I
203   (p15) br.cond.dpnt    .grt3                   C                       B
204         ;;
205
206         logop(          r14, r18, r22)          C                       M I
207         notormov(       r9, r15)                C                       M I
208         br              .Lcj3                   C                       B
209
C n mod 4 == 3 and n >= 7: load limbs 3 to 5, set ar.lc = n/4 - 1, and join
C the loop at its fourth stage, .LL11.
210 .grt3:  ld8             r16 = [up], 8           C                       M
211         ld8             r20 = [vp], 8           C                       M
212         ;;
213         ld8             r17 = [up], 8           C                       M
214         ld8             r21 = [vp], 8           C                       M
215         mov.i           ar.lc = n               C                       I0
216         ;;
217         logop(          r14, r18, r22)          C                       M I
218         ld8             r18 = [up], 8           C                       M
219         notormov(       r9, r15)                C                       M I
220         ld8             r22 = [vp], 8           C                       M
221         br              .LL11                   C                       B
222
223 C *** MAIN LOOP START ***
C Four stages per iteration, one limb each.  Every stage stores a finished
C result, runs logop on a limb pair loaded earlier, applies notormov to the
C previous logop result, and loads the next limb pair into the registers it
C has just consumed.  br.cloop repeats while ar.lc is nonzero, decrementing
C it.  .LL01, .LL00 and .LL11 are the mid-loop entry points used by the
C feed-in code above.
224         ALIGN(32)
225 .Loop:  st8             [rp] = r8, 8            C                       M
226         logop(          r14, r16, r20)          C                       M I
227         notormov(       r9, r15)                C                       M I
228         ld8             r16 = [up], 8           C                       M
229         ld8             r20 = [vp], 8           C                       M
230         nop.b           0
231         ;;
232 .LL01:  st8             [rp] = r9, 8            C                       M
233         logop(          r15, r17, r21)          C                       M I
234         notormov(       r8, r14)                C                       M I
235         ld8             r17 = [up], 8           C                       M
236         ld8             r21 = [vp], 8           C                       M
237         nop.b           0
238         ;;
239 .LL00:  st8             [rp] = r8, 8            C                       M
240         logop(          r14, r18, r22)          C                       M I
241         notormov(       r9, r15)                C                       M I
242         ld8             r18 = [up], 8           C                       M
243         ld8             r22 = [vp], 8           C                       M
244         nop.b           0
245         ;;
246 .LL11:  st8             [rp] = r9, 8            C                       M
247         logop(          r15, r19, r23)          C                       M I
248         notormov(       r8, r14)                C                       M I
249         ld8             r19 = [up], 8           C                       M
250         ld8             r23 = [vp], 8           C                       M
251         br.cloop.dptk   .Loop   ;;              C                       B
252 C *** MAIN LOOP END ***
253
C Wind-down: no further loads.  Entering at .LcjK leaves exactly K limbs to
C be stored, using operands that are already in registers.  After the last
C store ar.lc is restored from r2 and the routine returns.
254 .Lcj6:  st8             [rp] = r8, 8            C                       M
255         logop(          r14, r16, r20)          C                       M I
256         notormov(       r9, r15)                C                       M I
257         ;;
258 .Lcj5:  st8             [rp] = r9, 8            C                       M
259         logop(          r15, r17, r21)          C                       M I
260         notormov(       r8, r14)                C                       M I
261         ;;
262 .Lcj4:  st8             [rp] = r8, 8            C                       M
263         logop(          r14, r18, r22)          C                       M I
264         notormov(       r9, r15)                C                       M I
265         ;;
266 .Lcj3:  st8             [rp] = r9, 8            C                       M
267         logop(          r15, r19, r23)          C                       M I
268         notormov(       r8, r14)                C                       M I
269         ;;
270 .Lcj2:  st8             [rp] = r8, 8            C                       M
271         notormov(       r9, r15)                C                       M I
272         ;;
273 .Lcj1:  st8             [rp] = r9, 8            C                       M
274         mov.i           ar.lc = r2              C                       I0
275         br.ret.sptk.many b0                     C                       B
276 EPILOGUE()
277 ASM_END()