1 dnl IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
2 dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
4 dnl Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of the GNU Lesser General Public License as published
10 dnl by the Free Software Foundation; either version 3 of the License, or (at
11 dnl your option) any later version.
13 dnl The GNU MP Library is distributed in the hope that it will be useful, but
14 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 dnl License for more details.
18 dnl You should have received a copy of the GNU Lesser General Public License
19 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
21 include(`../config.m4')
28 C * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in the wind-down code).
C and_n variant: binds func to mpn_and_n.
C logop(dst, a, b) emits the IA-64 and instruction, dst = a AND b.
C notormov(dst, src) emits a plain register copy, so no complement is applied.
37 ifdef(`OPERATION_and_n',
38 ` define(`func',`mpn_and_n')
39 define(`logop', `and $1 = $2, $3')
40 define(`notormov', `mov $1 = $2')')
C andn_n variant: binds func to mpn_andn_n.
C logop(dst, a, b) emits andcm (and-complement), dst = a AND (NOT b).
C notormov(dst, src) emits a plain register copy, so no further complement.
41 ifdef(`OPERATION_andn_n',
42 ` define(`func',`mpn_andn_n')
43 define(`logop', `andcm $1 = $2, $3')
44 define(`notormov', `mov $1 = $2')')
C nand_n variant: binds func to mpn_nand_n.
C logop(dst, a, b) emits and, dst = a AND b.
C notormov(dst, src) emits sub dst = -1, src; since -1 - x equals the bitwise
C complement of x, the combined result is NOT (a AND b).
45 ifdef(`OPERATION_nand_n',
46 ` define(`func',`mpn_nand_n')
47 define(`logop', `and $1 = $2, $3')
48 define(`notormov', `sub $1 = -1, $2')')
C ior_n variant: binds func to mpn_ior_n.
C logop(dst, a, b) emits or, dst = a OR b.
C notormov(dst, src) emits a plain register copy, so no complement is applied.
49 ifdef(`OPERATION_ior_n',
50 ` define(`func',`mpn_ior_n')
51 define(`logop', `or $1 = $2, $3')
52 define(`notormov', `mov $1 = $2')')
C iorn_n variant: binds func to mpn_iorn_n.
C Note the swapped operands: logop(dst, a, b) emits andcm dst = b, a, which is
C dst = b AND (NOT a).  notormov then complements it (-1 - x), giving
C NOT (b AND NOT a) = a OR (NOT b) by De Morgan.
53 ifdef(`OPERATION_iorn_n',
54 ` define(`func',`mpn_iorn_n')
55 define(`logop', `andcm $1 = $3, $2')
56 define(`notormov', `sub $1 = -1, $2')')
C nior_n variant: binds func to mpn_nior_n.
C logop(dst, a, b) emits or, dst = a OR b.
C notormov(dst, src) emits sub dst = -1, src (bitwise complement), so the
C combined result is NOT (a OR b).
57 ifdef(`OPERATION_nior_n',
58 ` define(`func',`mpn_nior_n')
59 define(`logop', `or $1 = $2, $3')
60 define(`notormov', `sub $1 = -1, $2')')
C xor_n variant: binds func to mpn_xor_n.
C logop(dst, a, b) emits xor, dst = a XOR b.
C notormov(dst, src) emits a plain register copy, so no complement is applied.
61 ifdef(`OPERATION_xor_n',
62 ` define(`func',`mpn_xor_n')
63 define(`logop', `xor $1 = $2, $3')
64 define(`notormov', `mov $1 = $2')')
C xnor_n variant: binds func to mpn_xnor_n.
C logop(dst, a, b) emits xor, dst = a XOR b.
C notormov(dst, src) emits sub dst = -1, src (bitwise complement), so the
C combined result is NOT (a XOR b).
65 ifdef(`OPERATION_xnor_n',
66 ` define(`func',`mpn_xnor_n')
67 define(`logop', `xor $1 = $2, $3')
68 define(`notormov', `sub $1 = -1, $2')')
C Lists the eight entry-point names that the OPERATION_* conditionals in this
C file can bind to func.  MULFUNC_PROLOGUE itself is not defined here;
C presumably it comes from the included config.m4 -- TODO confirm.
70 MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
C Entry sequence and four-way dispatch on the value held in r14.
C NOTE(review): in this excerpt the function prologue, the write of r14, the
C loads of r10/r11 and any save of ar.lc into r2 are not visible; the comments
C below describe only the lines that are present.
C The first addp4 line starts with an m4 open-quote character, so the addp4
C lines presumably form the quoted body of a conditional (likely a 32-bit
C pointer ABI case) whose opening macro call and closing quote are outside
C this view -- TODO confirm.
78 ` addp4 rp = 0, rp C M I
79 addp4 up = 0, up C M I
80 addp4 vp = 0, vp C M I
C p15 = (4 < n), p14 = the opposite.  p15 later guards the branches to the
C .grtN long paths in each feed-in block.
91 cmp.lt p15, p14 = 4, n C M I
C p6, p7, p8 are set when r14 equals 1, 2, 3 respectively.  r14 presumably
C holds n mod 4 -- TODO confirm, its definition is not visible here.
96 cmp.eq p6, p0 = 1, r14 C M I
97 cmp.eq p7, p0 = 2, r14 C M I
98 cmp.eq p8, p0 = 3, r14 C M I
C Jump to the matching feed-in block; when none of p6/p7/p8 is set, execution
C falls through to .Lb00.
101 (p6) br.dptk .Lb01 C B
102 (p7) br.dptk .Lb10 C B
103 (p8) br.dptk .Lb11 C B
C .Lb00: feed-in block reached by fall-through when none of p6/p7/p8 is set.
C Loads three limbs through up into r17..r19 and three through vp into
C r21..r23; every ld8 post-increments its pointer by 8 bytes.
C NOTE(review): r10/r11 are consumed below but their loads are not visible in
C this excerpt -- presumably the first u and v limbs, TODO confirm.
106 .Lb00: ld8 r17 = [up], 8 C M
107 ld8 r21 = [vp], 8 C M
110 ld8 r18 = [up], 8 C M
111 ld8 r22 = [vp], 8 C M
113 ld8 r19 = [up], 8 C M
114 ld8 r23 = [vp], 8 C M
C Take the long path when p15 is set (4 < n at the entry compare).
115 (p15) br.cond.dpnt .grt4 C B
C Short path: run logop on the first two limb pairs and finish the first one
C into r8 with notormov.
117 logop( r14, r10, r11) C M I
119 logop( r15, r17, r21) C M I
120 notormov( r8, r14) C M I
C NOTE(review): no branch is visible after the short path, so as shown it
C falls into .grt4; presumably a jump into the wind-down code belongs here --
C TODO confirm against the complete file.
C .grt4: same first two steps as the short path, interleaved with loads of
C the next limbs into r16/r20 and r17/r21.
123 .grt4: logop( r14, r10, r11) C M I
124 ld8 r16 = [up], 8 C M
125 ld8 r20 = [vp], 8 C M
127 logop( r15, r17, r21) C M I
C r17/r21 have just been consumed by logop, so they are reloaded.
128 ld8 r17 = [up], 8 C M
130 notormov( r8, r14) C M I
131 ld8 r21 = [vp], 8 C M
C .Lb01: feed-in block taken when r14 equals 1 (predicate p6).
C Adjusts n by -1 and runs logop on the r10/r11 pair into r15.
C NOTE(review): the loads of r10/r11 are not visible in this excerpt.
134 .Lb01: add n = -1, n C M I
135 logop( r15, r10, r11) C M I
C Take the long path when p15 is set (4 < n at the entry compare).
136 (p15) br.cond.dpnt .grt1 C B
C Short path: finish the single limb into r9.
C NOTE(review): no branch follows in this excerpt; presumably a jump into the
C wind-down code belongs here -- TODO confirm.
139 notormov( r9, r15) C M I
C .grt1: load four more limb pairs, r16..r19 through up and r20..r23 through
C vp, each load post-incrementing its pointer by 8 bytes.
142 .grt1: ld8 r16 = [up], 8 C M
143 ld8 r20 = [vp], 8 C M
145 ld8 r17 = [up], 8 C M
146 ld8 r21 = [vp], 8 C M
149 ld8 r18 = [up], 8 C M
150 ld8 r22 = [vp], 8 C M
152 ld8 r19 = [up], 8 C M
153 ld8 r23 = [vp], 8 C M
C br.cloop branches while the loop counter ar.lc is nonzero, decrementing it.
C NOTE(review): no write of ar.lc is visible before this point in the excerpt.
154 br.cloop.dptk .grt5 C B
C Counter exhausted: process r16/r20 and finish r9 without further loads.
157 logop( r14, r16, r20) C M I
158 notormov( r9, r15) C M I
C .grt5: same two steps, but r16/r20 are reloaded for a later limb.
161 .grt5: logop( r14, r16, r20) C M I
162 ld8 r16 = [up], 8 C M
163 notormov( r9, r15) C M I
164 ld8 r20 = [vp], 8 C M
C .Lb10: feed-in block taken when r14 equals 2 (predicate p7).
C Loads the second limb pair into r19/r23; the first pair is taken from
C r10/r11, whose loads are not visible in this excerpt.
167 .Lb10: ld8 r19 = [up], 8 C M
168 ld8 r23 = [vp], 8 C M
C Take the long path when p15 is set (4 < n at the entry compare).
169 (p15) br.cond.dpnt .grt2 C B
C Short path: logop on both pairs, finish the first limb into r8.
C NOTE(review): no branch follows in this excerpt; presumably a jump into the
C wind-down code belongs here -- TODO confirm.
171 logop( r14, r10, r11) C M I
173 logop( r15, r19, r23) C M I
174 notormov( r8, r14) C M I
C .grt2: preload r16..r18 and r20..r22 for the main loop while processing the
C first two limb pairs.
177 .grt2: ld8 r16 = [up], 8 C M
178 ld8 r20 = [vp], 8 C M
181 ld8 r17 = [up], 8 C M
182 ld8 r21 = [vp], 8 C M
183 logop( r14, r10, r11) C M I
185 ld8 r18 = [up], 8 C M
186 ld8 r22 = [vp], 8 C M
189 logop( r15, r19, r23) C M I
C r19/r23 have just been consumed by logop, so they are reloaded.
190 ld8 r19 = [up], 8 C M
191 notormov( r8, r14) C M I
192 ld8 r23 = [vp], 8 C M
C Enter the main loop while ar.lc is nonzero.
C NOTE(review): no write of ar.lc is visible in this excerpt.
193 br.cloop.dptk .Loop C B
C .Lb11: feed-in block taken when r14 equals 3 (predicate p8).
C Loads two more limb pairs into r18/r22 and r19/r23 and runs logop on the
C r10/r11 pair, whose loads are not visible in this excerpt.
196 .Lb11: ld8 r18 = [up], 8 C M
197 ld8 r22 = [vp], 8 C M
200 ld8 r19 = [up], 8 C M
201 ld8 r23 = [vp], 8 C M
202 logop( r15, r10, r11) C M I
C Take the long path when p15 is set (4 < n at the entry compare).
203 (p15) br.cond.dpnt .grt3 C B
C Short path: logop on r18/r22 and finish the first limb into r9.
C NOTE(review): no branch follows in this excerpt; presumably a jump into the
C wind-down code belongs here -- TODO confirm.
206 logop( r14, r18, r22) C M I
207 notormov( r9, r15) C M I
C .grt3: preload r16/r20 and r17/r21, then process r18/r22 and reload them.
210 .grt3: ld8 r16 = [up], 8 C M
211 ld8 r20 = [vp], 8 C M
213 ld8 r17 = [up], 8 C M
214 ld8 r21 = [vp], 8 C M
217 logop( r14, r18, r22) C M I
218 ld8 r18 = [up], 8 C M
219 notormov( r9, r15) C M I
220 ld8 r22 = [vp], 8 C M
C Main loop, unrolled four ways: one limb is stored per stage.
C Register use visible in this block:
C   r16..r19  limbs loaded through up
C   r20..r23  limbs loaded through vp
C   r14, r15  alternating logop results
C   r8, r9    alternating notormov results, stored through rp
C Each stage stores a finished limb, runs logop on a loaded pair, runs
C notormov on the logop result of the previous stage, and reloads the pair it
C just consumed.  All loads and stores post-increment their pointer by 8.
C .LL01, .LL00 and .LL11 are mid-loop labels; no branch to them is visible in
C this excerpt -- presumably they are entry points for the feed-in blocks,
C TODO confirm.
223 C *** MAIN LOOP START ***
225 .Loop: st8 [rp] = r8, 8 C M
226 logop( r14, r16, r20) C M I
227 notormov( r9, r15) C M I
228 ld8 r16 = [up], 8 C M
229 ld8 r20 = [vp], 8 C M
232 .LL01: st8 [rp] = r9, 8 C M
233 logop( r15, r17, r21) C M I
234 notormov( r8, r14) C M I
235 ld8 r17 = [up], 8 C M
236 ld8 r21 = [vp], 8 C M
239 .LL00: st8 [rp] = r8, 8 C M
240 logop( r14, r18, r22) C M I
241 notormov( r9, r15) C M I
242 ld8 r18 = [up], 8 C M
243 ld8 r22 = [vp], 8 C M
246 .LL11: st8 [rp] = r9, 8 C M
247 logop( r15, r19, r23) C M I
248 notormov( r8, r14) C M I
249 ld8 r19 = [up], 8 C M
250 ld8 r23 = [vp], 8 C M
C Repeat while the loop counter ar.lc is nonzero; fall through to the
C wind-down code once it is exhausted.
251 br.cloop.dptk .Loop ;; C B
252 C *** MAIN LOOP END ***
C Wind-down code: drains the limbs still held in registers after the main
C loop.  It repeats the store/logop/notormov pattern of the loop stages but
C issues no further loads.  The last two steps have no logop because no
C loaded pair remains.
C .Lcj6 is reached by fall-through from the main loop.  No branch to .Lcj5
C through .Lcj1 is visible in this excerpt -- presumably they are the exits of
C the short feed-in paths, TODO confirm.
254 .Lcj6: st8 [rp] = r8, 8 C M
255 logop( r14, r16, r20) C M I
256 notormov( r9, r15) C M I
258 .Lcj5: st8 [rp] = r9, 8 C M
259 logop( r15, r17, r21) C M I
260 notormov( r8, r14) C M I
262 .Lcj4: st8 [rp] = r8, 8 C M
263 logop( r14, r18, r22) C M I
264 notormov( r9, r15) C M I
266 .Lcj3: st8 [rp] = r9, 8 C M
267 logop( r15, r19, r23) C M I
268 notormov( r8, r14) C M I
270 .Lcj2: st8 [rp] = r8, 8 C M
271 notormov( r9, r15) C M I
C Final limb store, then write r2 back into the loop counter and return
C through b0.
C NOTE(review): the instruction that copies ar.lc into r2 is not visible in
C this excerpt -- presumably it is in the prologue, TODO confirm.
273 .Lcj1: st8 [rp] = r9, 8 C M
274 mov.i ar.lc = r2 C I0
275 br.ret.sptk.many b0 C B