1 dnl IA-64 mpn_lshift/mpn_rshift.
3 dnl Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,
6 dnl This file is part of the GNU MP Library.
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of the GNU Lesser General Public License as published
10 dnl by the Free Software Foundation; either version 3 of the License, or (at
11 dnl your option) any later version.
13 dnl The GNU MP Library is distributed in the hope that it will be useful, but
14 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 dnl License for more details.
18 dnl You should have received a copy of the GNU Lesser General Public License
19 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
21 include(`../config.m4')
27 C This code is scheduled deeply since the plain shift instructions shr and shl
28 C have a latency of 4 (on Itanium) or 3 (on Itanium 2). Poor scheduling of
29 C these instructions causes a 10 cycle replay trap on Itanium.
32 C * Optimize function entry and feed-in code.
C Operation selection: func is defined as mpn_lshift when OPERATION_lshift is
C defined and as mpn_rshift when OPERATION_rshift is defined.
C NOTE(review): the remainder of each ifdef body and its closing quote are not
C visible in this excerpt - presumably FSH, BSH, UPD and POFF are defined
C there per shift direction; confirm against the full file.
C MULFUNC_PROLOGUE is handed both entry point names; the macro itself is not
C defined in the visible part of this file.
42 ifdef(`OPERATION_lshift',`
48 define(`func',`mpn_lshift')
50 ifdef(`OPERATION_rshift',`
56 define(`func',`mpn_rshift')
59 MULFUNC_PROLOGUE(mpn_lshift mpn_rshift)
C Function entry.  rp, up, n, cnt and tnc are symbolic register names whose
C definitions are outside this excerpt.
C NOTE(review): the backquote on the first addp4 line opens an m4 quoted
C argument whose opener and closer are not shown - presumably a 32-bit ABI
C conditional; confirm.  addp4 forms 64-bit pointers from the 32-bit values
C in rp and up.
67 ` addp4 rp = 0, rp C M I
68 addp4 up = 0, up C M I
C p14 is set when n > 4 and p15 when n <= 4.
74 {.mmi; cmp.lt p14, p15 = 4, n C M I
C r15 = n - 1 and tnc = 64 - cnt, the complementary shift count.
77 }{.mmi; add r15 = -1, n C M I
78 sub tnc = 64, cnt C M I
C p6, p7 and p8 flag r14 equal to 1, 2 and 3.  r14 is written on a line not
C shown here - presumably n mod 4, selecting the feed-in variant; confirm.
81 }{.mmi; cmp.eq p6, p0 = 1, r14 C M I
82 cmp.eq p7, p0 = 2, r14 C M I
84 }{.mmi; cmp.eq p8, p0 = 3, r14 C M I
C lshift only: advance up and rp by 8 * r15 bytes, that is n - 1 eight-byte
C words past their starting values.
85 ifdef(`OPERATION_lshift',
86 ` shladd up = r15, 3, up C M I
87 shladd rp = r15, 3, rp') C M I
C r11 = up + POFF.  POFF is defined elsewhere and r11 is presumably a
C prefetch address - confirm.  Then load the first limb into r10 and
C post-increment up by UPD.
89 }{.mmi; add r11 = POFF, up C M I
90 ld8 r10 = [up], UPD C M01
C Feed-in code.  The visible entry labels are .Lb00, .Lb10 and .Lb11; the
C predicated p15 sequence below belongs to a further variant whose label is
C not shown.  Each visible variant loads more limbs with post-increment by
C UPD and places BSH of the first limb r10 by tnc in r8 as the function
C return value.  The (p14) branches go to the matching .grtN continuation.
C p14 and p15 come from the compare of n against 4 done at function entry.
C NOTE(review): FSH and BSH are macros defined outside this excerpt,
C presumably the shift in the operation direction and the opposite one.
99 .Lb00: ld8 r19 = [up], UPD
104 BSH r8 = r10, tnc C function return value
105 (p14) br.cond.dptk .grt4
121 .grt4: FSH r24 = r10, cnt
C With p15 set: compute the return value and the result limb r22 from r10,
C then branch to .Lr1 which stores r22.  NOTE(review): presumably the
C n = 1 case - confirm.
142 (p15) BSH r8 = r10, tnc C function return value I
143 (p15) FSH r22 = r10, cnt C I
144 (p15) br.cond.dptk .Lr1 C return B
146 .grt1: ld8 r18 = [up], UPD
149 BSH r8 = r10, tnc C function return value
C NOTE(review): .grt5, .grt6 and .grt7 are further labels whose incoming
C branches are not visible in this excerpt.
170 .grt5: FSH r24 = r18, cnt
186 .Lb10: ld8 r17 = [up], UPD
187 (p14) br.cond.dptk .grt2
189 BSH r8 = r10, tnc C function return value
198 .grt2: ld8 r18 = [up], UPD
199 BSH r8 = r10, tnc C function return value
221 .grt6: or r14 = r21, r20
233 .Lb11: ld8 r16 = [up], UPD
236 BSH r8 = r10, tnc C function return value
237 (p14) br.cond.dptk .grt3
250 .grt3: ld8 r18 = [up], UPD
270 .grt7: or r15 = r27, r26
276 C *** MAIN LOOP START ***
C The visible bundles hold four stores through rp and four loads through up,
C each post-incrementing its pointer by UPD, so one pass handles four limbs.
C Loads cycle through r18, r19, r16 and r17; stores alternate between r14
C and r15.  Visible shifts are FSH by cnt and BSH by tnc; presumably each
C output limb is the OR of one FSH result and one BSH result - confirm.
C The or in the first bundle consumes r26 and r27 while the shifts in the
C same group write r24 and r25, matching the deep scheduling described in
C the file header.
C NOTE(review): the loop label and the loop-closing branch are not in this
C excerpt; the final .mib bundle presumably carries that branch.
279 {.mmi; st8 [rp] = r14, UPD C M2
280 or r15 = r27, r26 C M3
281 FSH r24 = r18, cnt C I0
282 }{.mmi; ld8 r18 = [up], UPD C M1
284 BSH r25 = r19, tnc C I1
287 {.mmi; st8 [rp] = r15, UPD
290 }{.mmi; ld8 r19 = [up], UPD
295 {.mmi; st8 [rp] = r14, UPD
298 }{.mmi; ld8 r16 = [up], UPD
303 {.mmi; st8 [rp] = r15, UPD
306 }{.mib; ld8 r17 = [up], UPD
311 C *** MAIN LOOP END ***
C Wind-down code.  .Lbot is followed by labels .Lr7 down to .Lr1; the
C visible instructions combine shifted halves with or and store the
C remaining result limbs through rp with post-increment by UPD.
C NOTE(review): presumably each .LrK label is an entry point that falls
C through to the next, so entering at .LrK finishes the last K limbs - the
C intervening lines are not in this excerpt; confirm.
313 .Lbot: or r15 = r27, r26
318 .Lr7: or r14 = r21, r20
323 .Lr6: or r15 = r23, r22
328 .Lr5: st8 [rp] = r15, UPD
332 .Lr4: or r15 = r27, r26
335 .Lr3: or r14 = r21, r20
338 .Lr2: st8 [rp] = r14, UPD
C Store the final limb held in r22, then return through b0.  r8 was loaded
C with the function return value by the feed-in code.
340 .Lr1: st8 [rp] = r22, UPD C M23
342 br.ret.sptk.many b0 C B