Upload Tizen:Base source
[external/gmp.git] / mpn / pa64 / umul.asm
1 dnl  Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
2
3 dnl  This file is part of the GNU MP Library.
4
5 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
6 dnl  it under the terms of the GNU Lesser General Public License as published
7 dnl  by the Free Software Foundation; either version 3 of the License, or (at
8 dnl  your option) any later version.
9
10 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
11 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
13 dnl  License for more details.
14
15 dnl  You should have received a copy of the GNU Lesser General Public License
16 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
17
18
19 dnl  Optimizations:
20 dnl  * Avoid skip instructions
21 dnl  * Put carry-generating and carry-consuming insns consecutively
22 dnl  * Don't allocate any stack, "home" positions for parameters could be
23 dnl    used.
24
25 include(`../config.m4')
26
27 define(`p0',`%r28')             C low 64 bits of the product (stored through %r31)
28 define(`p1',`%r29')             C high 64 bits of the product (the returned value)
29 define(`t32',`%r19')            C holds the constant 2^32
30 define(`t0',`%r20')             C lo32(m0+m1) moved into the upper half of a register
31 define(`t1',`%r21')             C hi32(m0+m1)
32 define(`x',`%r22')              C sum of the two cross products, m0+m1
33 define(`m0',`%r23')             C cross product read back from -120(%r30)
34 define(`m1',`%r24')             C cross product read back from -112(%r30)
35
36 ifdef(`HAVE_ABI_2_0w',
37 `       .level  2.0w            C assembler level used when HAVE_ABI_2_0w is defined
38 ',`     .level  2.0             C assembler level used otherwise
39 ')
40 PROLOGUE(mpn_umul_ppmm_r)       C 64x64 -> 128 bit unsigned multiply built from four 32x32 xmpyu products
41         ldo             128(%r30),%r30          C bump %r30 by 128; scratch is addressed at negative offsets
42 ifdef(`HAVE_ABI_2_0w',
43 `       std             %r26,-64(%r30)          C spill first operand for the FP load below
44         std             %r25,-56(%r30)          C spill second operand for the FP load below
45         copy            %r24,%r31               C keep the low-half store address; %r24 is reused as m1
46 ',`
47         depd            %r25,31,32,%r26         C put lo32(%r25) in the upper half of %r26, lower half kept
48         std             %r26,-64(%r30)          C spill first operand for the FP load below
49         depd            %r23,31,32,%r24         C same merge of %r23 into %r24
50         std             %r24,-56(%r30)          C spill second operand for the FP load below
51         ldw             -180(%r30),%r31         C low-half store address, at -52 from the entry %r30 (presumably a stack argument; confirm against the ABI)
52 ')
53
54         fldd            -64(%r30),%fr4          C %fr4 = first operand
55         fldd            -56(%r30),%fr5          C %fr5 = second operand
56
57         xmpyu           %fr5R,%fr4R,%fr6        C R*R partial product, read back as p0
58         fstd            %fr6,-128(%r30)         C results go through memory to reach the integer registers
59         xmpyu           %fr5R,%fr4L,%fr7        C R*L partial product, read back as m0
60         fstd            %fr7,-120(%r30)
61         xmpyu           %fr5L,%fr4R,%fr8        C L*R partial product, read back as m1
62         fstd            %fr8,-112(%r30)
63         xmpyu           %fr5L,%fr4L,%fr9        C L*L partial product, read back as p1
64         fstd            %fr9,-104(%r30)
65
66         depdi,z         1,31,1,t32              C t32 = 2^32
67
68         ldd             -128(%r30),p0           C lo = low 64 bit of product
69         ldd             -120(%r30),m0           C m0 = mid0 64 bit of product
70         ldd             -112(%r30),m1           C m1 = mid1 64 bit of product
71         ldd             -104(%r30),p1           C hi = high 64 bit of product
72
73         add,l,*nuv      m0,m1,x                 C x = m1+m0; the next insn is nullified unless the 64-bit sum carried out
74          add,l          t32,p1,p1               C propagate carry to mid of p1 (a carry out of x is worth 2^32 in p1)
75         depd,z          x,31,32,t0              C t0 = lo32(m1+m0) placed in the upper half, lower half zero
76         add             t0,p0,p0                C p0 += t0; the carry out is consumed by add,dc below
77         extrd,u         x,31,32,t1              C t1 = hi32(m1+m0)
78         add,dc          t1,p1,p1                C p1 += t1 + carry from the p0 addition
79
80         std             p0,0(%r31)              C store low half of product
81 ifdef(`HAVE_ABI_2_0w',
82 `       copy            p1,%r28                 C return val in %r28
83 ',`     extrd,u         p1,31,32,%r28           C return val in %r28,%r29 (hi32 of p1 goes to %r28; p1 itself is %r29)
84 ')
85         bve             (%r2)                   C return through the address in %r2
86         ldo             -128(%r30),%r30         C undo the 128 byte %r30 adjustment (runs in the branch delay slot)
87 EPILOGUE(mpn_umul_ppmm_r)
88