From 32fcb36dbf4f729d0b0e6d5c386fd1d68486463d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 11 Jan 2013 23:31:06 -0800 Subject: [PATCH] Add 64-bit VIS3 optimized GMP routines for sparc. * math/Makefile: Recognize gmp-sysdep_routines. * sysdeps/sparc/sparc64/multiarch/Makefile: Add VIS3 optimized GMP routines to sysdeps. * sysdeps/sparc/sparc64/multiarch/add_n-vis3.S: New file. * sysdeps/sparc/sparc64/multiarch/add_n.S: New file. * sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S: New file. * sysdeps/sparc/sparc64/multiarch/addmul_1.S: New file. * sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S: New file. * sysdeps/sparc/sparc64/multiarch/mul_1.S: New file. * sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S: New file. * sysdeps/sparc/sparc64/multiarch/sub_n.S: New file. * sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S: New file. * sysdeps/sparc/sparc64/multiarch/submul_1.S: New file. --- ChangeLog | 14 ++++ math/Makefile | 3 +- sysdeps/sparc/sparc64/multiarch/Makefile | 8 +++ sysdeps/sparc/sparc64/multiarch/add_n-vis3.S | 67 +++++++++++++++++++ sysdeps/sparc/sparc64/multiarch/add_n.S | 56 ++++++++++++++++ sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S | 87 +++++++++++++++++++++++++ sysdeps/sparc/sparc64/multiarch/addmul_1.S | 56 ++++++++++++++++ sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S | 73 +++++++++++++++++++++ sysdeps/sparc/sparc64/multiarch/mul_1.S | 56 ++++++++++++++++ sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S | 71 ++++++++++++++++++++ sysdeps/sparc/sparc64/multiarch/sub_n.S | 56 ++++++++++++++++ sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S | 87 +++++++++++++++++++++++++ sysdeps/sparc/sparc64/multiarch/submul_1.S | 56 ++++++++++++++++ 13 files changed, 689 insertions(+), 1 deletion(-) create mode 100644 sysdeps/sparc/sparc64/multiarch/add_n-vis3.S create mode 100644 sysdeps/sparc/sparc64/multiarch/add_n.S create mode 100644 sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S create mode 100644 sysdeps/sparc/sparc64/multiarch/addmul_1.S create mode 100644 sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S create mode 100644 sysdeps/sparc/sparc64/multiarch/mul_1.S create mode 100644 sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S create mode 100644 sysdeps/sparc/sparc64/multiarch/sub_n.S create mode 100644 sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S create mode 100644 sysdeps/sparc/sparc64/multiarch/submul_1.S diff --git a/ChangeLog b/ChangeLog index fa9a546..33cc724 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,19 @@ 2013-01-11 David S. Miller + * math/Makefile: Recognize gmp-sysdep_routines. + * sysdeps/sparc/sparc64/multiarch/Makefile: Add VIS3 optimized GMP routines + to sysdeps. + * sysdeps/sparc/sparc64/multiarch/add_n-vis3.S: New file. + * sysdeps/sparc/sparc64/multiarch/add_n.S: New file. + * sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S: New file. + * sysdeps/sparc/sparc64/multiarch/addmul_1.S: New file. + * sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S: New file. + * sysdeps/sparc/sparc64/multiarch/mul_1.S: New file. + * sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S: New file. + * sysdeps/sparc/sparc64/multiarch/sub_n.S: New file. + * sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S: New file. + * sysdeps/sparc/sparc64/multiarch/submul_1.S: New file. + * sysdeps/sparc/sparc32/sparcv9/mul_1.S: Properly optimize for 32-bit sparc V9 rather than using V8 code. * sysdeps/sparc/sparc32/sparcv9/addmul_1.S: Likewise. diff --git a/math/Makefile b/math/Makefile index db50800..b9519cf 100644 --- a/math/Makefile +++ b/math/Makefile @@ -199,7 +199,8 @@ endif gmp-objs = $(patsubst %,$(common-objpfx)stdlib/%.o,\ add_n sub_n cmp addmul_1 mul_1 mul_n divmod_1 \ - lshift rshift mp_clz_tab udiv_qrnnd inlines) + lshift rshift mp_clz_tab udiv_qrnnd inlines \ + $(gmp-sysdep_routines)) $(objpfx)atest-exp: $(gmp-objs) $(objpfx)atest-sincos: $(gmp-objs) $(objpfx)atest-exp2: $(gmp-objs) diff --git a/sysdeps/sparc/sparc64/multiarch/Makefile b/sysdeps/sparc/sparc64/multiarch/Makefile index 4ad7aff..111ed5f 100644 --- a/sysdeps/sparc/sparc64/multiarch/Makefile +++ b/sysdeps/sparc/sparc64/multiarch/Makefile @@ -10,3 +10,11 @@ ifeq ($(subdir),string) sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ memset-niagara1 memcpy-niagara4 memset-niagara4 endif + +ifeq ($(subdir),stdlib) +sysdep_routines += mul_1-vis3 addmul_1-vis3 submul_1-vis3 add_n-vis3 sub_n-vis3 +endif + +ifeq ($(subdir),math) +gmp-sysdep_routines = mul_1-vis3 addmul_1-vis3 submul_1-vis3 add_n-vis3 sub_n-vis3 +endif diff --git a/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S b/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S new file mode 100644 index 0000000..185f311 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S @@ -0,0 +1,67 @@ +! SPARC v9 64-bit VIS3 __mpn_add_n -- Add two limb vectors of the same length > 0 and +! store sum in a third limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! . + +#include + +#define res_ptr %o0 +#define s1_ptr %o1 +#define s2_ptr %o2 +#define sz %o3 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 + + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_add_n_vis3) + subcc sz, 1, sz + be .Lfinal_limb + cmp %g0, 0 + +.Lloop: + ldx [s2_ptr + 0x00], tmp1 + add s2_ptr, 0x10, s2_ptr + ldx [s1_ptr + 0x00], tmp2 + add s1_ptr, 0x10, s1_ptr + ldx [s2_ptr - 0x08], tmp3 + add res_ptr, 0x10, res_ptr + ldx [s1_ptr - 0x08], tmp4 + sub sz, 2, sz + addxccc tmp1, tmp2, tmp1 + stx tmp1, [res_ptr - 0x10] + addxccc tmp3, tmp4, tmp3 + brgz sz, .Lloop + stx tmp3, [res_ptr - 0x08] + + brlz,pt sz, .Lfinish + nop + +.Lfinal_limb: + ldx [s2_ptr + 0x00], tmp1 + ldx [s1_ptr + 0x00], tmp2 + addxccc tmp1, tmp2, tmp1 + stx tmp1, [res_ptr + 0x00] + +.Lfinish: + retl + addxc %g0, %g0, %o0 +END(__mpn_add_n_vis3) diff --git a/sysdeps/sparc/sparc64/multiarch/add_n.S b/sysdeps/sparc/sparc64/multiarch/add_n.S new file mode 100644 index 0000000..25cae39 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/add_n.S @@ -0,0 +1,56 @@ +/* Multiple versions of add_n + + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +ENTRY(__mpn_add_n) + .type __mpn_add_n, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_add_n_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_add_n_vis3), %o1 +# else + set __mpn_add_n_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_add_n_generic), %o1 + xor %o1, %gdop_lox10(__mpn_add_n_generic), %o1 +# else + set __mpn_add_n_vis3, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(__mpn_add_n) + +#define __mpn_add_n __mpn_add_n_generic +#include "../add_n.S" diff --git a/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S b/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S new file mode 100644 index 0000000..f955b27 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S @@ -0,0 +1,87 @@ +! SPARC v9 64-bit VIS3 __mpn_addmul_1 -- Multiply a limb vector with a +! limb and add the result to a second limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! . + +#include + +#define res_ptr %i0 +#define s1_ptr %i1 +#define sz %i2 +#define s2_limb %i3 +#define carry %o5 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 +#define tmp5 %l0 +#define tmp6 %l1 +#define tmp7 %l2 +#define tmp8 %l3 + + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_addmul_1_vis3) + save %sp, -176, %sp + subcc sz, 1, sz + be .Lfinal_limb + clr carry + +.Lloop: + ldx [s1_ptr + 0x00], tmp1 + ldx [res_ptr + 0x00], tmp3 + ldx [s1_ptr + 0x08], tmp2 + ldx [res_ptr + 0x08], tmp4 + mulx tmp1, s2_limb, tmp5 + add s1_ptr, 0x10, s1_ptr + umulxhi tmp1, s2_limb, tmp6 + add res_ptr, 0x10, res_ptr + mulx tmp2, s2_limb, tmp7 + sub sz, 2, sz + umulxhi tmp2, s2_limb, tmp8 + addcc carry, tmp5, tmp5 + addxc %g0, tmp6, carry + addcc tmp3, tmp5, tmp5 + addxc %g0, carry, carry + stx tmp5, [res_ptr - 0x10] + addcc carry, tmp7, tmp7 + addxc %g0, tmp8, carry + addcc tmp4, tmp7, tmp7 + addxc %g0, carry, carry + brgz sz, .Lloop + stx tmp7, [res_ptr - 0x08] + + brlz,pt sz, .Lfinish + nop + +.Lfinal_limb: + ldx [s1_ptr + 0x00], tmp1 + ldx [res_ptr + 0x00], tmp3 + mulx tmp1, s2_limb, tmp5 + umulxhi tmp1, s2_limb, tmp6 + addcc carry, tmp5, tmp5 + addxc %g0, tmp6, carry + addcc tmp3, tmp5, tmp5 + addxc %g0, carry, carry + stx tmp5, [res_ptr + 0x00] + +.Lfinish: + jmpl %i7 + 8, %g0 + restore carry, 0, %o0 +END(__mpn_addmul_1_vis3) diff --git a/sysdeps/sparc/sparc64/multiarch/addmul_1.S b/sysdeps/sparc/sparc64/multiarch/addmul_1.S new file mode 100644 index 0000000..a1659e4 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/addmul_1.S @@ -0,0 +1,56 @@ +/* Multiple versions of addmul_1 + + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +ENTRY(__mpn_addmul_1) + .type __mpn_addmul_1, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_addmul_1_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_addmul_1_vis3), %o1 +# else + set __mpn_addmul_1_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_addmul_1_generic), %o1 + xor %o1, %gdop_lox10(__mpn_addmul_1_generic), %o1 +# else + set __mpn_addmul_1_vis3, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(__mpn_addmul_1) + +#define __mpn_addmul_1 __mpn_addmul_1_generic +#include "../addmul_1.S" diff --git a/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S b/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S new file mode 100644 index 0000000..61fbe27 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S @@ -0,0 +1,73 @@ +! SPARC v9 64-bit VIS3 __mpn_mul_1 -- Multiply a limb vector with a single +! limb and store the product in a second limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! . + +#include + +#define res_ptr %o0 +#define s1_ptr %o1 +#define sz %o2 +#define s2_limb %o3 +#define carry %o5 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 + + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_mul_1_vis3) + subcc sz, 1, sz + be .Lfinal_limb + clr carry + +.Lloop: + ldx [s1_ptr + 0x00], tmp1 + ldx [s1_ptr + 0x08], tmp4 + mulx tmp1, s2_limb, tmp3 + add s1_ptr, 0x10, s1_ptr + umulxhi tmp1, s2_limb, tmp2 + sub sz, 2, sz + mulx tmp4, s2_limb, tmp1 + add res_ptr, 0x10, res_ptr + umulxhi tmp4, s2_limb, tmp4 + addcc carry, tmp3, tmp3 + stx tmp3, [res_ptr - 0x10] + addxc %g0, tmp2, carry + addcc carry, tmp1, tmp1 + addxc %g0, tmp4, carry + brgz sz, .Lloop + stx tmp1, [res_ptr - 0x08] + + brlz,pt sz, .Lfinish + nop + +.Lfinal_limb: + ldx [s1_ptr + 0x00], tmp1 + mulx tmp1, s2_limb, tmp3 + umulxhi tmp1, s2_limb, tmp2 + addcc carry, tmp3, tmp3 + addxc %g0, tmp2, carry + stx tmp3, [res_ptr + 0x00] + +.Lfinish: + retl + mov carry, %o0 +END(__mpn_mul_1_vis3) diff --git a/sysdeps/sparc/sparc64/multiarch/mul_1.S b/sysdeps/sparc/sparc64/multiarch/mul_1.S new file mode 100644 index 0000000..25f51bf --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/mul_1.S @@ -0,0 +1,56 @@ +/* Multiple versions of mul_1 + + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +ENTRY(__mpn_mul_1) + .type __mpn_mul_1, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_mul_1_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_mul_1_vis3), %o1 +# else + set __mpn_mul_1_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_mul_1_generic), %o1 + xor %o1, %gdop_lox10(__mpn_mul_1_generic), %o1 +# else + set __mpn_mul_1_vis3, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(__mpn_mul_1) + +#define __mpn_mul_1 __mpn_mul_1_generic +#include "../mul_1.S" diff --git a/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S b/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S new file mode 100644 index 0000000..4e9a786 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S @@ -0,0 +1,71 @@ +! SPARC v9 64-bit VIS3 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 +! and store difference in a third limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! . + +#include + +#define res_ptr %o0 +#define s1_ptr %o1 +#define s2_ptr %o2 +#define sz %o3 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 + + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_sub_n_vis3) + subcc sz, 1, sz + be .Lfinal_limb + cmp %g0, 1 + +.Lloop: + ldx [s2_ptr + 0x00], tmp1 + add s2_ptr, 0x10, s2_ptr + ldx [s1_ptr + 0x00], tmp2 + add s1_ptr, 0x10, s1_ptr + ldx [s2_ptr - 0x08], tmp3 + add res_ptr, 0x10, res_ptr + ldx [s1_ptr - 0x08], tmp4 + sub sz, 2, sz + xnor tmp1, %g0, tmp1 + addxccc tmp1, tmp2, tmp1 + stx tmp1, [res_ptr - 0x10] + xnor tmp3, %g0, tmp3 + addxccc tmp3, tmp4, tmp3 + brgz sz, .Lloop + stx tmp3, [res_ptr - 0x08] + + brlz,pt sz, .Lfinish + nop + +.Lfinal_limb: + ldx [s2_ptr + 0x00], tmp1 + ldx [s1_ptr + 0x00], tmp2 + xnor tmp1, %g0, tmp1 + addxccc tmp1, tmp2, tmp1 + stx tmp1, [res_ptr + 0x00] + +.Lfinish: + clr %o0 + retl + movcc %xcc, 1, %o0 +END(__mpn_sub_n_vis3) diff --git a/sysdeps/sparc/sparc64/multiarch/sub_n.S b/sysdeps/sparc/sparc64/multiarch/sub_n.S new file mode 100644 index 0000000..5e15bea --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/sub_n.S @@ -0,0 +1,56 @@ +/* Multiple versions of sub_n + + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +ENTRY(__mpn_sub_n) + .type __mpn_sub_n, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_sub_n_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_sub_n_vis3), %o1 +# else + set __mpn_sub_n_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_sub_n_generic), %o1 + xor %o1, %gdop_lox10(__mpn_sub_n_generic), %o1 +# else + set __mpn_sub_n_vis3, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(__mpn_sub_n) + +#define __mpn_sub_n __mpn_sub_n_generic +#include "../sub_n.S" diff --git a/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S b/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S new file mode 100644 index 0000000..8f10f91 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S @@ -0,0 +1,87 @@ +! SPARC v9 64-bit VIS3 __mpn_submul_1 -- Multiply a limb vector with a +! limb and subtract the result from a second limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! . + +#include + +#define res_ptr %i0 +#define s1_ptr %i1 +#define sz %i2 +#define s2_limb %i3 +#define carry %o5 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 +#define tmp5 %l0 +#define tmp6 %l1 +#define tmp7 %l2 +#define tmp8 %l3 + + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_submul_1_vis3) + save %sp, -176, %sp + subcc sz, 1, sz + be .Lfinal_limb + clr carry + +.Lloop: + ldx [s1_ptr + 0x00], tmp1 + ldx [res_ptr + 0x00], tmp3 + ldx [s1_ptr + 0x08], tmp2 + ldx [res_ptr + 0x08], tmp4 + mulx tmp1, s2_limb, tmp5 + add s1_ptr, 0x10, s1_ptr + umulxhi tmp1, s2_limb, tmp6 + add res_ptr, 0x10, res_ptr + mulx tmp2, s2_limb, tmp7 + sub sz, 2, sz + umulxhi tmp2, s2_limb, tmp8 + addcc carry, tmp5, tmp5 + addxc %g0, tmp6, carry + subcc tmp3, tmp5, tmp5 + addxc %g0, carry, carry + stx tmp5, [res_ptr - 0x10] + addcc carry, tmp7, tmp7 + addxc %g0, tmp8, carry + subcc tmp4, tmp7, tmp7 + addxc %g0, carry, carry + brgz sz, .Lloop + stx tmp7, [res_ptr - 0x08] + + brlz,pt sz, .Lfinish + nop + +.Lfinal_limb: + ldx [s1_ptr + 0x00], tmp1 + ldx [res_ptr + 0x00], tmp3 + mulx tmp1, s2_limb, tmp5 + umulxhi tmp1, s2_limb, tmp6 + addcc carry, tmp5, tmp5 + addxc %g0, tmp6, carry + subcc tmp3, tmp5, tmp5 + addxc %g0, carry, carry + stx tmp5, [res_ptr + 0x00] + +.Lfinish: + jmpl %i7 + 8, %g0 + restore carry, 0, %o0 +END(__mpn_submul_1_vis3) diff --git a/sysdeps/sparc/sparc64/multiarch/submul_1.S b/sysdeps/sparc/sparc64/multiarch/submul_1.S new file mode 100644 index 0000000..68552e9 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/submul_1.S @@ -0,0 +1,56 @@ +/* Multiple versions of submul_1 + + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +ENTRY(__mpn_submul_1) + .type __mpn_submul_1, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_submul_1_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_submul_1_vis3), %o1 +# else + set __mpn_submul_1_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_submul_1_generic), %o1 + xor %o1, %gdop_lox10(__mpn_submul_1_generic), %o1 +# else + set __mpn_submul_1_vis3, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(__mpn_submul_1) + +#define __mpn_submul_1 __mpn_submul_1_generic +#include "../submul_1.S" -- 2.7.4