From: Andreas Jaeger Date: Tue, 10 Sep 2002 11:23:00 +0000 (+0000) Subject: MIPS specific optimizations. X-Git-Tag: upstream/2.30~21164 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9f70d804bffbaf8db95a99ec238c6797d6ea65a0;p=external%2Fglibc.git MIPS specific optimizations. --- diff --git a/sysdeps/mips/fpu/e_sqrt.c b/sysdeps/mips/fpu/e_sqrt.c new file mode 100644 index 0000000..5449710 --- /dev/null +++ b/sysdeps/mips/fpu/e_sqrt.c @@ -0,0 +1,38 @@ +/* Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Hartvig Ekner , 2002. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + + +#include + + +#if (_MIPS_ISA >= _MIPS_ISA_MIPS2) + +double +__ieee754_sqrt (double x) +{ + double z; + __asm__ ("sqrt.d %0,%1" : "=f" (z) : "f" (x)); + return z; +} + +#else + +#include + +#endif diff --git a/sysdeps/mips/fpu/e_sqrtf.c b/sysdeps/mips/fpu/e_sqrtf.c new file mode 100644 index 0000000..3590ad4 --- /dev/null +++ b/sysdeps/mips/fpu/e_sqrtf.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Hartvig Ekner , 2002. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + + +#include + + +#if (_MIPS_ISA >= _MIPS_ISA_MIPS2) + +float +__ieee754_sqrtf (float x) +{ + float z; + __asm__ ("sqrt.s %0,%1" : "=f" (z) : "f" (x)); + return z; +} + +#else + +#include + +#endif + diff --git a/sysdeps/mips/fpu/fsetexcptflg.c b/sysdeps/mips/fpu/fsetexcptflg.c new file mode 100644 index 0000000..c65d793 --- /dev/null +++ b/sysdeps/mips/fpu/fsetexcptflg.c @@ -0,0 +1,43 @@ +/* Set floating-point environment exception handling. + Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Hartvig Ekner , 2002. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include + +int +fesetexceptflag (const fexcept_t *flagp, int excepts) +{ + fexcept_t temp; + + /* Get the current exceptions. */ + _FPU_GETCW (temp); + + /* Make sure the flags we want restored are legal. */ + excepts &= FE_ALL_EXCEPT; + + /* Now clear the bits called for, and copy them in from flagp. Note that + we ignore all non-flag bits from *flagp, so they don't matter. */ + temp = (temp & ~excepts) | (*flagp & excepts); + + _FPU_SETCW (temp); + + /* Success. */ + return 0; +} diff --git a/sysdeps/mips/memcpy.S b/sysdeps/mips/memcpy.S new file mode 100644 index 0000000..394265e --- /dev/null +++ b/sysdeps/mips/memcpy.S @@ -0,0 +1,130 @@ +/* Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Hartvig Ekner , 2002. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include + + +/* void *memcpy(void *s1, const void *s2, size_t n); + + This routine could be optimized further for MIPS64, but this is left + as an exercise for the future. When it is done, the file should be kept + as a sisterfile to this one, and placed in the sysdeps/mips/mips64 + directory. */ + +#if __BYTE_ORDER == __BIG_ENDIAN +# define LWHI lwl /* high part is left in big-endian */ +# define SWHI swl /* high part is left in big-endian */ +# define LWLO lwr /* low part is right in big-endian */ +# define SWLO swr /* low part is right in big-endian */ +#else +# define LWHI lwr /* high part is right in little-endian */ +# define SWHI swr /* high part is right in little-endian */ +# define LWLO lwl /* low part is left in little-endian */ +# define SWLO swl /* low part is left in little-endian */ +#endif + +ENTRY (memcpy) + .set noreorder + + slti t0, a2, 8 # Less than 8? + bne t0, zero, $last8 + move v0, a0 # Setup exit value before too late + + xor t0, a1, a0 # Find a0/a1 displacement + andi t0, 0x3 + bne t0, zero, $shift # Go handle the unaligned case + subu t1, zero, a1 + andi t1, 0x3 # a0/a1 are aligned, but are we + beq t1, zero, $chk8w # starting in the middle of a word? + subu a2, t1 + LWHI t0, 0(a1) # Yes we are... take care of that + addu a1, t1 + SWHI t0, 0(a0) + addu a0, t1 + +$chk8w: andi t0, a2, 0x1f # 32 or more bytes left? + beq t0, a2, $chk1w + subu a3, a2, t0 # Yes + addu a3, a1 # a3 = end address of loop + move a2, t0 # a2 = what will be left after loop +$lop8w: lw t0, 0(a1) # Loop taking 8 words at a time + lw t1, 4(a1) + lw t2, 8(a1) + lw t3, 12(a1) + lw t4, 16(a1) + lw t5, 20(a1) + lw t6, 24(a1) + lw t7, 28(a1) + addiu a0, 32 + addiu a1, 32 + sw t0, -32(a0) + sw t1, -28(a0) + sw t2, -24(a0) + sw t3, -20(a0) + sw t4, -16(a0) + sw t5, -12(a0) + sw t6, -8(a0) + bne a1, a3, $lop8w + sw t7, -4(a0) + +$chk1w: andi t0, a2, 0x3 # 4 or more bytes left? + beq t0, a2, $last8 + subu a3, a2, t0 # Yes, handle them one word at a time + addu a3, a1 # a3 again end address + move a2, t0 +$lop1w: lw t0, 0(a1) + addiu a0, 4 + addiu a1, 4 + bne a1, a3, $lop1w + sw t0, -4(a0) + +$last8: blez a2, $lst8e # Handle last 8 bytes, one at a time + addu a3, a2, a1 +$lst8l: lb t0, 0(a1) + addiu a0, 1 + addiu a1, 1 + bne a1, a3, $lst8l + sb t0, -1(a0) +$lst8e: jr ra # Bye, bye + nop + +$shift: subu a3, zero, a0 # Src and Dest unaligned + andi a3, 0x3 # (unoptimized case...) + beq a3, zero, $shft1 + subu a2, a3 # a2 = bytes left + LWHI t0, 0(a1) # Take care of first odd part + LWLO t0, 3(a1) + addu a1, a3 + SWHI t0, 0(a0) + addu a0, a3 +$shft1: andi t0, a2, 0x3 + subu a3, a2, t0 + addu a3, a1 +$shfth: LWHI t1, 0(a1) # Limp through, word by word + LWLO t1, 3(a1) + addiu a0, 4 + addiu a1, 4 + bne a1, a3, $shfth + sw t1, -4(a0) + b $last8 # Handle anything which may be left + move a2, t0 + + .set reorder +END (memcpy) diff --git a/sysdeps/mips/memset.S b/sysdeps/mips/memset.S new file mode 100644 index 0000000..7e3f129 --- /dev/null +++ b/sysdeps/mips/memset.S @@ -0,0 +1,83 @@ +/* Copyright (C) 2002 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Hartvig Ekner , 2002. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include + + +/* void *memset(void *s, int c, size_t n). + + This routine could be optimized further for MIPS64, but this is left + as an exercise for the future. When it is done, the file should be kept + as a sisterfile to this one, and placed in the sysdeps/mips/mips64 + directory. */ + +#if __BYTE_ORDER == __BIG_ENDIAN +# define SWHI swl /* high part is left in big-endian */ +#else +# define SWHI swr /* high part is right in little-endian */ +#endif + +ENTRY (memset) + .set noreorder + + slti t1, a2, 8 # Less than 8? + bne t1, zero, $last8 + move v0, a0 # Setup exit value before too late + + beq a1, zero, $ueven # If zero pattern, no need to extend + andi a1, 0xff # Avoid problems with bogus arguments + sll t0, a1, 8 + or a1, t0 + sll t0, a1, 16 + or a1, t0 # a1 is now pattern in full word + +$ueven: subu t0, zero, a0 # Unaligned address? + andi t0, 0x3 + beq t0, zero, $chkw + subu a2, t0 + SWHI a1, 0(a0) # Yes, handle first unaligned part + addu a0, t0 # Now both a0 and a2 are updated + +$chkw: andi t0, a2, 0x7 # Enough left for one loop iteration? + beq t0, a2, $chkl + subu a3, a2, t0 + addu a3, a0 # a3 is last loop address +1 + move a2, t0 # a2 is now # of bytes left after loop +$loopw: addiu a0, 8 # Handle 2 words pr. iteration + sw a1, -8(a0) + bne a0, a3, $loopw + sw a1, -4(a0) + +$chkl: andi t0, a2, 0x4 # Check if there is at least a full + beq t0, zero, $last8 # word remaining after the loop + subu a2, t0 + sw a1, 0(a0) # Yes... + addiu a0, 4 + +$last8: blez a2, $exit # Handle last 8 bytes (if cnt>0) + addu a3, a2, a0 # a3 is last address +1 +$lst8l: addiu a0, 1 + bne a0, a3, $lst8l + sb a1, -1(a0) +$exit: j ra # Bye, bye + nop + + .set reorder +END (memset)