From 3ec404d88cefbe42d96a46f20f554f8366d64c33 Mon Sep 17 00:00:00 2001 From: Aleksandar Markovic Date: Mon, 21 Aug 2017 14:24:48 +0200 Subject: [PATCH] MIPS: math-emu: RINT.: Fix several problems by reimplementation Reimplement RINT. kernel emulation so that all RINT. specifications are met. For the sake of simplicity, let's analyze RINT.S only. Prior to this patch, RINT.S emulation was essentially implemented as (in pseudocode) = ieee754sp_flong(ieee754sp_tlong()), where ieee754sp_tlong() and ieee754sp_flong() are functions providing conversion from double to integer, and from integer to double, respectively. On surface, this implementation looks correct, but actually fails in many cases. Following problems were detected: 1. NaN and infinity cases will not be handled properly. The function ieee754sp_flong() never returns NaN nor infinity. 2. For RINT.S, for all inputs larger than LONG_MAX, and smaller than FLT_MAX, the result will be wrong, and the overflow exception will be erroneously set. A similar problem for negative inputs exists as well. 3. For some rounding modes, for some negative inputs close to zero, the return value will be zero, and should be -zero. This is because ieee754sp_flong() never returns -zero. This patch removes the problems above by implementing dedicated functions for RINT. emulation. The core of the new function functionality is adapted version of the core of the function ieee754sp_tlong(). However, there are many details that are implemented to match RINT. specification. It should be said that the functionality of ieee754sp_tlong() actually closely corresponds to CVT.L.S instruction, and it is used while emulating CVT.L.S. However, RINT.S and CVT.L.S instructions differ in many aspects. This patch fulfills missing support for RINT.. Signed-off-by: Miodrag Dinic Signed-off-by: Goran Ferenc Signed-off-by: Aleksandar Markovic Cc: David S. Miller Cc: Douglas Leung Cc: Greg Kroah-Hartman Cc: Hans Verkuil Cc: James Hogan Cc: Maciej W. Rozycki Cc: Masahiro Yamada Cc: Mauro Carvalho Chehab Cc: Paul Burton Cc: Petar Jovanovic Cc: Raghu Gandham Cc: Randy Dunlap Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17141/ Signed-off-by: Ralf Baechle --- MAINTAINERS | 7 ++++ arch/mips/math-emu/Makefile | 6 ++- arch/mips/math-emu/cp1emu.c | 6 +-- arch/mips/math-emu/dp_rint.c | 89 +++++++++++++++++++++++++++++++++++++++++++ arch/mips/math-emu/ieee754.h | 2 + arch/mips/math-emu/sp_rint.c | 90 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 194 insertions(+), 6 deletions(-) create mode 100644 arch/mips/math-emu/dp_rint.c create mode 100644 arch/mips/math-emu/sp_rint.c diff --git a/MAINTAINERS b/MAINTAINERS index 63102ba..31d4274 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8802,6 +8802,13 @@ F: arch/mips/include/asm/mach-loongson32/ F: drivers/*/*loongson1* F: drivers/*/*/*loongson1* +MIPS RINT INSTRUCTION EMULATION +M: Aleksandar Markovic +L: linux-mips@linux-mips.org +S: Supported +F: arch/mips/math-emu/sp_rint.c +F: arch/mips/math-emu/dp_rint.c + MIROSOUND PCM20 FM RADIO RECEIVER DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org diff --git a/arch/mips/math-emu/Makefile b/arch/mips/math-emu/Makefile index e9bbc2a..e9f10b8 100644 --- a/arch/mips/math-emu/Makefile +++ b/arch/mips/math-emu/Makefile @@ -4,9 +4,11 @@ obj-y += cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \ dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \ - dp_tint.o dp_fint.o dp_maddf.o dp_2008class.o dp_fmin.o dp_fmax.o \ + dp_tint.o dp_fint.o dp_rint.o dp_maddf.o dp_2008class.o dp_fmin.o \ + dp_fmax.o \ sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \ - sp_tint.o sp_fint.o sp_maddf.o sp_2008class.o sp_fmin.o sp_fmax.o \ + sp_tint.o sp_fint.o sp_rint.o sp_maddf.o sp_2008class.o sp_fmin.o \ + sp_fmax.o \ dsemul.o lib-y += ieee754d.o \ diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 9694e9e..58396cb 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -1805,8 +1805,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, return SIGILL; SPFROMREG(fs, MIPSInst_FS(ir)); - rv.l = ieee754sp_tlong(fs); - rv.s = ieee754sp_flong(rv.l); + rv.s = ieee754sp_rint(fs); goto copcsr; } @@ -2134,8 +2133,7 @@ copcsr: return SIGILL; DPFROMREG(fs, MIPSInst_FS(ir)); - rv.l = ieee754dp_tlong(fs); - rv.d = ieee754dp_flong(rv.l); + rv.d = ieee754dp_rint(fs); goto copcsr; } diff --git a/arch/mips/math-emu/dp_rint.c b/arch/mips/math-emu/dp_rint.c new file mode 100644 index 0000000..c3b9077 --- /dev/null +++ b/arch/mips/math-emu/dp_rint.c @@ -0,0 +1,89 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * Copyright (C) 2017 Imagination Technologies, Ltd. + * Author: Aleksandar Markovic + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_rint(union ieee754dp x) +{ + union ieee754dp ret; + u64 residue; + int sticky; + int round; + int odd; + + COMPXDP; + + ieee754_clearcx(); + + EXPLODEXDP; + FLUSHXDP; + + if (xc == IEEE754_CLASS_SNAN) + return ieee754dp_nanxcpt(x); + + if ((xc == IEEE754_CLASS_QNAN) || + (xc == IEEE754_CLASS_INF) || + (xc == IEEE754_CLASS_ZERO)) + return x; + + if (xe >= DP_FBITS) + return x; + + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + residue = xm << (64 - DP_FBITS + xe); + round = (residue >> 63) != 0; + sticky = (residue << 1) != 0; + xm >>= DP_FBITS - xe; + } + + odd = (xm & 0x1) != 0x0; + + switch (ieee754_csr.rm) { + case FPU_CSR_RN: /* toward nearest */ + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: /* toward zero */ + break; + case FPU_CSR_RU: /* toward +infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + + ret = ieee754dp_flong(xm); + DPSIGN(ret) = xs; + + return ret; +} diff --git a/arch/mips/math-emu/ieee754.h b/arch/mips/math-emu/ieee754.h index d3be351..92dc8fa 100644 --- a/arch/mips/math-emu/ieee754.h +++ b/arch/mips/math-emu/ieee754.h @@ -67,6 +67,7 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y); union ieee754sp ieee754sp_fint(int x); union ieee754sp ieee754sp_flong(s64 x); union ieee754sp ieee754sp_fdp(union ieee754dp x); +union ieee754sp ieee754sp_rint(union ieee754sp x); int ieee754sp_tint(union ieee754sp x); s64 ieee754sp_tlong(union ieee754sp x); @@ -101,6 +102,7 @@ union ieee754dp ieee754dp_neg(union ieee754dp x); union ieee754dp ieee754dp_fint(int x); union ieee754dp ieee754dp_flong(s64 x); union ieee754dp ieee754dp_fsp(union ieee754sp x); +union ieee754dp ieee754dp_rint(union ieee754dp x); int ieee754dp_tint(union ieee754dp x); s64 ieee754dp_tlong(union ieee754dp x); diff --git a/arch/mips/math-emu/sp_rint.c b/arch/mips/math-emu/sp_rint.c new file mode 100644 index 0000000..70765b1 --- /dev/null +++ b/arch/mips/math-emu/sp_rint.c @@ -0,0 +1,90 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * Copyright (C) 2017 Imagination Technologies, Ltd. + * Author: Aleksandar Markovic + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_rint(union ieee754sp x) +{ + union ieee754sp ret; + u32 residue; + int sticky; + int round; + int odd; + + COMPXDP; /* <-- DP needed for 64-bit mantissa tmp */ + + ieee754_clearcx(); + + EXPLODEXSP; + FLUSHXSP; + + if (xc == IEEE754_CLASS_SNAN) + return ieee754sp_nanxcpt(x); + + if ((xc == IEEE754_CLASS_QNAN) || + (xc == IEEE754_CLASS_INF) || + (xc == IEEE754_CLASS_ZERO)) + return x; + + if (xe >= SP_FBITS) + return x; + + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + residue = xm << (xe + 1); + residue <<= 31 - SP_FBITS; + round = (residue >> 31) != 0; + sticky = (residue << 1) != 0; + xm >>= SP_FBITS - xe; + } + + odd = (xm & 0x1) != 0x0; + + switch (ieee754_csr.rm) { + case FPU_CSR_RN: /* toward nearest */ + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: /* toward zero */ + break; + case FPU_CSR_RU: /* toward +infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + + ret = ieee754sp_flong(xm); + SPSIGN(ret) = xs; + + return ret; +} -- 2.7.4