gpu: drm: img: Fix PVRSRV device initialization time
[platform/kernel/linux-starfive.git] / arch / mips / math-emu / sp_maddf.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IEEE754 floating point arithmetic
4  * single precision: MADDF.f (Fused Multiply Add)
5  * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
6  *
7  * MIPS floating point support
8  * Copyright (C) 2015 Imagination Technologies, Ltd.
9  * Author: Markos Chandras <markos.chandras@imgtec.com>
10  */
11
12 #include "ieee754sp.h"
13
14
15 static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
16                                  union ieee754sp y, enum maddf_flags flags)
17 {
18         int re;
19         int rs;
20         unsigned int rm;
21         u64 rm64;
22         u64 zm64;
23         int s;
24
25         COMPXSP;
26         COMPYSP;
27         COMPZSP;
28
29         EXPLODEXSP;
30         EXPLODEYSP;
31         EXPLODEZSP;
32
33         FLUSHXSP;
34         FLUSHYSP;
35         FLUSHZSP;
36
37         ieee754_clearcx();
38
39         rs = xs ^ ys;
40         if (flags & MADDF_NEGATE_PRODUCT)
41                 rs ^= 1;
42         if (flags & MADDF_NEGATE_ADDITION)
43                 zs ^= 1;
44
45         /*
46          * Handle the cases when at least one of x, y or z is a NaN.
47          * Order of precedence is sNaN, qNaN and z, x, y.
48          */
49         if (zc == IEEE754_CLASS_SNAN)
50                 return ieee754sp_nanxcpt(z);
51         if (xc == IEEE754_CLASS_SNAN)
52                 return ieee754sp_nanxcpt(x);
53         if (yc == IEEE754_CLASS_SNAN)
54                 return ieee754sp_nanxcpt(y);
55         if (zc == IEEE754_CLASS_QNAN)
56                 return z;
57         if (xc == IEEE754_CLASS_QNAN)
58                 return x;
59         if (yc == IEEE754_CLASS_QNAN)
60                 return y;
61
62         if (zc == IEEE754_CLASS_DNORM)
63                 SPDNORMZ;
64         /* ZERO z cases are handled separately below */
65
66         switch (CLPAIR(xc, yc)) {
67
68
69         /*
70          * Infinity handling
71          */
72         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
73         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
74                 ieee754_setcx(IEEE754_INVALID_OPERATION);
75                 return ieee754sp_indef();
76
77         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
78         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
79         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
80         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
81         case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
82                 if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
83                         /*
84                          * Cases of addition of infinities with opposite signs
85                          * or subtraction of infinities with same signs.
86                          */
87                         ieee754_setcx(IEEE754_INVALID_OPERATION);
88                         return ieee754sp_indef();
89                 }
90                 /*
91                  * z is here either not an infinity, or an infinity having the
92                  * same sign as product (x*y). The result must be an infinity,
93                  * and its sign is determined only by the sign of product (x*y).
94                  */
95                 return ieee754sp_inf(rs);
96
97         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
98         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
99         case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
100         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
101         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
102                 if (zc == IEEE754_CLASS_INF)
103                         return ieee754sp_inf(zs);
104                 if (zc == IEEE754_CLASS_ZERO) {
105                         /* Handle cases +0 + (-0) and similar ones. */
106                         if (zs == rs)
107                                 /*
108                                  * Cases of addition of zeros of equal signs
109                                  * or subtraction of zeroes of opposite signs.
110                                  * The sign of the resulting zero is in any
111                                  * such case determined only by the sign of z.
112                                  */
113                                 return z;
114
115                         return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
116                 }
117                 /* x*y is here 0, and z is not 0, so just return z */
118                 return z;
119
120         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
121                 SPDNORMX;
122                 fallthrough;
123         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
124                 if (zc == IEEE754_CLASS_INF)
125                         return ieee754sp_inf(zs);
126                 SPDNORMY;
127                 break;
128
129         case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
130                 if (zc == IEEE754_CLASS_INF)
131                         return ieee754sp_inf(zs);
132                 SPDNORMX;
133                 break;
134
135         case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
136                 if (zc == IEEE754_CLASS_INF)
137                         return ieee754sp_inf(zs);
138                 /* continue to real computations */
139         }
140
141         /* Finally get to do some computation */
142
143         /*
144          * Do the multiplication bit first
145          *
146          * rm = xm * ym, re = xe + ye basically
147          *
148          * At this point xm and ym should have been normalized.
149          */
150
151         /* rm = xm * ym, re = xe+ye basically */
152         assert(xm & SP_HIDDEN_BIT);
153         assert(ym & SP_HIDDEN_BIT);
154
155         re = xe + ye;
156
157         /* Multiple 24 bit xm and ym to give 48 bit results */
158         rm64 = (uint64_t)xm * ym;
159
160         /* Shunt to top of word */
161         rm64 = rm64 << 16;
162
163         /* Put explicit bit at bit 62 if necessary */
164         if ((int64_t) rm64 < 0) {
165                 rm64 = rm64 >> 1;
166                 re++;
167         }
168
169         assert(rm64 & (1 << 62));
170
171         if (zc == IEEE754_CLASS_ZERO) {
172                 /*
173                  * Move explicit bit from bit 62 to bit 26 since the
174                  * ieee754sp_format code expects the mantissa to be
175                  * 27 bits wide (24 + 3 rounding bits).
176                  */
177                 rm = XSPSRS64(rm64, (62 - 26));
178                 return ieee754sp_format(rs, re, rm);
179         }
180
181         /* Move explicit bit from bit 23 to bit 62 */
182         zm64 = (uint64_t)zm << (62 - 23);
183         assert(zm64 & (1 << 62));
184
185         /* Make the exponents the same */
186         if (ze > re) {
187                 /*
188                  * Have to shift r fraction right to align.
189                  */
190                 s = ze - re;
191                 rm64 = XSPSRS64(rm64, s);
192                 re += s;
193         } else if (re > ze) {
194                 /*
195                  * Have to shift z fraction right to align.
196                  */
197                 s = re - ze;
198                 zm64 = XSPSRS64(zm64, s);
199                 ze += s;
200         }
201         assert(ze == re);
202         assert(ze <= SP_EMAX);
203
204         /* Do the addition */
205         if (zs == rs) {
206                 /*
207                  * Generate 64 bit result by adding two 63 bit numbers
208                  * leaving result in zm64, zs and ze.
209                  */
210                 zm64 = zm64 + rm64;
211                 if ((int64_t)zm64 < 0) {        /* carry out */
212                         zm64 = XSPSRS1(zm64);
213                         ze++;
214                 }
215         } else {
216                 if (zm64 >= rm64) {
217                         zm64 = zm64 - rm64;
218                 } else {
219                         zm64 = rm64 - zm64;
220                         zs = rs;
221                 }
222                 if (zm64 == 0)
223                         return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
224
225                 /*
226                  * Put explicit bit at bit 62 if necessary.
227                  */
228                 while ((zm64 >> 62) == 0) {
229                         zm64 <<= 1;
230                         ze--;
231                 }
232         }
233
234         /*
235          * Move explicit bit from bit 62 to bit 26 since the
236          * ieee754sp_format code expects the mantissa to be
237          * 27 bits wide (24 + 3 rounding bits).
238          */
239         zm = XSPSRS64(zm64, (62 - 26));
240
241         return ieee754sp_format(zs, ze, zm);
242 }
243
244 union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
245                                 union ieee754sp y)
246 {
247         return _sp_maddf(z, x, y, 0);
248 }
249
250 union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
251                                 union ieee754sp y)
252 {
253         return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
254 }
255
256 union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x,
257                                 union ieee754sp y)
258 {
259         return _sp_maddf(z, x, y, 0);
260 }
261
262 union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x,
263                                 union ieee754sp y)
264 {
265         return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
266 }
267
268 union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x,
269                                 union ieee754sp y)
270 {
271         return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
272 }
273
274 union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x,
275                                 union ieee754sp y)
276 {
277         return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
278 }