Setup dependent external sources
[platform/upstream/VK-GL-CTS.git] / external / spirv-tools / src / test / hex_float_test.cpp
1 // Copyright (c) 2015-2016 The Khronos Group Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <cfloat>
16 #include <cmath>
17 #include <cstdio>
18 #include <sstream>
19 #include <string>
20 #include <tuple>
21
22 #include <gmock/gmock.h>
23
24 #include "source/util/hex_float.h"
25 #include "unit_spirv.h"
26
27 namespace {
28 using ::testing::Eq;
29 using spvutils::BitwiseCast;
30 using spvutils::Float16;
31 using spvutils::FloatProxy;
32 using spvutils::HexFloat;
33 using spvutils::ParseNormalFloat;
34
35 // In this file "encode" means converting a number into a string,
36 // and "decode" means converting a string into a number.
37
38 using HexFloatTest =
39     ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
40 using DecodeHexFloatTest =
41     ::testing::TestWithParam<std::pair<std::string, FloatProxy<float>>>;
42 using HexDoubleTest =
43     ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
44 using DecodeHexDoubleTest =
45     ::testing::TestWithParam<std::pair<std::string, FloatProxy<double>>>;
46
47 // Hex-encodes a float value.
48 template <typename T>
49 std::string EncodeViaHexFloat(const T& value) {
50   std::stringstream ss;
51   ss << spvutils::HexFloat<T>(value);
52   return ss.str();
53 }
54
55 // The following two tests can't be DRY because they take different parameter
56 // types.
57
58 TEST_P(HexFloatTest, EncodeCorrectly) {
59   EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
60 }
61
62 TEST_P(HexDoubleTest, EncodeCorrectly) {
63   EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
64 }
65
66 // Decodes a hex-float string.
67 template <typename T>
68 FloatProxy<T> Decode(const std::string& str) {
69   spvutils::HexFloat<FloatProxy<T>> decoded(0.f);
70   EXPECT_TRUE((std::stringstream(str) >> decoded).eof());
71   return decoded.value();
72 }
73
74 TEST_P(HexFloatTest, DecodeCorrectly) {
75   EXPECT_THAT(Decode<float>(GetParam().second), Eq(GetParam().first));
76 }
77
78 TEST_P(HexDoubleTest, DecodeCorrectly) {
79   EXPECT_THAT(Decode<double>(GetParam().second), Eq(GetParam().first));
80 }
81
82 INSTANTIATE_TEST_CASE_P(
83     Float32Tests, HexFloatTest,
84     ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
85         {0.f, "0x0p+0"},
86         {1.f, "0x1p+0"},
87         {2.f, "0x1p+1"},
88         {3.f, "0x1.8p+1"},
89         {0.5f, "0x1p-1"},
90         {0.25f, "0x1p-2"},
91         {0.75f, "0x1.8p-1"},
92         {-0.f, "-0x0p+0"},
93         {-1.f, "-0x1p+0"},
94         {-0.5f, "-0x1p-1"},
95         {-0.25f, "-0x1p-2"},
96         {-0.75f, "-0x1.8p-1"},
97
98         // Larger numbers
99         {512.f, "0x1p+9"},
100         {-512.f, "-0x1p+9"},
101         {1024.f, "0x1p+10"},
102         {-1024.f, "-0x1p+10"},
103         {1024.f + 8.f, "0x1.02p+10"},
104         {-1024.f - 8.f, "-0x1.02p+10"},
105
106         // Small numbers
107         {1.0f / 512.f, "0x1p-9"},
108         {1.0f / -512.f, "-0x1p-9"},
109         {1.0f / 1024.f, "0x1p-10"},
110         {1.0f / -1024.f, "-0x1p-10"},
111         {1.0f / 1024.f + 1.0f / 8.f, "0x1.02p-3"},
112         {1.0f / -1024.f - 1.0f / 8.f, "-0x1.02p-3"},
113
114         // lowest non-denorm
115         {float(ldexp(1.0f, -126)), "0x1p-126"},
116         {float(ldexp(-1.0f, -126)), "-0x1p-126"},
117
118         // Denormalized values
119         {float(ldexp(1.0f, -127)), "0x1p-127"},
120         {float(ldexp(1.0f, -127) / 2.0f), "0x1p-128"},
121         {float(ldexp(1.0f, -127) / 4.0f), "0x1p-129"},
122         {float(ldexp(1.0f, -127) / 8.0f), "0x1p-130"},
123         {float(ldexp(-1.0f, -127)), "-0x1p-127"},
124         {float(ldexp(-1.0f, -127) / 2.0f), "-0x1p-128"},
125         {float(ldexp(-1.0f, -127) / 4.0f), "-0x1p-129"},
126         {float(ldexp(-1.0f, -127) / 8.0f), "-0x1p-130"},
127
128         {float(ldexp(1.0, -127) + (ldexp(1.0, -127) / 2.0f)), "0x1.8p-127"},
129         {float(ldexp(1.0, -127) / 2.0 + (ldexp(1.0, -127) / 4.0f)),
130          "0x1.8p-128"},
131
132     })),);
133
134 INSTANTIATE_TEST_CASE_P(
135     Float32NanTests, HexFloatTest,
136     ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
137         // Various NAN and INF cases
138         {uint32_t(0xFF800000), "-0x1p+128"},         // -inf
139         {uint32_t(0x7F800000), "0x1p+128"},          // inf
140         {uint32_t(0xFFC00000), "-0x1.8p+128"},       // -nan
141         {uint32_t(0xFF800100), "-0x1.0002p+128"},    // -nan
142         {uint32_t(0xFF800c00), "-0x1.0018p+128"},    // -nan
143         {uint32_t(0xFF80F000), "-0x1.01ep+128"},     // -nan
144         {uint32_t(0xFFFFFFFF), "-0x1.fffffep+128"},  // -nan
145         {uint32_t(0x7FC00000), "0x1.8p+128"},        // +nan
146         {uint32_t(0x7F800100), "0x1.0002p+128"},     // +nan
147         {uint32_t(0x7f800c00), "0x1.0018p+128"},     // +nan
148         {uint32_t(0x7F80F000), "0x1.01ep+128"},      // +nan
149         {uint32_t(0x7FFFFFFF), "0x1.fffffep+128"},   // +nan
150     })),);
151
152 INSTANTIATE_TEST_CASE_P(
153     Float64Tests, HexDoubleTest,
154     ::testing::ValuesIn(
155         std::vector<std::pair<FloatProxy<double>, std::string>>({
156             {0., "0x0p+0"},
157             {1., "0x1p+0"},
158             {2., "0x1p+1"},
159             {3., "0x1.8p+1"},
160             {0.5, "0x1p-1"},
161             {0.25, "0x1p-2"},
162             {0.75, "0x1.8p-1"},
163             {-0., "-0x0p+0"},
164             {-1., "-0x1p+0"},
165             {-0.5, "-0x1p-1"},
166             {-0.25, "-0x1p-2"},
167             {-0.75, "-0x1.8p-1"},
168
169             // Larger numbers
170             {512., "0x1p+9"},
171             {-512., "-0x1p+9"},
172             {1024., "0x1p+10"},
173             {-1024., "-0x1p+10"},
174             {1024. + 8., "0x1.02p+10"},
175             {-1024. - 8., "-0x1.02p+10"},
176
177             // Large outside the range of normal floats
178             {ldexp(1.0, 128), "0x1p+128"},
179             {ldexp(1.0, 129), "0x1p+129"},
180             {ldexp(-1.0, 128), "-0x1p+128"},
181             {ldexp(-1.0, 129), "-0x1p+129"},
182             {ldexp(1.0, 128) + ldexp(1.0, 90), "0x1.0000000004p+128"},
183             {ldexp(1.0, 129) + ldexp(1.0, 120), "0x1.008p+129"},
184             {ldexp(-1.0, 128) + ldexp(1.0, 90), "-0x1.fffffffff8p+127"},
185             {ldexp(-1.0, 129) + ldexp(1.0, 120), "-0x1.ffp+128"},
186
187             // Small numbers
188             {1.0 / 512., "0x1p-9"},
189             {1.0 / -512., "-0x1p-9"},
190             {1.0 / 1024., "0x1p-10"},
191             {1.0 / -1024., "-0x1p-10"},
192             {1.0 / 1024. + 1.0 / 8., "0x1.02p-3"},
193             {1.0 / -1024. - 1.0 / 8., "-0x1.02p-3"},
194
195             // Small outside the range of normal floats
196             {ldexp(1.0, -128), "0x1p-128"},
197             {ldexp(1.0, -129), "0x1p-129"},
198             {ldexp(-1.0, -128), "-0x1p-128"},
199             {ldexp(-1.0, -129), "-0x1p-129"},
200             {ldexp(1.0, -128) + ldexp(1.0, -90), "0x1.0000000004p-90"},
201             {ldexp(1.0, -129) + ldexp(1.0, -120), "0x1.008p-120"},
202             {ldexp(-1.0, -128) + ldexp(1.0, -90), "0x1.fffffffff8p-91"},
203             {ldexp(-1.0, -129) + ldexp(1.0, -120), "0x1.ffp-121"},
204
205             // lowest non-denorm
206             {ldexp(1.0, -1022), "0x1p-1022"},
207             {ldexp(-1.0, -1022), "-0x1p-1022"},
208
209             // Denormalized values
210             {ldexp(1.0, -1023), "0x1p-1023"},
211             {ldexp(1.0, -1023) / 2.0, "0x1p-1024"},
212             {ldexp(1.0, -1023) / 4.0, "0x1p-1025"},
213             {ldexp(1.0, -1023) / 8.0, "0x1p-1026"},
214             {ldexp(-1.0, -1024), "-0x1p-1024"},
215             {ldexp(-1.0, -1024) / 2.0, "-0x1p-1025"},
216             {ldexp(-1.0, -1024) / 4.0, "-0x1p-1026"},
217             {ldexp(-1.0, -1024) / 8.0, "-0x1p-1027"},
218
219             {ldexp(1.0, -1023) + (ldexp(1.0, -1023) / 2.0), "0x1.8p-1023"},
220             {ldexp(1.0, -1023) / 2.0 + (ldexp(1.0, -1023) / 4.0),
221              "0x1.8p-1024"},
222
223         })),);
224
225 INSTANTIATE_TEST_CASE_P(
226     Float64NanTests, HexDoubleTest,
227     ::testing::ValuesIn(std::vector<
228                         std::pair<FloatProxy<double>, std::string>>({
229         // Various NAN and INF cases
230         {uint64_t(0xFFF0000000000000LL), "-0x1p+1024"},                //-inf
231         {uint64_t(0x7FF0000000000000LL), "0x1p+1024"},                 //+inf
232         {uint64_t(0xFFF8000000000000LL), "-0x1.8p+1024"},              // -nan
233         {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},             // -nan
234         {uint64_t(0xFFF0000000000001LL), "-0x1.0000000000001p+1024"},  // -nan
235         {uint64_t(0xFFF0000300000000LL), "-0x1.00003p+1024"},          // -nan
236         {uint64_t(0xFFFFFFFFFFFFFFFFLL), "-0x1.fffffffffffffp+1024"},  // -nan
237         {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},               // +nan
238         {uint64_t(0x7FF0F00000000000LL), "0x1.0fp+1024"},              // +nan
239         {uint64_t(0x7FF0000000000001LL), "0x1.0000000000001p+1024"},   // -nan
240         {uint64_t(0x7FF0000300000000LL), "0x1.00003p+1024"},           // -nan
241         {uint64_t(0x7FFFFFFFFFFFFFFFLL), "0x1.fffffffffffffp+1024"},   // -nan
242     })),);
243
244 TEST(HexFloatStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
245   std::stringstream s;
246   s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
247     << FloatProxy<float>(uint32_t(0xFF800100)) << " " << std::setw(4) << 9;
248   EXPECT_THAT(s.str(), Eq(std::string("xx10 -0x1.0002p+128 xx11")));
249 }
250
251 TEST(HexDoubleStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
252   std::stringstream s;
253   s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
254     << FloatProxy<double>(uint64_t(0x7FF0F00000000000LL)) << " " << std::setw(4)
255     << 9;
256   EXPECT_THAT(s.str(), Eq(std::string("xx10 0x1.0fp+1024 xx11")));
257 }
258
259 TEST_P(DecodeHexFloatTest, DecodeCorrectly) {
260   EXPECT_THAT(Decode<float>(GetParam().first), Eq(GetParam().second));
261 }
262
263 TEST_P(DecodeHexDoubleTest, DecodeCorrectly) {
264   EXPECT_THAT(Decode<double>(GetParam().first), Eq(GetParam().second));
265 }
266
267 INSTANTIATE_TEST_CASE_P(
268     Float32DecodeTests, DecodeHexFloatTest,
269     ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
270         {"0x0p+000", 0.f},
271         {"0x0p0", 0.f},
272         {"0x0p-0", 0.f},
273
274         // flush to zero cases
275         {"0x1p-500", 0.f},  // Exponent underflows.
276         {"-0x1p-500", -0.f},
277         {"0x0.00000000001p-126", 0.f},  // Fraction causes underflow.
278         {"-0x0.0000000001p-127", -0.f},
279         {"-0x0.01p-142", -0.f},  // Fraction causes additional underflow.
280         {"0x0.01p-142", 0.f},
281
282         // Some floats that do not encode the same way as they decode.
283         {"0x2p+0", 2.f},
284         {"0xFFp+0", 255.f},
285         {"0x0.8p+0", 0.5f},
286         {"0x0.4p+0", 0.25f},
287     })),);
288
289 INSTANTIATE_TEST_CASE_P(
290     Float32DecodeInfTests, DecodeHexFloatTest,
291     ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
292         // inf cases
293         {"-0x1p+128", uint32_t(0xFF800000)},   // -inf
294         {"0x32p+127", uint32_t(0x7F800000)},   // inf
295         {"0x32p+500", uint32_t(0x7F800000)},   // inf
296         {"-0x32p+127", uint32_t(0xFF800000)},  // -inf
297     })),);
298
299 INSTANTIATE_TEST_CASE_P(
300     Float64DecodeTests, DecodeHexDoubleTest,
301     ::testing::ValuesIn(
302         std::vector<std::pair<std::string, FloatProxy<double>>>({
303             {"0x0p+000", 0.},
304             {"0x0p0", 0.},
305             {"0x0p-0", 0.},
306
307             // flush to zero cases
308             {"0x1p-5000", 0.},  // Exponent underflows.
309             {"-0x1p-5000", -0.},
310             {"0x0.0000000000000001p-1023", 0.},  // Fraction causes underflow.
311             {"-0x0.000000000000001p-1024", -0.},
312             {"-0x0.01p-1090", -0.f},  // Fraction causes additional underflow.
313             {"0x0.01p-1090", 0.},
314
315             // Some floats that do not encode the same way as they decode.
316             {"0x2p+0", 2.},
317             {"0xFFp+0", 255.},
318             {"0x0.8p+0", 0.5},
319             {"0x0.4p+0", 0.25},
320         })),);
321
322 INSTANTIATE_TEST_CASE_P(
323     Float64DecodeInfTests, DecodeHexDoubleTest,
324     ::testing::ValuesIn(
325         std::vector<std::pair<std::string, FloatProxy<double>>>({
326             // inf cases
327             {"-0x1p+1024", uint64_t(0xFFF0000000000000)},   // -inf
328             {"0x32p+1023", uint64_t(0x7FF0000000000000)},   // inf
329             {"0x32p+5000", uint64_t(0x7FF0000000000000)},   // inf
330             {"-0x32p+1023", uint64_t(0xFFF0000000000000)},  // -inf
331         })),);
332
333 TEST(FloatProxy, ValidConversion) {
334   EXPECT_THAT(FloatProxy<float>(1.f).getAsFloat(), Eq(1.0f));
335   EXPECT_THAT(FloatProxy<float>(32.f).getAsFloat(), Eq(32.0f));
336   EXPECT_THAT(FloatProxy<float>(-1.f).getAsFloat(), Eq(-1.0f));
337   EXPECT_THAT(FloatProxy<float>(0.f).getAsFloat(), Eq(0.0f));
338   EXPECT_THAT(FloatProxy<float>(-0.f).getAsFloat(), Eq(-0.0f));
339   EXPECT_THAT(FloatProxy<float>(1.2e32f).getAsFloat(), Eq(1.2e32f));
340
341   EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0xFF800000)).getAsFloat()));
342   EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0x7F800000)).getAsFloat()));
343   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFC00000)).getAsFloat()));
344   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800100)).getAsFloat()));
345   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800c00)).getAsFloat()));
346   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF80F000)).getAsFloat()));
347   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFFFFFFF)).getAsFloat()));
348   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FC00000)).getAsFloat()));
349   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F800100)).getAsFloat()));
350   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7f800c00)).getAsFloat()));
351   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F80F000)).getAsFloat()));
352   EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FFFFFFF)).getAsFloat()));
353
354   EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800000)).data(), Eq(0xFF800000u));
355   EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800000)).data(), Eq(0x7F800000u));
356   EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFC00000)).data(), Eq(0xFFC00000u));
357   EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800100)).data(), Eq(0xFF800100u));
358   EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800c00)).data(), Eq(0xFF800c00u));
359   EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF80F000)).data(), Eq(0xFF80F000u));
360   EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFFFFFFF)).data(), Eq(0xFFFFFFFFu));
361   EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FC00000)).data(), Eq(0x7FC00000u));
362   EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800100)).data(), Eq(0x7F800100u));
363   EXPECT_THAT(FloatProxy<float>(uint32_t(0x7f800c00)).data(), Eq(0x7f800c00u));
364   EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F80F000)).data(), Eq(0x7F80F000u));
365   EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FFFFFFF)).data(), Eq(0x7FFFFFFFu));
366 }
367
368 TEST(FloatProxy, Nan) {
369   EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFC00000)).isNan());
370   EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800100)).isNan());
371   EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800c00)).isNan());
372   EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF80F000)).isNan());
373   EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFFFFFFF)).isNan());
374   EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FC00000)).isNan());
375   EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F800100)).isNan());
376   EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7f800c00)).isNan());
377   EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F80F000)).isNan());
378   EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FFFFFFF)).isNan());
379 }
380
381 TEST(FloatProxy, Negation) {
382   EXPECT_THAT((-FloatProxy<float>(1.f)).getAsFloat(), Eq(-1.0f));
383   EXPECT_THAT((-FloatProxy<float>(0.f)).getAsFloat(), Eq(-0.0f));
384
385   EXPECT_THAT((-FloatProxy<float>(-1.f)).getAsFloat(), Eq(1.0f));
386   EXPECT_THAT((-FloatProxy<float>(-0.f)).getAsFloat(), Eq(0.0f));
387
388   EXPECT_THAT((-FloatProxy<float>(32.f)).getAsFloat(), Eq(-32.0f));
389   EXPECT_THAT((-FloatProxy<float>(-32.f)).getAsFloat(), Eq(32.0f));
390
391   EXPECT_THAT((-FloatProxy<float>(1.2e32f)).getAsFloat(), Eq(-1.2e32f));
392   EXPECT_THAT((-FloatProxy<float>(-1.2e32f)).getAsFloat(), Eq(1.2e32f));
393
394   EXPECT_THAT(
395       (-FloatProxy<float>(std::numeric_limits<float>::infinity())).getAsFloat(),
396       Eq(-std::numeric_limits<float>::infinity()));
397   EXPECT_THAT((-FloatProxy<float>(-std::numeric_limits<float>::infinity()))
398                   .getAsFloat(),
399               Eq(std::numeric_limits<float>::infinity()));
400 }
401
402 // Test conversion of FloatProxy values to strings.
403 //
404 // In previous cases, we always wrapped the FloatProxy value in a HexFloat
405 // before conversion to a string.  In the following cases, the FloatProxy
406 // decides for itself whether to print as a regular number or as a hex float.
407
408 using FloatProxyFloatTest =
409     ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
410 using FloatProxyDoubleTest =
411     ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
412
// Streams |value| with its own operator<< and returns the produced text.
template <typename T>
std::string EncodeViaFloatProxy(const T& value) {
  std::ostringstream text;
  text << value;
  return text.str();
}
420
// Rewrites a decimal floating point string so that its exponent marker is a
// lower-case 'e' and its exponent digits carry no leading zeros.  For
// example the Microsoft runtime may print "2.5E+010"; this returns "2.5e+10".
// Any input that does not look like <digits/sign/dot><e|E><+|-><integer> is
// returned unchanged.
std::string NormalizeExponentInFloatString(std::string in) {
  // The extra element leaves room for the terminating null sscanf writes and
  // keeps the buffer non-empty even for an empty input.
  std::vector<char> mantissa(in.size() + 1);
  char marker;
  char sign;
  int power;  // base-10 exponent
  const int matched =
      std::sscanf(in.c_str(), "%[-+.0123456789]%c%c%d", mantissa.data(),
                  &marker, &sign, &power);

  // The three outputs after the mantissa are only meaningful when all four
  // conversions succeeded, so that check must come first.
  const bool has_exponent = (matched == 4) &&
                            (marker == 'e' || marker == 'E') &&
                            (sign == '-' || sign == '+');
  if (!has_exponent) {
    return in;
  }

  std::stringstream normalized;
  normalized << mantissa.data() << 'e' << sign << power;
  return normalized.str();
}
447
448 TEST(NormalizeFloat, Sample) {
449   EXPECT_THAT(NormalizeExponentInFloatString(""), Eq(""));
450   EXPECT_THAT(NormalizeExponentInFloatString("1e-12"), Eq("1e-12"));
451   EXPECT_THAT(NormalizeExponentInFloatString("1E+14"), Eq("1e+14"));
452   EXPECT_THAT(NormalizeExponentInFloatString("1e-0012"), Eq("1e-12"));
453   EXPECT_THAT(NormalizeExponentInFloatString("1.263E+014"), Eq("1.263e+14"));
454 }
455
456 // The following two tests can't be DRY because they take different parameter
457 // types.
458 TEST_P(FloatProxyFloatTest, EncodeCorrectly) {
459   EXPECT_THAT(
460       NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
461       Eq(GetParam().second));
462 }
463
464 TEST_P(FloatProxyDoubleTest, EncodeCorrectly) {
465   EXPECT_THAT(
466       NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
467       Eq(GetParam().second));
468 }
469
470 INSTANTIATE_TEST_CASE_P(
471     Float32Tests, FloatProxyFloatTest,
472     ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
473         // Zero
474         {0.f, "0"},
475         // Normal numbers
476         {1.f, "1"},
477         {-0.25f, "-0.25"},
478         {1000.0f, "1000"},
479
480         // Still normal numbers, but with large magnitude exponents.
481         {float(ldexp(1.f, 126)), "8.50706e+37"},
482         {float(ldexp(-1.f, -126)), "-1.17549e-38"},
483
484         // denormalized values are printed as hex floats.
485         {float(ldexp(1.0f, -127)), "0x1p-127"},
486         {float(ldexp(1.5f, -128)), "0x1.8p-128"},
487         {float(ldexp(1.25, -129)), "0x1.4p-129"},
488         {float(ldexp(1.125, -130)), "0x1.2p-130"},
489         {float(ldexp(-1.0f, -127)), "-0x1p-127"},
490         {float(ldexp(-1.0f, -128)), "-0x1p-128"},
491         {float(ldexp(-1.0f, -129)), "-0x1p-129"},
492         {float(ldexp(-1.5f, -130)), "-0x1.8p-130"},
493
494         // NaNs
495         {FloatProxy<float>(uint32_t(0xFFC00000)), "-0x1.8p+128"},
496         {FloatProxy<float>(uint32_t(0xFF800100)), "-0x1.0002p+128"},
497
498         {std::numeric_limits<float>::infinity(), "0x1p+128"},
499         {-std::numeric_limits<float>::infinity(), "-0x1p+128"},
500     })),);
501
502 INSTANTIATE_TEST_CASE_P(
503     Float64Tests, FloatProxyDoubleTest,
504     ::testing::ValuesIn(
505         std::vector<std::pair<FloatProxy<double>, std::string>>({
506             {0., "0"},
507             {1., "1"},
508             {-0.25, "-0.25"},
509             {1000.0, "1000"},
510
511             // Large outside the range of normal floats
512             {ldexp(1.0, 128), "3.40282366920938e+38"},
513             {ldexp(1.5, 129), "1.02084710076282e+39"},
514             {ldexp(-1.0, 128), "-3.40282366920938e+38"},
515             {ldexp(-1.5, 129), "-1.02084710076282e+39"},
516
517             // Small outside the range of normal floats
518             {ldexp(1.5, -129), "2.20405190779179e-39"},
519             {ldexp(-1.5, -129), "-2.20405190779179e-39"},
520
521             // lowest non-denorm
522             {ldexp(1.0, -1022), "2.2250738585072e-308"},
523             {ldexp(-1.0, -1022), "-2.2250738585072e-308"},
524
525             // Denormalized values
526             {ldexp(1.125, -1023), "0x1.2p-1023"},
527             {ldexp(-1.375, -1024), "-0x1.6p-1024"},
528
529             // NaNs
530             {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},
531             {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},
532
533             // Infinity
534             {std::numeric_limits<double>::infinity(), "0x1p+1024"},
535             {-std::numeric_limits<double>::infinity(), "-0x1p+1024"},
536
537         })),);
538
539 // double is used so that unbiased_exponent can be used with the output
540 // of ldexp directly.
541 int32_t unbiased_exponent(double f) {
542   return spvutils::HexFloat<spvutils::FloatProxy<float>>(
543       static_cast<float>(f)).getUnbiasedNormalizedExponent();
544 }
545
546 int16_t unbiased_half_exponent(uint16_t f) {
547   return spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>(f)
548       .getUnbiasedNormalizedExponent();
549 }
550
551 TEST(HexFloatOperationTest, UnbiasedExponent) {
552   // Float cases
553   EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, 0)));
554   EXPECT_EQ(-32, unbiased_exponent(ldexp(1.0f, -32)));
555   EXPECT_EQ(42, unbiased_exponent(ldexp(1.0f, 42)));
556   EXPECT_EQ(125, unbiased_exponent(ldexp(1.0f, 125)));
557   // Saturates to 128
558   EXPECT_EQ(128, unbiased_exponent(ldexp(1.0f, 256)));
559
560   EXPECT_EQ(-100, unbiased_exponent(ldexp(1.0f, -100)));
561   EXPECT_EQ(-127, unbiased_exponent(ldexp(1.0f, -127))); // First denorm
562   EXPECT_EQ(-128, unbiased_exponent(ldexp(1.0f, -128)));
563   EXPECT_EQ(-129, unbiased_exponent(ldexp(1.0f, -129)));
564   EXPECT_EQ(-140, unbiased_exponent(ldexp(1.0f, -140)));
565   // Smallest representable number
566   EXPECT_EQ(-126 - 23, unbiased_exponent(ldexp(1.0f, -126 - 23)));
567   // Should get rounded to 0 first.
568   EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, -127 - 23)));
569
570   // Float16 cases
571   // The exponent is represented in the bits 0x7C00
572   // The offset is -15
573   EXPECT_EQ(0, unbiased_half_exponent(0x3C00));
574   EXPECT_EQ(3, unbiased_half_exponent(0x4800));
575   EXPECT_EQ(-1, unbiased_half_exponent(0x3800));
576   EXPECT_EQ(-14, unbiased_half_exponent(0x0400));
577   EXPECT_EQ(16, unbiased_half_exponent(0x7C00));
578   EXPECT_EQ(10, unbiased_half_exponent(0x6400));
579
580   // Smallest representable number
581   EXPECT_EQ(-24, unbiased_half_exponent(0x0001));
582 }
583
// Creates a float that is the sum of 1/(2 ^ fractions[i]) for every i in
// fractions, accumulated in order.  An empty list yields 0.
float float_fractions(const std::vector<uint32_t>& fractions) {
  float sum = 0.0f;
  for (auto it = fractions.begin(); it != fractions.end(); ++it) {
    // Move to a signed type before negating; negating the unsigned value
    // would wrap around instead of producing a negative exponent.
    const int32_t power = static_cast<int32_t>(*it);
    sum += std::ldexp(1.0f, -power);
  }
  return sum;
}
592
593 // Returns the normalized significand of a HexFloat<FloatProxy<float>>
594 // that was created by calling float_fractions with the input fractions,
595 // raised to the power of exp.
596 uint32_t normalized_significand(const std::vector<uint32_t>& fractions, uint32_t exp) {
597   return spvutils::HexFloat<spvutils::FloatProxy<float>>(
598              static_cast<float>(ldexp(float_fractions(fractions), exp)))
599       .getNormalizedSignificand();
600 }
601
// Builds a 32-bit float significand mask from bit positions counted from
// the most significant significand bit: index 0 selects 1 << 22, index 1
// selects 1 << 21, and so on down to index 22, which selects 1 << 0.
//
// Indices past 22 select no bit.  They are skipped explicitly because
// shifting a 32-bit value by 32 or more is undefined behaviour.
uint32_t bits_set(const std::vector<uint32_t>& bits) {
  const uint32_t kTopBitIndex = 22u;
  const uint32_t top_bit = 1u << kTopBitIndex;
  uint32_t val = 0;
  for (const uint32_t i : bits) {
    if (i > kTopBitIndex) {
      continue;  // Below the least significant bit: contributes nothing.
    }
    val |= top_bit >> i;
  }
  return val;
}
613
// Builds a Float16 significand mask from bit positions counted from the
// most significant significand bit: index 0 selects 1 << 9, index 1 selects
// 1 << 8, and so on down to index 9, which selects 1 << 0.
//
// Indices past 9 select no bit.  They are skipped explicitly because
// shifting a 32-bit value by 32 or more is undefined behaviour.
uint16_t half_bits_set(const std::vector<uint32_t>& bits) {
  const uint32_t kTopBitIndex = 9u;
  const uint32_t top_bit = 1u << kTopBitIndex;
  uint32_t val = 0;
  for (const uint32_t i : bits) {
    if (i > kTopBitIndex) {
      continue;  // Below the least significant bit: contributes nothing.
    }
    val |= top_bit >> i;
  }
  return static_cast<uint16_t>(val);
}
624
625 TEST(HexFloatOperationTest, NormalizedSignificand) {
626   // For normalized numbers (the following) it should be a simple matter
627   // of getting rid of the top implicit bit
628   EXPECT_EQ(bits_set({}), normalized_significand({0}, 0));
629   EXPECT_EQ(bits_set({0}), normalized_significand({0, 1}, 0));
630   EXPECT_EQ(bits_set({0, 1}), normalized_significand({0, 1, 2}, 0));
631   EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 0));
632   EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 32));
633   EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 126));
634
635   // For denormalized numbers we expect the normalized significand to
636   // shift as if it were normalized. This means, in practice that the
637   // top_most set bit will be cut off. Looks very similar to above (on purpose)
638   EXPECT_EQ(bits_set({}), normalized_significand({0}, -127));
639   EXPECT_EQ(bits_set({3}), normalized_significand({0, 4}, -128));
640   EXPECT_EQ(bits_set({3}), normalized_significand({0, 4}, -127));
641   EXPECT_EQ(bits_set({}), normalized_significand({22}, -127));
642   EXPECT_EQ(bits_set({0}), normalized_significand({21, 22}, -127));
643 }
644
645 // Returns the 32-bit floating point value created by
646 // calling setFromSignUnbiasedExponentAndNormalizedSignificand
647 // on a HexFloat<FloatProxy<float>>
648 float set_from_sign(bool negative, int32_t unbiased_exponent,
649                    uint32_t significand, bool round_denorm_up) {
650   spvutils::HexFloat<spvutils::FloatProxy<float>>  f(0.f);
651   f.setFromSignUnbiasedExponentAndNormalizedSignificand(
652       negative, unbiased_exponent, significand, round_denorm_up);
653   return f.value().getAsFloat();
654 }
655
656 TEST(HexFloatOperationTests,
657      SetFromSignUnbiasedExponentAndNormalizedSignificand) {
658
659   EXPECT_EQ(1.f, set_from_sign(false, 0, 0, false));
660
661   // Tests insertion of various denormalized numbers with and without round up.
662   EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -149, 0, false));
663   EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -149, 0, true));
664   EXPECT_EQ(0.f, set_from_sign(false, -150, 1, false));
665   EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -150, 1, true));
666
667   EXPECT_EQ(ldexp(1.0f, -127), set_from_sign(false, -127, 0, false));
668   EXPECT_EQ(ldexp(1.0f, -128), set_from_sign(false, -128, 0, false));
669   EXPECT_EQ(float_fractions({0, 1, 2, 5}),
670             set_from_sign(false, 0, bits_set({0, 1, 4}), false));
671   EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -32),
672             set_from_sign(false, -32, bits_set({0, 1, 4}), false));
673   EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -128),
674             set_from_sign(false, -128, bits_set({0, 1, 4}), false));
675
676   // The negative cases from above.
677   EXPECT_EQ(-1.f, set_from_sign(true, 0, 0, false));
678   EXPECT_EQ(-ldexp(1.0, -127), set_from_sign(true, -127, 0, false));
679   EXPECT_EQ(-ldexp(1.0, -128), set_from_sign(true, -128, 0, false));
680   EXPECT_EQ(-float_fractions({0, 1, 2, 5}),
681             set_from_sign(true, 0, bits_set({0, 1, 4}), false));
682   EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -32),
683             set_from_sign(true, -32, bits_set({0, 1, 4}), false));
684   EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -128),
685             set_from_sign(true, -128, bits_set({0, 1, 4}), false));
686 }
687
688 TEST(HexFloatOperationTests, NonRounding) {
689   // Rounding from 32-bit hex-float to 32-bit hex-float should be trivial,
690   // except in the denorm case which is a bit more complex.
691   using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
692   bool carry_bit = false;
693
694   spvutils::round_direction rounding[] = {
695       spvutils::round_direction::kToZero,
696       spvutils::round_direction::kToNearestEven,
697       spvutils::round_direction::kToPositiveInfinity,
698       spvutils::round_direction::kToNegativeInfinity};
699
700   // Everything fits, so this should be straight-forward
701   for (spvutils::round_direction round : rounding) {
702     EXPECT_EQ(bits_set({}), HF(0.f).getRoundedNormalizedSignificand<HF>(
703                                 round, &carry_bit));
704     EXPECT_FALSE(carry_bit);
705
706     EXPECT_EQ(bits_set({0}),
707               HF(float_fractions({0, 1}))
708                   .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
709     EXPECT_FALSE(carry_bit);
710
711     EXPECT_EQ(bits_set({1, 3}),
712               HF(float_fractions({0, 2, 4}))
713                   .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
714     EXPECT_FALSE(carry_bit);
715
716     EXPECT_EQ(
717         bits_set({0, 1, 4}),
718         HF(static_cast<float>(-ldexp(float_fractions({0, 1, 2, 5}), -128)))
719             .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
720     EXPECT_FALSE(carry_bit);
721
722     EXPECT_EQ(
723         bits_set({0, 1, 4, 22}),
724         HF(static_cast<float>(float_fractions({0, 1, 2, 5, 23})))
725             .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
726     EXPECT_FALSE(carry_bit);
727   }
728 }
729
730 using RD = spvutils::round_direction;
731 struct RoundSignificandCase {
732   float source_float;
733   std::pair<int16_t, bool> expected_results;
734   spvutils::round_direction round;
735 };
736
737 using HexFloatRoundTest =
738     ::testing::TestWithParam<RoundSignificandCase>;
739
740 TEST_P(HexFloatRoundTest, RoundDownToFP16) {
741   using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
742   using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
743
744   HF input_value(GetParam().source_float);
745   bool carry_bit = false;
746   EXPECT_EQ(GetParam().expected_results.first,
747             input_value.getRoundedNormalizedSignificand<HF16>(
748                 GetParam().round, &carry_bit));
749   EXPECT_EQ(carry_bit, GetParam().expected_results.second);
750 }
751
752 // clang-format off
753 INSTANTIATE_TEST_CASE_P(F32ToF16, HexFloatRoundTest,
754   ::testing::ValuesIn(std::vector<RoundSignificandCase>(
755   {
756     {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToZero},
757     {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNearestEven},
758     {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToPositiveInfinity},
759     {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNegativeInfinity},
760     {float_fractions({0, 1}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
761
762     {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
763     {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
764     {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
765     {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNearestEven},
766
767     {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToZero},
768     {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToPositiveInfinity},
769     {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity},
770     {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToNearestEven},
771
772     {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
773     {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
774     {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
775     {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
776
777     {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
778     {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToPositiveInfinity},
779     {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity},
780     {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
781
782     {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
783     {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
784     {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
785     {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
786
787     // Carries
788     {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToZero},
789     {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToPositiveInfinity},
790     {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToNegativeInfinity},
791     {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToNearestEven},
792
793     // Cases where original number was denorm. Note: this should have no effect
794     // the number is pre-normalized.
795     {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -128)), std::make_pair(half_bits_set({0}), false), RD::kToZero},
796     {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -129)), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
797     {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -131)), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
798     {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -130)), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
799   })),);
800 // clang-format on
801
// One parameter set for the significand-widening test.
struct UpCastSignificandCase {
  // Bit pattern of the half-precision source value.
  uint16_t source_half;
  // Significand expected after widening to a 32-bit float.
  uint32_t expected_result;
};
806
807 using HexFloatRoundUpSignificandTest =
808     ::testing::TestWithParam<UpCastSignificandCase>;
809 TEST_P(HexFloatRoundUpSignificandTest, Widening) {
810   using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
811   using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
812   bool carry_bit = false;
813
814   spvutils::round_direction rounding[] = {
815       spvutils::round_direction::kToZero,
816       spvutils::round_direction::kToNearestEven,
817       spvutils::round_direction::kToPositiveInfinity,
818       spvutils::round_direction::kToNegativeInfinity};
819
820   // Everything fits, so everything should just be bit-shifts.
821   for (spvutils::round_direction round : rounding) {
822     carry_bit = false;
823     HF16 input_value(GetParam().source_half);
824     EXPECT_EQ(
825         GetParam().expected_result,
826         input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit))
827         << std::hex << "0x"
828         << input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit)
829         << "  0x" << GetParam().expected_result;
830     EXPECT_FALSE(carry_bit);
831   }
832 }
833
834 INSTANTIATE_TEST_CASE_P(F16toF32, HexFloatRoundUpSignificandTest,
835   // 0xFC00 of the source 16-bit hex value cover the sign and the exponent.
836   // They are ignored for this test.
837   ::testing::ValuesIn(std::vector<UpCastSignificandCase>(
838   {
839     {0x3F00, 0x600000},
840     {0x0F00, 0x600000},
841     {0x0F01, 0x602000},
842     {0x0FFF, 0x7FE000},
843   })),);
844
845 struct DownCastTest {
846   float source_float;
847   uint16_t expected_half;
848   std::vector<spvutils::round_direction> directions;
849 };
850
851 std::string get_round_text(spvutils::round_direction direction) {
852 #define CASE(round_direction) \
853   case round_direction:      \
854     return #round_direction
855
856   switch (direction) {
857     CASE(spvutils::round_direction::kToZero);
858     CASE(spvutils::round_direction::kToPositiveInfinity);
859     CASE(spvutils::round_direction::kToNegativeInfinity);
860     CASE(spvutils::round_direction::kToNearestEven);
861   }
862 #undef CASE
863   return "";
864 }
865
866 using HexFloatFP32To16Tests = ::testing::TestWithParam<DownCastTest>;
867
868 TEST_P(HexFloatFP32To16Tests, NarrowingCasts) {
869   using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
870   using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
871   HF f(GetParam().source_float);
872   for (auto round : GetParam().directions) {
873     HF16 half(0);
874     f.castTo(half, round);
875     EXPECT_EQ(GetParam().expected_half, half.value().getAsFloat().get_value())
876         << get_round_text(round) << "  " << std::hex
877         << spvutils::BitwiseCast<uint32_t>(GetParam().source_float)
878         << " cast to: " << half.value().getAsFloat().get_value();
879   }
880 }
881
// Half-precision bit patterns used as the expected results of the overflow and
// infinity cases in the narrowing-cast table.
constexpr uint16_t positive_infinity = 0x7C00;
constexpr uint16_t negative_infinity = 0xFC00;
884
885 INSTANTIATE_TEST_CASE_P(F32ToF16, HexFloatFP32To16Tests,
886   ::testing::ValuesIn(std::vector<DownCastTest>(
887   {
888     // Exactly representable as half.
889     {0.f, 0x0, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
890     {-0.f, 0x8000, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
891     {1.0f, 0x3C00, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
892     {-1.0f, 0xBC00, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
893
894     {float_fractions({0, 1, 10}) , 0x3E01, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
895     {-float_fractions({0, 1, 10}) , 0xBE01, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
896     {static_cast<float>(ldexp(float_fractions({0, 1, 10}), 3)), 0x4A01, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
897     {static_cast<float>(-ldexp(float_fractions({0, 1, 10}), 3)), 0xCA01, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
898
899
900     // Underflow
901     {static_cast<float>(ldexp(1.0f, -25)), 0x0, {RD::kToZero, RD::kToNegativeInfinity, RD::kToNearestEven}},
902     {static_cast<float>(ldexp(1.0f, -25)), 0x1, {RD::kToPositiveInfinity}},
903     {static_cast<float>(-ldexp(1.0f, -25)), 0x8000, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNearestEven}},
904     {static_cast<float>(-ldexp(1.0f, -25)), 0x8001, {RD::kToNegativeInfinity}},
905     {static_cast<float>(ldexp(1.0f, -24)), 0x1, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
906
907     // Overflow
908     {static_cast<float>(ldexp(1.0f, 16)), positive_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
909     {static_cast<float>(ldexp(1.0f, 18)), positive_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
910     {static_cast<float>(ldexp(1.3f, 16)), positive_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
911     {static_cast<float>(-ldexp(1.0f, 16)), negative_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
912     {static_cast<float>(-ldexp(1.0f, 18)), negative_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
913     {static_cast<float>(-ldexp(1.3f, 16)), negative_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
914
915     // Transfer of Infinities
916     {std::numeric_limits<float>::infinity(), positive_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
917     {-std::numeric_limits<float>::infinity(), negative_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
918
919     // Nans are below because we cannot test for equality.
920   })),);
921
// One parameter set for the half -> float widening-cast test.
struct UpCastCase {
  // Bit pattern of the half-precision source value.
  uint16_t source_half;
  // Value expected after casting to a 32-bit float.
  float expected_float;
};
926
927 using HexFloatFP16To32Tests = ::testing::TestWithParam<UpCastCase>;
928 TEST_P(HexFloatFP16To32Tests, WideningCasts) {
929   using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
930   using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
931   HF16 f(GetParam().source_half);
932
933   spvutils::round_direction rounding[] = {
934       spvutils::round_direction::kToZero,
935       spvutils::round_direction::kToNearestEven,
936       spvutils::round_direction::kToPositiveInfinity,
937       spvutils::round_direction::kToNegativeInfinity};
938
939   // Everything fits, so everything should just be bit-shifts.
940   for (spvutils::round_direction round : rounding) {
941     HF flt(0.f);
942     f.castTo(flt, round);
943     EXPECT_EQ(GetParam().expected_float, flt.value().getAsFloat())
944         << get_round_text(round) << "  " << std::hex
945         << spvutils::BitwiseCast<uint16_t>(GetParam().source_half)
946         << " cast to: " << flt.value().getAsFloat();
947   }
948 }
949
950 INSTANTIATE_TEST_CASE_P(F16ToF32, HexFloatFP16To32Tests,
951   ::testing::ValuesIn(std::vector<UpCastCase>(
952   {
953     {0x0000, 0.f},
954     {0x8000, -0.f},
955     {0x3C00, 1.0f},
956     {0xBC00, -1.0f},
957     {0x3F00, float_fractions({0, 1, 2})},
958     {0xBF00, -float_fractions({0, 1, 2})},
959     {0x3F01, float_fractions({0, 1, 2, 10})},
960     {0xBF01, -float_fractions({0, 1, 2, 10})},
961
962     // denorm
963     {0x0001, static_cast<float>(ldexp(1.0, -24))},
964     {0x0002, static_cast<float>(ldexp(1.0, -23))},
965     {0x8001, static_cast<float>(-ldexp(1.0, -24))},
966     {0x8011, static_cast<float>(-ldexp(1.0, -20) + -ldexp(1.0, -24))},
967
968     // inf
969     {0x7C00, std::numeric_limits<float>::infinity()},
970     {0xFC00, -std::numeric_limits<float>::infinity()},
971   })),);
972
973 TEST(HexFloatOperationTests, NanTests) {
974   using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
975   using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
976   spvutils::round_direction rounding[] = {
977       spvutils::round_direction::kToZero,
978       spvutils::round_direction::kToNearestEven,
979       spvutils::round_direction::kToPositiveInfinity,
980       spvutils::round_direction::kToNegativeInfinity};
981
982   // Everything fits, so everything should just be bit-shifts.
983   for (spvutils::round_direction round : rounding) {
984     HF16 f16(0);
985     HF f(0.f);
986     HF(std::numeric_limits<float>::quiet_NaN()).castTo(f16, round);
987     EXPECT_TRUE(f16.value().isNan());
988     HF(std::numeric_limits<float>::signaling_NaN()).castTo(f16, round);
989     EXPECT_TRUE(f16.value().isNan());
990
991     HF16(0x7C01).castTo(f, round);
992     EXPECT_TRUE(f.value().isNan());
993     HF16(0x7C11).castTo(f, round);
994     EXPECT_TRUE(f.value().isNan());
995     HF16(0xFC01).castTo(f, round);
996     EXPECT_TRUE(f.value().isNan());
997     HF16(0x7C10).castTo(f, round);
998     EXPECT_TRUE(f.value().isNan());
999     HF16(0xFF00).castTo(f, round);
1000     EXPECT_TRUE(f.value().isNan());
1001   }
1002 }
1003
1004 // A test case for parsing good and bad HexFloat<FloatProxy<T>> literals.
1005 template <typename T>
1006 struct FloatParseCase {
1007   std::string literal;
1008   bool negate_value;
1009   bool expect_success;
1010   HexFloat<FloatProxy<T>> expected_value;
1011 };
1012
1013 using ParseNormalFloatTest = ::testing::TestWithParam<FloatParseCase<float>>;
1014
1015 TEST_P(ParseNormalFloatTest, Samples) {
1016   std::stringstream input(GetParam().literal);
1017   HexFloat<FloatProxy<float>> parsed_value(0.0f);
1018   ParseNormalFloat(input, GetParam().negate_value, parsed_value);
1019   EXPECT_NE(GetParam().expect_success, input.fail())
1020       << " literal: " << GetParam().literal
1021       << " negate: " << GetParam().negate_value;
1022   if (GetParam().expect_success) {
1023     EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
1024         << " literal: " << GetParam().literal
1025         << " negate: " << GetParam().negate_value;
1026   }
1027 }
1028
1029 // Returns a FloatParseCase with expected failure.
1030 template <typename T>
1031 FloatParseCase<T> BadFloatParseCase(std::string literal, bool negate_value,
1032                                     T expected_value) {
1033   HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
1034   return FloatParseCase<T>{literal, negate_value, false, proxy_expected_value};
1035 }
1036
1037 // Returns a FloatParseCase that should successfully parse to a given value.
1038 template <typename T>
1039 FloatParseCase<T> GoodFloatParseCase(std::string literal, bool negate_value,
1040                                      T expected_value) {
1041   HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
1042   return FloatParseCase<T>{literal, negate_value, true, proxy_expected_value};
1043 }
1044
1045 INSTANTIATE_TEST_CASE_P(
1046     FloatParse, ParseNormalFloatTest,
1047     ::testing::ValuesIn(std::vector<FloatParseCase<float>>{
1048         // Failing cases due to trivially incorrect syntax.
1049         BadFloatParseCase("abc", false, 0.0f),
1050         BadFloatParseCase("abc", true, 0.0f),
1051
1052         // Valid cases.
1053         GoodFloatParseCase("0", false, 0.0f),
1054         GoodFloatParseCase("0.0", false, 0.0f),
1055         GoodFloatParseCase("-0.0", false, -0.0f),
1056         GoodFloatParseCase("2.0", false, 2.0f),
1057         GoodFloatParseCase("-2.0", false, -2.0f),
1058         GoodFloatParseCase("+2.0", false, 2.0f),
1059         // Cases with negate_value being true.
1060         GoodFloatParseCase("0.0", true, -0.0f),
1061         GoodFloatParseCase("2.0", true, -2.0f),
1062
1063         // When negate_value is true, we should not accept a
1064         // leading minus or plus.
1065         BadFloatParseCase("-0.0", true, 0.0f),
1066         BadFloatParseCase("-2.0", true, 0.0f),
1067         BadFloatParseCase("+0.0", true, 0.0f),
1068         BadFloatParseCase("+2.0", true, 0.0f),
1069
1070         // Overflow is an error for 32-bit float parsing.
1071         BadFloatParseCase("1e40", false, FLT_MAX),
1072         BadFloatParseCase("1e40", true, -FLT_MAX),
1073         BadFloatParseCase("-1e40", false, -FLT_MAX),
1074         // We can't have -1e40 and negate_value == true since
1075         // that represents an original case of "--1e40" which
1076         // is invalid.
1077   }),);
1078
1079 using ParseNormalFloat16Test =
1080     ::testing::TestWithParam<FloatParseCase<Float16>>;
1081
1082 TEST_P(ParseNormalFloat16Test, Samples) {
1083   std::stringstream input(GetParam().literal);
1084   HexFloat<FloatProxy<Float16>> parsed_value(0);
1085   ParseNormalFloat(input, GetParam().negate_value, parsed_value);
1086   EXPECT_NE(GetParam().expect_success, input.fail())
1087       << " literal: " << GetParam().literal
1088       << " negate: " << GetParam().negate_value;
1089   if (GetParam().expect_success) {
1090     EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
1091         << " literal: " << GetParam().literal
1092         << " negate: " << GetParam().negate_value;
1093   }
1094 }
1095
1096 INSTANTIATE_TEST_CASE_P(
1097     Float16Parse, ParseNormalFloat16Test,
1098     ::testing::ValuesIn(std::vector<FloatParseCase<Float16>>{
1099         // Failing cases due to trivially incorrect syntax.
1100         BadFloatParseCase<Float16>("abc", false, uint16_t{0}),
1101         BadFloatParseCase<Float16>("abc", true, uint16_t{0}),
1102
1103         // Valid cases.
1104         GoodFloatParseCase<Float16>("0", false, uint16_t{0}),
1105         GoodFloatParseCase<Float16>("0.0", false, uint16_t{0}),
1106         GoodFloatParseCase<Float16>("-0.0", false, uint16_t{0x8000}),
1107         GoodFloatParseCase<Float16>("2.0", false, uint16_t{0x4000}),
1108         GoodFloatParseCase<Float16>("-2.0", false, uint16_t{0xc000}),
1109         GoodFloatParseCase<Float16>("+2.0", false, uint16_t{0x4000}),
1110         // Cases with negate_value being true.
1111         GoodFloatParseCase<Float16>("0.0", true, uint16_t{0x8000}),
1112         GoodFloatParseCase<Float16>("2.0", true, uint16_t{0xc000}),
1113
1114         // When negate_value is true, we should not accept a leading minus or
1115         // plus.
1116         BadFloatParseCase<Float16>("-0.0", true, uint16_t{0}),
1117         BadFloatParseCase<Float16>("-2.0", true, uint16_t{0}),
1118         BadFloatParseCase<Float16>("+0.0", true, uint16_t{0}),
1119         BadFloatParseCase<Float16>("+2.0", true, uint16_t{0}),
1120     }),);
1121
// One scenario for the stream-extraction overflow tests.
template <typename T>
struct OverflowParseCase {
  // Text fed to the stream extraction operator.
  std::string input;
  // True when extraction must leave the stream in a non-failed state.
  bool expect_success;
  // Value to compare against; the tests only check it on expected success.
  T expected_value;
};
1129
1130 using FloatProxyParseOverflowFloatTest =
1131     ::testing::TestWithParam<OverflowParseCase<float>>;
1132
1133 TEST_P(FloatProxyParseOverflowFloatTest, Sample) {
1134   std::istringstream input(GetParam().input);
1135   HexFloat<FloatProxy<float>> value(0.0f);
1136   input >> value;
1137   EXPECT_NE(GetParam().expect_success, input.fail());
1138   if (GetParam().expect_success) {
1139     EXPECT_THAT(value.value().getAsFloat(), GetParam().expected_value);
1140   }
1141 }
1142
1143 INSTANTIATE_TEST_CASE_P(
1144     FloatOverflow, FloatProxyParseOverflowFloatTest,
1145     ::testing::ValuesIn(std::vector<OverflowParseCase<float>>({
1146         {"0", true, 0.0f},
1147         {"0.0", true, 0.0f},
1148         {"1.0", true, 1.0f},
1149         {"1e38", true, 1e38f},
1150         {"-1e38", true, -1e38f},
1151         {"1e40", false, FLT_MAX},
1152         {"-1e40", false, -FLT_MAX},
1153         {"1e400", false, FLT_MAX},
1154         {"-1e400", false, -FLT_MAX},
1155     })),);
1156
1157 using FloatProxyParseOverflowDoubleTest =
1158     ::testing::TestWithParam<OverflowParseCase<double>>;
1159
1160 TEST_P(FloatProxyParseOverflowDoubleTest, Sample) {
1161   std::istringstream input(GetParam().input);
1162   HexFloat<FloatProxy<double>> value(0.0);
1163   input >> value;
1164   EXPECT_NE(GetParam().expect_success, input.fail());
1165   if (GetParam().expect_success) {
1166     EXPECT_THAT(value.value().getAsFloat(), Eq(GetParam().expected_value));
1167   }
1168 }
1169
1170 INSTANTIATE_TEST_CASE_P(
1171     DoubleOverflow, FloatProxyParseOverflowDoubleTest,
1172     ::testing::ValuesIn(std::vector<OverflowParseCase<double>>({
1173         {"0", true, 0.0},
1174         {"0.0", true, 0.0},
1175         {"1.0", true, 1.0},
1176         {"1e38", true, 1e38},
1177         {"-1e38", true, -1e38},
1178         {"1e40", true, 1e40},
1179         {"-1e40", true, -1e40},
1180         {"1e400", false, DBL_MAX},
1181         {"-1e400", false, -DBL_MAX},
1182     })),);
1183
1184 using FloatProxyParseOverflowFloat16Test =
1185     ::testing::TestWithParam<OverflowParseCase<uint16_t>>;
1186
1187 TEST_P(FloatProxyParseOverflowFloat16Test, Sample) {
1188   std::istringstream input(GetParam().input);
1189   HexFloat<FloatProxy<Float16>> value(0);
1190   input >> value;
1191   EXPECT_NE(GetParam().expect_success, input.fail()) << " literal: "
1192                                                      << GetParam().input;
1193   if (GetParam().expect_success) {
1194     EXPECT_THAT(value.value().data(), Eq(GetParam().expected_value))
1195         << " literal: " << GetParam().input;
1196   }
1197 }
1198
1199 INSTANTIATE_TEST_CASE_P(
1200     Float16Overflow, FloatProxyParseOverflowFloat16Test,
1201     ::testing::ValuesIn(std::vector<OverflowParseCase<uint16_t>>({
1202         {"0", true, uint16_t{0}},
1203         {"0.0", true, uint16_t{0}},
1204         {"1.0", true, uint16_t{0x3c00}},
1205         // Overflow for 16-bit float is an error, and returns max or
1206         // lowest value.
1207         {"1e38", false, uint16_t{0x7bff}},
1208         {"1e40", false, uint16_t{0x7bff}},
1209         {"1e400", false, uint16_t{0x7bff}},
1210         {"-1e38", false, uint16_t{0xfbff}},
1211         {"-1e40", false, uint16_t{0xfbff}},
1212         {"-1e400", false, uint16_t{0xfbff}},
1213     })),);
1214
1215 TEST(FloatProxy, Max) {
1216   EXPECT_THAT(FloatProxy<Float16>::max().getAsFloat().get_value(),
1217               Eq(uint16_t{0x7bff}));
1218   EXPECT_THAT(FloatProxy<float>::max().getAsFloat(),
1219               Eq(std::numeric_limits<float>::max()));
1220   EXPECT_THAT(FloatProxy<double>::max().getAsFloat(),
1221               Eq(std::numeric_limits<double>::max()));
1222 }
1223
1224 TEST(FloatProxy, Lowest) {
1225   EXPECT_THAT(FloatProxy<Float16>::lowest().getAsFloat().get_value(),
1226               Eq(uint16_t{0xfbff}));
1227   EXPECT_THAT(FloatProxy<float>::lowest().getAsFloat(),
1228               Eq(std::numeric_limits<float>::lowest()));
1229   EXPECT_THAT(FloatProxy<double>::lowest().getAsFloat(),
1230               Eq(std::numeric_limits<double>::lowest()));
1231 }
1232
1233 // TODO(awoloszyn): Add fp16 tests and HexFloatTraits.
1234 }  // anonymous namespace