1 // Copyright (c) 2015-2016 The Khronos Group Inc.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
22 #include <gmock/gmock.h>
24 #include "source/util/hex_float.h"
25 #include "unit_spirv.h"
29 using spvutils::BitwiseCast;
30 using spvutils::Float16;
31 using spvutils::FloatProxy;
32 using spvutils::HexFloat;
33 using spvutils::ParseNormalFloat;
35 // In this file "encode" means converting a number into a string,
36 // and "decode" means converting a string into a number.
// Parameterized fixture types. Each parameter pairs a FloatProxy value with a
// hex-float string; the Decode* fixtures put the string first.
39 ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
// Decode-only fixture for 32-bit floats: (input string, expected value).
40 using DecodeHexFloatTest =
41 ::testing::TestWithParam<std::pair<std::string, FloatProxy<float>>>;
43 ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
// Decode-only fixture for 64-bit floats: (input string, expected value).
44 using DecodeHexDoubleTest =
45 ::testing::TestWithParam<std::pair<std::string, FloatProxy<double>>>;
47 // Hex-encodes a float value: wraps |value| in a spvutils::HexFloat<T> and
// writes it to the stream ss with operator<<.
49 std::string EncodeViaHexFloat(const T& value) {
51 ss << spvutils::HexFloat<T>(value);
55 // The following two tests can't be DRY because they take different parameter
// types: this one is instantiated with pair<FloatProxy<float>, std::string>,
// the HexDoubleTest twin with pair<FloatProxy<double>, std::string>.
// Encoding the parameter's value (first) must yield its string (second).
58 TEST_P(HexFloatTest, EncodeCorrectly) {
59 EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
// Double-precision counterpart: encoding the parameter's value (first) must
// yield the parameter's string (second).
62 TEST_P(HexDoubleTest, EncodeCorrectly) {
63 EXPECT_THAT(EncodeViaHexFloat(GetParam().first), Eq(GetParam().second));
66 // Decodes a hex-float string.
// Extracts a HexFloat<FloatProxy<T>> from a stringstream built over |str| and
// returns the FloatProxy<T> it holds. Also records a test failure (EXPECT_TRUE)
// unless the stream reports eof() after the extraction.
68 FloatProxy<T> Decode(const std::string& str) {
// Initialized to zero before the stream extraction.
69 spvutils::HexFloat<FloatProxy<T>> decoded(0.f);
// eof() is set when the extraction read up to the end of |str|.
70 EXPECT_TRUE((std::stringstream(str) >> decoded).eof());
71 return decoded.value();
// Reverse direction of EncodeCorrectly on the same parameters: decoding the
// string (second) must compare equal to the value (first).
74 TEST_P(HexFloatTest, DecodeCorrectly) {
75 EXPECT_THAT(Decode<float>(GetParam().second), Eq(GetParam().first));
// Double-precision counterpart: decoding the string (second) must compare
// equal to the value (first).
78 TEST_P(HexDoubleTest, DecodeCorrectly) {
79 EXPECT_THAT(Decode<double>(GetParam().second), Eq(GetParam().first));
// Float32 cases for HexFloatTest: {value, expected hex-float string}.
// HexFloatTest has both an EncodeCorrectly and a DecodeCorrectly test, so each
// pair is checked in both directions.
82 INSTANTIATE_TEST_CASE_P(
83 Float32Tests, HexFloatTest,
84 ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
96 {-0.75f, "-0x1.8p-1"},
102 {-1024.f, "-0x1p+10"},
103 {1024.f + 8.f, "0x1.02p+10"},
104 {-1024.f - 8.f, "-0x1.02p+10"},
// Negative powers of two and sums of them.
107 {1.0f / 512.f, "0x1p-9"},
108 {1.0f / -512.f, "-0x1p-9"},
109 {1.0f / 1024.f, "0x1p-10"},
110 {1.0f / -1024.f, "-0x1p-10"},
111 {1.0f / 1024.f + 1.0f / 8.f, "0x1.02p-3"},
112 {1.0f / -1024.f - 1.0f / 8.f, "-0x1.02p-3"},
// +/- 2^-126, one exponent step above the denormalized cases that follow.
115 {float(ldexp(1.0f, -126)), "0x1p-126"},
116 {float(ldexp(-1.0f, -126)), "-0x1p-126"},
118 // Denormalized values
119 {float(ldexp(1.0f, -127)), "0x1p-127"},
120 {float(ldexp(1.0f, -127) / 2.0f), "0x1p-128"},
121 {float(ldexp(1.0f, -127) / 4.0f), "0x1p-129"},
122 {float(ldexp(1.0f, -127) / 8.0f), "0x1p-130"},
123 {float(ldexp(-1.0f, -127)), "-0x1p-127"},
124 {float(ldexp(-1.0f, -127) / 2.0f), "-0x1p-128"},
125 {float(ldexp(-1.0f, -127) / 4.0f), "-0x1p-129"},
126 {float(ldexp(-1.0f, -127) / 8.0f), "-0x1p-130"},
// Denormalized values with more than one significand bit: 1.5 * 2^-127, ...
128 {float(ldexp(1.0, -127) + (ldexp(1.0, -127) / 2.0f)), "0x1.8p-127"},
129 {float(ldexp(1.0, -127) / 2.0 + (ldexp(1.0, -127) / 4.0f)),
// Infinity and NaN cases for HexFloatTest. Values are supplied as raw 32-bit
// patterns (uint32_t) rather than as float literals; every expected string
// uses the exponent p+128.
134 INSTANTIATE_TEST_CASE_P(
135 Float32NanTests, HexFloatTest,
136 ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
137 // Various NAN and INF cases
138 {uint32_t(0xFF800000), "-0x1p+128"}, // -inf
139 {uint32_t(0x7F800000), "0x1p+128"}, // inf
140 {uint32_t(0xFFC00000), "-0x1.8p+128"}, // -nan
141 {uint32_t(0xFF800100), "-0x1.0002p+128"}, // -nan
142 {uint32_t(0xFF800c00), "-0x1.0018p+128"}, // -nan
143 {uint32_t(0xFF80F000), "-0x1.01ep+128"}, // -nan
144 {uint32_t(0xFFFFFFFF), "-0x1.fffffep+128"}, // -nan
145 {uint32_t(0x7FC00000), "0x1.8p+128"}, // +nan
146 {uint32_t(0x7F800100), "0x1.0002p+128"}, // +nan
147 {uint32_t(0x7f800c00), "0x1.0018p+128"}, // +nan
148 {uint32_t(0x7F80F000), "0x1.01ep+128"}, // +nan
149 {uint32_t(0x7FFFFFFF), "0x1.fffffep+128"}, // +nan
// Float64 cases for HexDoubleTest: {value, expected hex-float string}.
// HexDoubleTest has both an EncodeCorrectly and a DecodeCorrectly test, so
// each pair is checked in both directions.
152 INSTANTIATE_TEST_CASE_P(
153 Float64Tests, HexDoubleTest,
155 std::vector<std::pair<FloatProxy<double>, std::string>>({
167 {-0.75, "-0x1.8p-1"},
173 {-1024., "-0x1p+10"},
174 {1024. + 8., "0x1.02p+10"},
175 {-1024. - 8., "-0x1.02p+10"},
177 // Large outside the range of normal floats
178 {ldexp(1.0, 128), "0x1p+128"},
179 {ldexp(1.0, 129), "0x1p+129"},
180 {ldexp(-1.0, 128), "-0x1p+128"},
181 {ldexp(-1.0, 129), "-0x1p+129"},
182 {ldexp(1.0, 128) + ldexp(1.0, 90), "0x1.0000000004p+128"},
183 {ldexp(1.0, 129) + ldexp(1.0, 120), "0x1.008p+129"},
// The next two sums stay negative with a magnitude just below 2^128 / 2^129,
// so the printed exponent is one lower than that of the first term.
184 {ldexp(-1.0, 128) + ldexp(1.0, 90), "-0x1.fffffffff8p+127"},
185 {ldexp(-1.0, 129) + ldexp(1.0, 120), "-0x1.ffp+128"},
188 {1.0 / 512., "0x1p-9"},
189 {1.0 / -512., "-0x1p-9"},
190 {1.0 / 1024., "0x1p-10"},
191 {1.0 / -1024., "-0x1p-10"},
192 {1.0 / 1024. + 1.0 / 8., "0x1.02p-3"},
193 {1.0 / -1024. - 1.0 / 8., "-0x1.02p-3"},
195 // Small outside the range of normal floats
196 {ldexp(1.0, -128), "0x1p-128"},
197 {ldexp(1.0, -129), "0x1p-129"},
198 {ldexp(-1.0, -128), "-0x1p-128"},
199 {ldexp(-1.0, -129), "-0x1p-129"},
200 {ldexp(1.0, -128) + ldexp(1.0, -90), "0x1.0000000004p-90"},
201 {ldexp(1.0, -129) + ldexp(1.0, -120), "0x1.008p-120"},
// Here the positive term is the larger one, so the sums are positive and the
// expected strings carry no leading '-'.
202 {ldexp(-1.0, -128) + ldexp(1.0, -90), "0x1.fffffffff8p-91"},
203 {ldexp(-1.0, -129) + ldexp(1.0, -120), "0x1.ffp-121"},
// +/- 2^-1022, one exponent step above the denormalized cases that follow.
206 {ldexp(1.0, -1022), "0x1p-1022"},
207 {ldexp(-1.0, -1022), "-0x1p-1022"},
209 // Denormalized values
210 {ldexp(1.0, -1023), "0x1p-1023"},
211 {ldexp(1.0, -1023) / 2.0, "0x1p-1024"},
212 {ldexp(1.0, -1023) / 4.0, "0x1p-1025"},
213 {ldexp(1.0, -1023) / 8.0, "0x1p-1026"},
214 {ldexp(-1.0, -1024), "-0x1p-1024"},
215 {ldexp(-1.0, -1024) / 2.0, "-0x1p-1025"},
216 {ldexp(-1.0, -1024) / 4.0, "-0x1p-1026"},
217 {ldexp(-1.0, -1024) / 8.0, "-0x1p-1027"},
// Denormalized values with more than one significand bit: 1.5 * 2^-1023, ...
219 {ldexp(1.0, -1023) + (ldexp(1.0, -1023) / 2.0), "0x1.8p-1023"},
220 {ldexp(1.0, -1023) / 2.0 + (ldexp(1.0, -1023) / 4.0),
// Infinity and NaN cases for HexDoubleTest. Values are supplied as raw 64-bit
// patterns (uint64_t); every expected string uses the exponent p+1024.
225 INSTANTIATE_TEST_CASE_P(
226 Float64NanTests, HexDoubleTest,
227 ::testing::ValuesIn(std::vector<
228 std::pair<FloatProxy<double>, std::string>>({
229 // Various NAN and INF cases
230 {uint64_t(0xFFF0000000000000LL), "-0x1p+1024"}, // -inf
231 {uint64_t(0x7FF0000000000000LL), "0x1p+1024"}, // +inf
232 {uint64_t(0xFFF8000000000000LL), "-0x1.8p+1024"}, // -nan
233 {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"}, // -nan
234 {uint64_t(0xFFF0000000000001LL), "-0x1.0000000000001p+1024"}, // -nan
235 {uint64_t(0xFFF0000300000000LL), "-0x1.00003p+1024"}, // -nan
236 {uint64_t(0xFFFFFFFFFFFFFFFFLL), "-0x1.fffffffffffffp+1024"}, // -nan
237 {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"}, // +nan
238 {uint64_t(0x7FF0F00000000000LL), "0x1.0fp+1024"}, // +nan
239 {uint64_t(0x7FF0000000000001LL), "0x1.0000000000001p+1024"}, // +nan
240 {uint64_t(0x7FF0000300000000LL), "0x1.00003p+1024"}, // +nan
241 {uint64_t(0x7FFFFFFFFFFFFFFFLL), "0x1.fffffffffffffp+1024"}, // +nan
// Checks that writing a FloatProxy<float> leaves the stream's formatting state
// usable for later integers.
244 TEST(HexFloatStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
// Octal base and fill character 'x' are set before the FloatProxy is written;
// 8 is printed with width 4, and width 4 is requested again for 9.
246 s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
247 << FloatProxy<float>(uint32_t(0xFF800100)) << " " << std::setw(4) << 9;
// "xx11" shows that 9 is still printed in octal and padded with 'x'.
248 EXPECT_THAT(s.str(), Eq(std::string("xx10 -0x1.0002p+128 xx11")));
// Double-precision counterpart of the stream-state check: octal base and the
// 'x' fill are set before a FloatProxy<double> is written.
251 TEST(HexDoubleStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
253 s << std::setw(4) << std::oct << std::setfill('x') << 8 << " "
254 << FloatProxy<double>(uint64_t(0x7FF0F00000000000LL)) << " " << std::setw(4)
// The trailing "xx11" is an octal number padded to width 4 with 'x'.
256 EXPECT_THAT(s.str(), Eq(std::string("xx10 0x1.0fp+1024 xx11")));
// Decode-only check: the string (first) must decode to a value that compares
// equal to the expected FloatProxy<float> (second).
259 TEST_P(DecodeHexFloatTest, DecodeCorrectly) {
260 EXPECT_THAT(Decode<float>(GetParam().first), Eq(GetParam().second));
// Decode-only check: the string (first) must decode to a value that compares
// equal to the expected FloatProxy<double> (second).
263 TEST_P(DecodeHexDoubleTest, DecodeCorrectly) {
264 EXPECT_THAT(Decode<double>(GetParam().first), Eq(GetParam().second));
// Decode-only Float32 cases: {input string, expected value}. The entries
// visible here all expect a (signed) zero result.
267 INSTANTIATE_TEST_CASE_P(
268 Float32DecodeTests, DecodeHexFloatTest,
269 ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
274 // flush to zero cases
275 {"0x1p-500", 0.f}, // Exponent underflows.
277 {"0x0.00000000001p-126", 0.f}, // Fraction causes underflow.
// A negative input is expected to produce negative zero.
278 {"-0x0.0000000001p-127", -0.f},
279 {"-0x0.01p-142", -0.f}, // Fraction causes additional underflow.
280 {"0x0.01p-142", 0.f},
282 // Some floats that do not encode the same way as they decode.
// Decode-only Float32 cases whose expected result is an infinity, given as a
// raw 32-bit pattern.
289 INSTANTIATE_TEST_CASE_P(
290 Float32DecodeInfTests, DecodeHexFloatTest,
291 ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
293 {"-0x1p+128", uint32_t(0xFF800000)}, // -inf
// 0x32 is 50, so 0x32p+127 (50 * 2^127) is already larger than 2^128.
294 {"0x32p+127", uint32_t(0x7F800000)}, // inf
295 {"0x32p+500", uint32_t(0x7F800000)}, // inf
296 {"-0x32p+127", uint32_t(0xFF800000)}, // -inf
// Decode-only Float64 cases: {input string, expected value}. The entries
// visible here all expect a (signed) zero result.
299 INSTANTIATE_TEST_CASE_P(
300 Float64DecodeTests, DecodeHexDoubleTest,
302 std::vector<std::pair<std::string, FloatProxy<double>>>({
307 // flush to zero cases
308 {"0x1p-5000", 0.}, // Exponent underflows.
310 {"0x0.0000000000000001p-1023", 0.}, // Fraction causes underflow.
311 {"-0x0.000000000000001p-1024", -0.},
// NOTE(review): the expected value on the next line is the float literal -0.f
// while its neighbours use double literals; consider -0. for consistency.
312 {"-0x0.01p-1090", -0.f}, // Fraction causes additional underflow.
313 {"0x0.01p-1090", 0.},
315 // Some floats that do not encode the same way as they decode.
// Decode-only Float64 cases whose expected result is an infinity, given as a
// raw 64-bit pattern.
322 INSTANTIATE_TEST_CASE_P(
323 Float64DecodeInfTests, DecodeHexDoubleTest,
325 std::vector<std::pair<std::string, FloatProxy<double>>>({
327 {"-0x1p+1024", uint64_t(0xFFF0000000000000)}, // -inf
// 0x32 is 50, so 0x32p+1023 (50 * 2^1023) is already larger than 2^1024.
328 {"0x32p+1023", uint64_t(0x7FF0000000000000)}, // inf
329 {"0x32p+5000", uint64_t(0x7FF0000000000000)}, // inf
330 {"-0x32p+1023", uint64_t(0xFFF0000000000000)}, // -inf
// Checks FloatProxy<float> construction from float values and from raw 32-bit
// patterns, read back through getAsFloat() and data().
333 TEST(FloatProxy, ValidConversion) {
// Values constructed from a float are returned unchanged by getAsFloat().
334 EXPECT_THAT(FloatProxy<float>(1.f).getAsFloat(), Eq(1.0f));
335 EXPECT_THAT(FloatProxy<float>(32.f).getAsFloat(), Eq(32.0f));
336 EXPECT_THAT(FloatProxy<float>(-1.f).getAsFloat(), Eq(-1.0f));
337 EXPECT_THAT(FloatProxy<float>(0.f).getAsFloat(), Eq(0.0f));
338 EXPECT_THAT(FloatProxy<float>(-0.f).getAsFloat(), Eq(-0.0f));
339 EXPECT_THAT(FloatProxy<float>(1.2e32f).getAsFloat(), Eq(1.2e32f));
// Values constructed from raw bits: the two infinity patterns satisfy
// std::isinf and the remaining patterns satisfy std::isnan.
341 EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0xFF800000)).getAsFloat()));
342 EXPECT_TRUE(std::isinf(FloatProxy<float>(uint32_t(0x7F800000)).getAsFloat()));
343 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFC00000)).getAsFloat()));
344 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800100)).getAsFloat()));
345 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF800c00)).getAsFloat()));
346 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFF80F000)).getAsFloat()));
347 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0xFFFFFFFF)).getAsFloat()));
348 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FC00000)).getAsFloat()));
349 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F800100)).getAsFloat()));
350 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7f800c00)).getAsFloat()));
351 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7F80F000)).getAsFloat()));
352 EXPECT_TRUE(std::isnan(FloatProxy<float>(uint32_t(0x7FFFFFFF)).getAsFloat()));
// data() must return exactly the bit pattern the proxy was constructed from,
// including for the infinity and NaN patterns.
354 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800000)).data(), Eq(0xFF800000u));
355 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800000)).data(), Eq(0x7F800000u));
356 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFC00000)).data(), Eq(0xFFC00000u));
357 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800100)).data(), Eq(0xFF800100u));
358 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF800c00)).data(), Eq(0xFF800c00u));
359 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFF80F000)).data(), Eq(0xFF80F000u));
360 EXPECT_THAT(FloatProxy<float>(uint32_t(0xFFFFFFFF)).data(), Eq(0xFFFFFFFFu));
361 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FC00000)).data(), Eq(0x7FC00000u));
362 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F800100)).data(), Eq(0x7F800100u));
363 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7f800c00)).data(), Eq(0x7f800c00u));
364 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7F80F000)).data(), Eq(0x7F80F000u));
365 EXPECT_THAT(FloatProxy<float>(uint32_t(0x7FFFFFFF)).data(), Eq(0x7FFFFFFFu));
// FloatProxy<float>::isNan() must return true for each of these raw patterns
// (the same NaN patterns used in ValidConversion, with the top bit set and
// clear).
368 TEST(FloatProxy, Nan) {
369 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFC00000)).isNan());
370 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800100)).isNan());
371 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF800c00)).isNan());
372 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFF80F000)).isNan());
373 EXPECT_TRUE(FloatProxy<float>(uint32_t(0xFFFFFFFF)).isNan());
374 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FC00000)).isNan());
375 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F800100)).isNan());
376 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7f800c00)).isNan());
377 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7F80F000)).isNan());
378 EXPECT_TRUE(FloatProxy<float>(uint32_t(0x7FFFFFFF)).isNan());
// Unary minus on a FloatProxy<float> must flip the sign of the wrapped value.
381 TEST(FloatProxy, Negation) {
382 EXPECT_THAT((-FloatProxy<float>(1.f)).getAsFloat(), Eq(-1.0f));
383 EXPECT_THAT((-FloatProxy<float>(0.f)).getAsFloat(), Eq(-0.0f));
385 EXPECT_THAT((-FloatProxy<float>(-1.f)).getAsFloat(), Eq(1.0f));
386 EXPECT_THAT((-FloatProxy<float>(-0.f)).getAsFloat(), Eq(0.0f));
388 EXPECT_THAT((-FloatProxy<float>(32.f)).getAsFloat(), Eq(-32.0f));
389 EXPECT_THAT((-FloatProxy<float>(-32.f)).getAsFloat(), Eq(32.0f));
391 EXPECT_THAT((-FloatProxy<float>(1.2e32f)).getAsFloat(), Eq(-1.2e32f));
392 EXPECT_THAT((-FloatProxy<float>(-1.2e32f)).getAsFloat(), Eq(1.2e32f));
// Infinities: negating +inf gives -inf and vice versa.
395 (-FloatProxy<float>(std::numeric_limits<float>::infinity())).getAsFloat(),
396 Eq(-std::numeric_limits<float>::infinity()));
397 EXPECT_THAT((-FloatProxy<float>(-std::numeric_limits<float>::infinity()))
399 Eq(std::numeric_limits<float>::infinity()));
402 // Test conversion of FloatProxy values to strings.
404 // In previous cases, we always wrapped the FloatProxy value in a HexFloat
405 // before conversion to a string. In the following cases, the FloatProxy
406 // decides for itself whether to print as a regular number or as a hex float.
// Fixture parameters are {value, expected string} pairs.
408 using FloatProxyFloatTest =
409 ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
410 using FloatProxyDoubleTest =
411 ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
413 // Converts a float value to a string via a FloatProxy.
// The EncodeCorrectly tests of FloatProxyFloatTest and FloatProxyDoubleTest
// call this with GetParam().first, i.e. with a FloatProxy<float> or a
// FloatProxy<double>.
414 template <typename T>
415 std::string EncodeViaFloatProxy(const T& value) {
416 std::stringstream ss;
421 // Converts a floating point string so that the exponent prefix
422 // is 'e', and the exponent value does not have leading zeros.
423 // The Microsoft runtime library likes to write things like "2.5E+010".
424 // Convert that to "2.5e+10".
425 // We don't care what happens to strings that are not floating point
427 std::string NormalizeExponentInFloatString(std::string in) {
429 // Reserve one spot for the terminating null, even when the sscanf fails.
430 std::vector<char> prefix(in.size() + 1);
433 int exponent; // in base 10
// The format splits the input into four pieces:
//   %[-+.0123456789]  the run of sign, digit and '.' characters -> prefix
//   %c                the exponent marker                        -> e
//   %c                the sign of the exponent                   -> plus_or_minus
//   %d                the exponent digits, parsed as an int      -> exponent
// A return value of 4 means all four conversions succeeded.
434 if ((4 == std::sscanf(in.c_str(), "%[-+.0123456789]%c%c%d", prefix.data(), &e,
435 &plus_or_minus, &exponent)) &&
436 (e == 'e' || e == 'E') &&
437 (plus_or_minus == '-' || plus_or_minus == '+')) {
438 // It looks like a floating point value with exponent.
439 std::stringstream out;
// Re-emit with a lower-case 'e'; printing the exponent as an int writes it
// without the leading zeros it may have had in the input.
440 out << prefix.data() << 'e' << plus_or_minus << exponent;
// Spot checks for NormalizeExponentInFloatString: the empty string and an
// already-normalized string come back unchanged; an upper-case 'E' and
// leading zeros in the exponent are normalized.
448 TEST(NormalizeFloat, Sample) {
449 EXPECT_THAT(NormalizeExponentInFloatString(""), Eq(""));
450 EXPECT_THAT(NormalizeExponentInFloatString("1e-12"), Eq("1e-12"));
451 EXPECT_THAT(NormalizeExponentInFloatString("1E+14"), Eq("1e+14"));
452 EXPECT_THAT(NormalizeExponentInFloatString("1e-0012"), Eq("1e-12"));
453 EXPECT_THAT(NormalizeExponentInFloatString("1.263E+014"), Eq("1.263e+14"));
456 // The following two tests can't be DRY because they take different parameter
// types (FloatProxy<float> pairs here, FloatProxy<double> pairs in the twin).
// The value (first) is encoded via EncodeViaFloatProxy, its exponent spelling
// is normalized, and the result must equal the expected string (second).
458 TEST_P(FloatProxyFloatTest, EncodeCorrectly) {
460 NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
461 Eq(GetParam().second));
// Double-precision counterpart: the value (first) is encoded via
// EncodeViaFloatProxy, its exponent spelling is normalized, and the result
// must equal the expected string (second).
464 TEST_P(FloatProxyDoubleTest, EncodeCorrectly) {
466 NormalizeExponentInFloatString(EncodeViaFloatProxy(GetParam().first)),
467 Eq(GetParam().second));
// Float32 cases for FloatProxyFloatTest: {value, expected string}. In the
// entries below, normal values expect decimal text while denormalized values,
// NaNs and infinities expect hex-float text.
470 INSTANTIATE_TEST_CASE_P(
471 Float32Tests, FloatProxyFloatTest,
472 ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
480 // Still normal numbers, but with large magnitude exponents.
481 {float(ldexp(1.f, 126)), "8.50706e+37"},
482 {float(ldexp(-1.f, -126)), "-1.17549e-38"},
484 // denormalized values are printed as hex floats.
485 {float(ldexp(1.0f, -127)), "0x1p-127"},
486 {float(ldexp(1.5f, -128)), "0x1.8p-128"},
487 {float(ldexp(1.25, -129)), "0x1.4p-129"},
488 {float(ldexp(1.125, -130)), "0x1.2p-130"},
489 {float(ldexp(-1.0f, -127)), "-0x1p-127"},
490 {float(ldexp(-1.0f, -128)), "-0x1p-128"},
491 {float(ldexp(-1.0f, -129)), "-0x1p-129"},
492 {float(ldexp(-1.5f, -130)), "-0x1.8p-130"},
// NaN bit patterns.
495 {FloatProxy<float>(uint32_t(0xFFC00000)), "-0x1.8p+128"},
496 {FloatProxy<float>(uint32_t(0xFF800100)), "-0x1.0002p+128"},
// Infinities.
498 {std::numeric_limits<float>::infinity(), "0x1p+128"},
499 {-std::numeric_limits<float>::infinity(), "-0x1p+128"},
// Float64 cases for FloatProxyDoubleTest: {value, expected string}. In the
// entries below, normal values expect decimal text while denormalized values,
// NaNs and infinities expect hex-float text.
502 INSTANTIATE_TEST_CASE_P(
503 Float64Tests, FloatProxyDoubleTest,
505 std::vector<std::pair<FloatProxy<double>, std::string>>({
511 // Large outside the range of normal floats
512 {ldexp(1.0, 128), "3.40282366920938e+38"},
513 {ldexp(1.5, 129), "1.02084710076282e+39"},
514 {ldexp(-1.0, 128), "-3.40282366920938e+38"},
515 {ldexp(-1.5, 129), "-1.02084710076282e+39"},
517 // Small outside the range of normal floats
518 {ldexp(1.5, -129), "2.20405190779179e-39"},
519 {ldexp(-1.5, -129), "-2.20405190779179e-39"},
// +/- 2^-1022 is still expected as decimal text.
522 {ldexp(1.0, -1022), "2.2250738585072e-308"},
523 {ldexp(-1.0, -1022), "-2.2250738585072e-308"},
525 // Denormalized values
526 {ldexp(1.125, -1023), "0x1.2p-1023"},
527 {ldexp(-1.375, -1024), "-0x1.6p-1024"},
// NaN bit patterns.
530 {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},
531 {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},
// Infinities.
534 {std::numeric_limits<double>::infinity(), "0x1p+1024"},
535 {-std::numeric_limits<double>::infinity(), "-0x1p+1024"},
// Returns getUnbiasedNormalizedExponent() of a HexFloat<FloatProxy<float>>
// built from |f| after narrowing |f| to float.
539 // double is used so that unbiased_exponent can be used with the output
540 // of ldexp directly.
541 int32_t unbiased_exponent(double f) {
542 return spvutils::HexFloat<spvutils::FloatProxy<float>>(
543 static_cast<float>(f)).getUnbiasedNormalizedExponent();
// Float16 counterpart of unbiased_exponent: |f| is the raw 16-bit pattern of
// the half value, and the result is getUnbiasedNormalizedExponent() of the
// HexFloat<FloatProxy<Float16>> constructed from it.
546 int16_t unbiased_half_exponent(uint16_t f) {
547 return spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>(f)
548 .getUnbiasedNormalizedExponent();
// Checks getUnbiasedNormalizedExponent() for 32-bit floats (through
// unbiased_exponent) and for 16-bit floats (through unbiased_half_exponent).
551 TEST(HexFloatOperationTest, UnbiasedExponent) {
// For a power of two 2^k the expected exponent is k.
553 EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, 0)));
554 EXPECT_EQ(-32, unbiased_exponent(ldexp(1.0f, -32)));
555 EXPECT_EQ(42, unbiased_exponent(ldexp(1.0f, 42)));
556 EXPECT_EQ(125, unbiased_exponent(ldexp(1.0f, 125)));
// 2^256 is beyond the largest finite float; the expected exponent is 128.
558 EXPECT_EQ(128, unbiased_exponent(ldexp(1.0f, 256)));
// Denormalized inputs are still expected to report the exponent of their
// highest set bit.
560 EXPECT_EQ(-100, unbiased_exponent(ldexp(1.0f, -100)));
561 EXPECT_EQ(-127, unbiased_exponent(ldexp(1.0f, -127))); // First denorm
562 EXPECT_EQ(-128, unbiased_exponent(ldexp(1.0f, -128)));
563 EXPECT_EQ(-129, unbiased_exponent(ldexp(1.0f, -129)));
564 EXPECT_EQ(-140, unbiased_exponent(ldexp(1.0f, -140)));
565 // Smallest representable number
566 EXPECT_EQ(-126 - 23, unbiased_exponent(ldexp(1.0f, -126 - 23)));
567 // Should get rounded to 0 first.
568 EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, -127 - 23)));
// Float16 patterns.
571 // The exponent is represented in the bits 0x7C00
// For the patterns below with a non-zero exponent field, the expected value
// equals that field ((bits & 0x7C00) >> 10) minus 15.
573 EXPECT_EQ(0, unbiased_half_exponent(0x3C00));
574 EXPECT_EQ(3, unbiased_half_exponent(0x4800));
575 EXPECT_EQ(-1, unbiased_half_exponent(0x3800));
576 EXPECT_EQ(-14, unbiased_half_exponent(0x0400));
577 EXPECT_EQ(16, unbiased_half_exponent(0x7C00));
578 EXPECT_EQ(10, unbiased_half_exponent(0x6400));
580 // Smallest representable number
581 EXPECT_EQ(-24, unbiased_half_exponent(0x0001));
584 // Creates a float that is the sum of 1/(2 ^ fractions[i]) for i in fractions
// For example {0, 1} describes 1 + 1/2.
585 float float_fractions(const std::vector<uint32_t>& fractions) {
// Each element is read into a signed int32_t, so -i below is a true negative
// exponent for ldexp.
587 for(int32_t i: fractions) {
588 f += std::ldexp(1.0f, -i);
593 // Returns the normalized significand of a HexFloat<FloatProxy<float>>
594 // that was created by calling float_fractions with the input fractions,
595 // raised to the power of exp.
// Concretely: float_fractions(fractions) is scaled with ldexp(..., exp),
// narrowed to float, wrapped in a HexFloat and queried with
// getNormalizedSignificand().
// NOTE(review): exp is declared uint32_t, yet the NormalizedSignificand test
// passes negative values such as -127; they reach ldexp only through
// int -> uint32_t -> int conversions. Consider int32_t.
596 uint32_t normalized_significand(const std::vector<uint32_t>& fractions, uint32_t exp) {
597 return spvutils::HexFloat<spvutils::FloatProxy<float>>(
598 static_cast<float>(ldexp(float_fractions(fractions), exp)))
599 .getNormalizedSignificand();
602 // Sets the bits from MSB to LSB of the significand part of a float.
603 // For example 0 would set the bit 23 (counting from LSB to MSB),
604 // and 1 would set the 22nd bit.
// (Those positions count from 1: the mask for index 0 is 1u << 22.)
605 uint32_t bits_set(const std::vector<uint32_t>& bits) {
606 const uint32_t top_bit = 1u << 22u;
608 for(uint32_t i: bits) {
614 // The same as bits_set but for a Float16 value instead of 32-bit floating
// point: the mask for index 0 is 1u << 9, and the accumulated value is
// returned narrowed to uint16_t.
616 uint16_t half_bits_set(const std::vector<uint32_t>& bits) {
617 const uint32_t top_bit = 1u << 9u;
619 for(uint32_t i: bits) {
622 return static_cast<uint16_t>(val);
// Checks getNormalizedSignificand() on 32-bit floats. Inputs are described by
// the binary fractions they contain (float_fractions) plus a power-of-two
// scale; expectations are described by the significand bits set (bits_set).
625 TEST(HexFloatOperationTest, NormalizedSignificand) {
626 // For normalized numbers (the following) it should be a simple matter
627 // of getting rid of the top implicit bit
628 EXPECT_EQ(bits_set({}), normalized_significand({0}, 0));
629 EXPECT_EQ(bits_set({0}), normalized_significand({0, 1}, 0));
630 EXPECT_EQ(bits_set({0, 1}), normalized_significand({0, 1, 2}, 0));
631 EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 0));
// The scale does not change the expected significand.
632 EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 32));
633 EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, 126));
635 // For denormalized numbers we expect the normalized significand to
636 // shift as if it were normalized. This means, in practice that the
637 // top_most set bit will be cut off. Looks very similar to above (on purpose)
638 EXPECT_EQ(bits_set({}), normalized_significand({0}, -127));
639 EXPECT_EQ(bits_set({3}), normalized_significand({0, 4}, -128));
640 EXPECT_EQ(bits_set({3}), normalized_significand({0, 4}, -127));
641 EXPECT_EQ(bits_set({}), normalized_significand({22}, -127));
642 EXPECT_EQ(bits_set({0}), normalized_significand({21, 22}, -127));
645 // Returns the 32-bit floating point value created by
646 // calling setFromSignUnbiasedExponentAndNormalizedSignificand
647 // on a HexFloat<FloatProxy<float>>
// All four arguments are forwarded to that call unchanged, in the same order.
648 float set_from_sign(bool negative, int32_t unbiased_exponent,
649 uint32_t significand, bool round_denorm_up) {
// The HexFloat starts out as 0.f before the setter is called on it.
650 spvutils::HexFloat<spvutils::FloatProxy<float>> f(0.f);
651 f.setFromSignUnbiasedExponentAndNormalizedSignificand(
652 negative, unbiased_exponent, significand, round_denorm_up);
653 return f.value().getAsFloat();
// Checks setFromSignUnbiasedExponentAndNormalizedSignificand (through
// set_from_sign) for normal and denormalized results of both signs.
656 TEST(HexFloatOperationTests,
657 SetFromSignUnbiasedExponentAndNormalizedSignificand) {
// Exponent 0 with an empty significand is 1.0.
659 EXPECT_EQ(1.f, set_from_sign(false, 0, 0, false));
661 // Tests insertion of various denormalized numbers with and without round up.
662 EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -149, 0, false));
663 EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -149, 0, true));
// At exponent -150 the expected result depends on round_denorm_up: 0 without
// it, 2^-149 with it.
664 EXPECT_EQ(0.f, set_from_sign(false, -150, 1, false));
665 EXPECT_EQ(static_cast<float>(ldexp(1.f, -149)), set_from_sign(false, -150, 1, true));
667 EXPECT_EQ(ldexp(1.0f, -127), set_from_sign(false, -127, 0, false));
668 EXPECT_EQ(ldexp(1.0f, -128), set_from_sign(false, -128, 0, false));
// Fractions {0, 1, 2, 5} correspond to significand bits {0, 1, 4}: the leading
// 1 (fraction 0) is implicit and fraction k maps to bit k - 1.
669 EXPECT_EQ(float_fractions({0, 1, 2, 5}),
670 set_from_sign(false, 0, bits_set({0, 1, 4}), false));
671 EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -32),
672 set_from_sign(false, -32, bits_set({0, 1, 4}), false));
673 EXPECT_EQ(ldexp(float_fractions({0, 1, 2, 5}), -128),
674 set_from_sign(false, -128, bits_set({0, 1, 4}), false));
676 // The negative cases from above.
677 EXPECT_EQ(-1.f, set_from_sign(true, 0, 0, false));
678 EXPECT_EQ(-ldexp(1.0, -127), set_from_sign(true, -127, 0, false));
679 EXPECT_EQ(-ldexp(1.0, -128), set_from_sign(true, -128, 0, false));
680 EXPECT_EQ(-float_fractions({0, 1, 2, 5}),
681 set_from_sign(true, 0, bits_set({0, 1, 4}), false));
682 EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -32),
683 set_from_sign(true, -32, bits_set({0, 1, 4}), false));
684 EXPECT_EQ(-ldexp(float_fractions({0, 1, 2, 5}), -128),
685 set_from_sign(true, -128, bits_set({0, 1, 4}), false));
// Checks getRoundedNormalizedSignificand<HF>() when source and target are the
// same 32-bit type: for every rounding direction the significand must come
// back unchanged and the carry flag must stay false.
688 TEST(HexFloatOperationTests, NonRounding) {
689 // Rounding from 32-bit hex-float to 32-bit hex-float should be trivial,
690 // except in the denorm case which is a bit more complex.
691 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
692 bool carry_bit = false;
// The four rounding directions exercised by the loop below.
694 spvutils::round_direction rounding[] = {
695 spvutils::round_direction::kToZero,
696 spvutils::round_direction::kToNearestEven,
697 spvutils::round_direction::kToPositiveInfinity,
698 spvutils::round_direction::kToNegativeInfinity};
700 // Everything fits, so this should be straight-forward
701 for (spvutils::round_direction round : rounding) {
702 EXPECT_EQ(bits_set({}), HF(0.f).getRoundedNormalizedSignificand<HF>(
704 EXPECT_FALSE(carry_bit);
706 EXPECT_EQ(bits_set({0}),
707 HF(float_fractions({0, 1}))
708 .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
709 EXPECT_FALSE(carry_bit);
711 EXPECT_EQ(bits_set({1, 3}),
712 HF(float_fractions({0, 2, 4}))
713 .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
714 EXPECT_FALSE(carry_bit);
// Negative input scaled by 2^-128.
718 HF(static_cast<float>(-ldexp(float_fractions({0, 1, 2, 5}), -128)))
719 .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
720 EXPECT_FALSE(carry_bit);
// Input that uses the lowest significand bit (fraction 23 -> bit 22).
723 bits_set({0, 1, 4, 22}),
724 HF(static_cast<float>(float_fractions({0, 1, 2, 5, 23})))
725 .getRoundedNormalizedSignificand<HF>(round, &carry_bit));
726 EXPECT_FALSE(carry_bit);
// Short alias for the rounding-mode enum used in the test tables below.
730 using RD = spvutils::round_direction;
// One case for HexFloatRoundTest (significand rounding from float to half).
731 struct RoundSignificandCase {
// first: expected rounded significand; second: expected carry flag.
733 std::pair<int16_t, bool> expected_results;
// Rounding mode passed to getRoundedNormalizedSignificand.
734 spvutils::round_direction round;
// Fixture parameterized on RoundSignificandCase.
737 using HexFloatRoundTest =
738 ::testing::TestWithParam<RoundSignificandCase>;
// Rounds the significand of the case's 32-bit source value to the Float16
// significand width and checks both the rounded significand and the carry flag
// against the case's expected_results pair.
740 TEST_P(HexFloatRoundTest, RoundDownToFP16) {
741 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
742 using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
744 HF input_value(GetParam().source_float);
745 bool carry_bit = false;
746 EXPECT_EQ(GetParam().expected_results.first,
747 input_value.getRoundedNormalizedSignificand<HF16>(
748 GetParam().round, &carry_bit));
749 EXPECT_EQ(carry_bit, GetParam().expected_results.second);
// Cases for HexFloatRoundTest: {source float, {expected half significand,
// expected carry}, rounding mode}. half_bits_set describes a 10-bit
// significand (index 0 is 1u << 9), so a float fraction of 2^-11 or smaller
// falls below its last bit and has to be rounded.
753 INSTANTIATE_TEST_CASE_P(F32ToF16, HexFloatRoundTest,
754 ::testing::ValuesIn(std::vector<RoundSignificandCase>(
// Exactly representable: nothing to round.
756 {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToZero},
757 {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNearestEven},
758 {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToPositiveInfinity},
759 {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNegativeInfinity},
760 {float_fractions({0, 1}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
// Remainder of exactly 2^-11 (a tie) with the last kept bit clear.
762 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
763 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
764 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
765 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNearestEven},
// Remainder of exactly 2^-11 (a tie) with the last kept bit set.
767 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToZero},
768 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToPositiveInfinity},
769 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity},
770 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToNearestEven},
// Remainder larger than 2^-11 (above the half-way point).
772 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
773 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
774 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
775 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
// Same magnitude, negative sign: the two directed modes swap results.
777 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
778 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToPositiveInfinity},
779 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity},
780 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
// Remainder above the half-way point by a much lower bit (2^-22).
782 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
783 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
784 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
785 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
// All kept bits set plus a tie: rounding up is expected to clear the
// significand and report a carry.
788 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToZero},
789 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToPositiveInfinity},
790 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToNegativeInfinity},
791 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToNearestEven},
793 // Cases where the original number was denorm. Note: this should have no
794 // effect, because the number is pre-normalized.
795 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -128)), std::make_pair(half_bits_set({0}), false), RD::kToZero},
796 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -129)), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
797 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -131)), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
798 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -130)), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
// One case for HexFloatRoundUpSignificandTest (significand widening from half
// to float).
802 struct UpCastSignificandCase {
// Raw 16-bit pattern used to construct the source HexFloat.
803 uint16_t source_half;
// Expected value of getRoundedNormalizedSignificand for the 32-bit target.
804 uint32_t expected_result;
// Fixture parameterized on UpCastSignificandCase.
807 using HexFloatRoundUpSignificandTest =
808 ::testing::TestWithParam<UpCastSignificandCase>;
// Widens a Float16 significand to the 32-bit significand width under each
// rounding direction; the result must equal expected_result and the carry flag
// must stay false.
809 TEST_P(HexFloatRoundUpSignificandTest, Widening) {
810 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
811 using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
812 bool carry_bit = false;
814 spvutils::round_direction rounding[] = {
815 spvutils::round_direction::kToZero,
816 spvutils::round_direction::kToNearestEven,
817 spvutils::round_direction::kToPositiveInfinity,
818 spvutils::round_direction::kToNegativeInfinity};
820 // Everything fits, so everything should just be bit-shifts.
821 for (spvutils::round_direction round : rounding) {
823 HF16 input_value(GetParam().source_half);
825 GetParam().expected_result,
826 input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit))
// Failure message: the actual and the expected significand.
828 << input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit)
829 << " 0x" << GetParam().expected_result;
830 EXPECT_FALSE(carry_bit);
// Cases for HexFloatRoundUpSignificandTest (half significand -> float
// significand).
834 INSTANTIATE_TEST_CASE_P(F16toF32, HexFloatRoundUpSignificandTest,
835 // 0xFC00 of the source 16-bit hex value cover the sign and the exponent.
836 // They are ignored for this test.
837 ::testing::ValuesIn(std::vector<UpCastSignificandCase>(
// One case for HexFloatFP32To16Tests (full float -> half conversion).
845 struct DownCastTest {
// Raw 16-bit pattern the conversion must produce.
847 uint16_t expected_half;
// Every rounding mode for which expected_half is the required result.
848 std::vector<spvutils::round_direction> directions;
// Returns a textual name for |direction|, used in test failure messages.
851 std::string get_round_text(spvutils::round_direction direction) {
// CASE(x) expands to `case x: return "x"`; the # operator turns the macro
// argument, exactly as written, into the returned string literal.
852 #define CASE(round_direction) \
853 case round_direction: \
854 return #round_direction
857 CASE(spvutils::round_direction::kToZero);
858 CASE(spvutils::round_direction::kToPositiveInfinity);
859 CASE(spvutils::round_direction::kToNegativeInfinity);
860 CASE(spvutils::round_direction::kToNearestEven);
// Fixture parameterized on DownCastTest.
866 using HexFloatFP32To16Tests = ::testing::TestWithParam<DownCastTest>;
// Casts the case's 32-bit source value to Float16 with castTo() under each of
// the case's rounding directions and compares the resulting raw 16-bit value
// with expected_half.
868 TEST_P(HexFloatFP32To16Tests, NarrowingCasts) {
869 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
870 using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
871 HF f(GetParam().source_float);
872 for (auto round : GetParam().directions) {
874 f.castTo(half, round);
875 EXPECT_EQ(GetParam().expected_half, half.value().getAsFloat().get_value())
// Failure message: rounding mode, source bits in hex, and the value produced.
876 << get_round_text(round) << " " << std::hex
877 << spvutils::BitwiseCast<uint32_t>(GetParam().source_float)
878 << " cast to: " << half.value().getAsFloat().get_value();
// 16-bit patterns that the narrowing-cast cases expect for float +infinity and
// -infinity respectively.
882 const uint16_t positive_infinity = 0x7C00;
883 const uint16_t negative_infinity = 0xFC00;
// Test cases for HexFloatFP32To16Tests. Each entry is
// {source float, expected half bits, rounding directions the expectation holds for}.
885 INSTANTIATE_TEST_CASE_P(F32ToF16, HexFloatFP32To16Tests,
886 ::testing::ValuesIn(std::vector<DownCastTest>(
888 // Exactly representable as half.
889 {0.f, 0x0, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
890 {-0.f, 0x8000, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
891 {1.0f, 0x3C00, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
892 {-1.0f, 0xBC00, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
// Values built with float_fractions(), plus the same values scaled by 2^3.
// The expected half is the same under every rounding direction.
894 {float_fractions({0, 1, 10}) , 0x3E01, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
895 {-float_fractions({0, 1, 10}) , 0xBE01, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
896 {static_cast<float>(ldexp(float_fractions({0, 1, 10}), 3)), 0x4A01, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
897 {static_cast<float>(-ldexp(float_fractions({0, 1, 10}), 3)), 0xCA01, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
// Magnitude 2^-25: the result is a signed zero, except when rounding away from
// zero (toward +inf for positive input, toward -inf for negative input), which
// yields 0x1 / 0x8001. Magnitude 2^-24 maps to 0x1 for every direction.
901 {static_cast<float>(ldexp(1.0f, -25)), 0x0, {RD::kToZero, RD::kToNegativeInfinity, RD::kToNearestEven}},
902 {static_cast<float>(ldexp(1.0f, -25)), 0x1, {RD::kToPositiveInfinity}},
903 {static_cast<float>(-ldexp(1.0f, -25)), 0x8000, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNearestEven}},
904 {static_cast<float>(-ldexp(1.0f, -25)), 0x8001, {RD::kToNegativeInfinity}},
905 {static_cast<float>(ldexp(1.0f, -24)), 0x1, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
// Magnitudes of 2^16 and above are expected to become an infinity of the same
// sign under every rounding direction.
908 {static_cast<float>(ldexp(1.0f, 16)), positive_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
909 {static_cast<float>(ldexp(1.0f, 18)), positive_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
910 {static_cast<float>(ldexp(1.3f, 16)), positive_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
911 {static_cast<float>(-ldexp(1.0f, 16)), negative_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
912 {static_cast<float>(-ldexp(1.0f, 18)), negative_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
913 {static_cast<float>(-ldexp(1.3f, 16)), negative_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
915 // Transfer of Infinities
916 {std::numeric_limits<float>::infinity(), positive_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
917 {-std::numeric_limits<float>::infinity(), negative_infinity, {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity, RD::kToNearestEven}},
919 // NaNs are tested separately below because they cannot be compared for equality.
// Half-precision bit pattern used as the source of the widening cast.
923 uint16_t source_half;
// Float value the widening cast is expected to produce.
924 float expected_float;
// Parameterized fixture for half -> float widening casts.
927 using HexFloatFP16To32Tests = ::testing::TestWithParam<UpCastCase>;
// Casts source_half to float with castTo() under every rounding direction and
// expects expected_float each time.
928 TEST_P(HexFloatFP16To32Tests, WideningCasts) {
929 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
930 using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
931 HF16 f(GetParam().source_half);
// All four rounding directions are checked against the same expected value.
933 spvutils::round_direction rounding[] = {
934 spvutils::round_direction::kToZero,
935 spvutils::round_direction::kToNearestEven,
936 spvutils::round_direction::kToPositiveInfinity,
937 spvutils::round_direction::kToNegativeInfinity};
939 // Everything fits, so everything should just be bit-shifts.
940 for (spvutils::round_direction round : rounding) {
942 f.castTo(flt, round);
// Failure message: rounding direction, the source half bits in hex, then the
// float value that was produced.
943 EXPECT_EQ(GetParam().expected_float, flt.value().getAsFloat())
944 << get_round_text(round) << " " << std::hex
945 << spvutils::BitwiseCast<uint16_t>(GetParam().source_half)
946 << " cast to: " << flt.value().getAsFloat();
// Test cases for HexFloatFP16To32Tests. Each entry is
// {source half bits, expected float value}.
950 INSTANTIATE_TEST_CASE_P(F16ToF32, HexFloatFP16To32Tests,
951 ::testing::ValuesIn(std::vector<UpCastCase>(
// Expected values built with float_fractions(), in both signs.
957 {0x3F00, float_fractions({0, 1, 2})},
958 {0xBF00, -float_fractions({0, 1, 2})},
959 {0x3F01, float_fractions({0, 1, 2, 10})},
960 {0xBF01, -float_fractions({0, 1, 2, 10})},
// Inputs with only low-order bits set (plus the sign bit for negatives); the
// expected floats are powers of two, or a sum of two powers of two.
963 {0x0001, static_cast<float>(ldexp(1.0, -24))},
964 {0x0002, static_cast<float>(ldexp(1.0, -23))},
965 {0x8001, static_cast<float>(-ldexp(1.0, -24))},
966 {0x8011, static_cast<float>(-ldexp(1.0, -20) + -ldexp(1.0, -24))},
// Infinities keep their sign when widened.
969 {0x7C00, std::numeric_limits<float>::infinity()},
970 {0xFC00, -std::numeric_limits<float>::infinity()},
// Checks that NaN inputs stay NaN when cast between float and half precision,
// in both directions and under every rounding direction. The result is checked
// with isNan() rather than by comparing values.
973 TEST(HexFloatOperationTests, NanTests) {
974 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
975 using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
976 spvutils::round_direction rounding[] = {
977 spvutils::round_direction::kToZero,
978 spvutils::round_direction::kToNearestEven,
979 spvutils::round_direction::kToPositiveInfinity,
980 spvutils::round_direction::kToNegativeInfinity};
982 // Every rounding direction must preserve NaN-ness in both cast directions.
983 for (spvutils::round_direction round : rounding) {
// float -> half: quiet and signaling NaNs must both yield a NaN.
986 HF(std::numeric_limits<float>::quiet_NaN()).castTo(f16, round);
987 EXPECT_TRUE(f16.value().isNan());
988 HF(std::numeric_limits<float>::signaling_NaN()).castTo(f16, round);
989 EXPECT_TRUE(f16.value().isNan());
// half -> float: several half bit patterns that are expected to widen to NaN.
991 HF16(0x7C01).castTo(f, round);
992 EXPECT_TRUE(f.value().isNan());
993 HF16(0x7C11).castTo(f, round);
994 EXPECT_TRUE(f.value().isNan());
995 HF16(0xFC01).castTo(f, round);
996 EXPECT_TRUE(f.value().isNan());
997 HF16(0x7C10).castTo(f, round);
998 EXPECT_TRUE(f.value().isNan());
999 HF16(0xFF00).castTo(f, round);
1000 EXPECT_TRUE(f.value().isNan());
1004 // A test case for parsing good and bad HexFloat<FloatProxy<T>> literals.
1005 template <typename T>
1006 struct FloatParseCase {
// Text handed to the parser.
1007 std::string literal;
// Whether parsing is expected to leave the stream in a non-failed state.
1009 bool expect_success;
// Value the parse result is compared with; only checked when expect_success
// is true.
1010 HexFloat<FloatProxy<T>> expected_value;
// Parameterized fixture for ParseNormalFloat() on 32-bit floats.
1013 using ParseNormalFloatTest = ::testing::TestWithParam<FloatParseCase<float>>;
// Feeds the literal to ParseNormalFloat() and checks that the stream's fail
// state is the opposite of expect_success. When success is expected, the
// parsed value must also equal expected_value.
1015 TEST_P(ParseNormalFloatTest, Samples) {
1016 std::stringstream input(GetParam().literal);
1017 HexFloat<FloatProxy<float>> parsed_value(0.0f);
1018 ParseNormalFloat(input, GetParam().negate_value, parsed_value);
// expect_success == true requires fail() == false, hence EXPECT_NE.
1019 EXPECT_NE(GetParam().expect_success, input.fail())
1020 << " literal: " << GetParam().literal
1021 << " negate: " << GetParam().negate_value;
// The parsed value is only checked for cases that should succeed.
1022 if (GetParam().expect_success) {
1023 EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
1024 << " literal: " << GetParam().literal
1025 << " negate: " << GetParam().negate_value;
1029 // Returns a FloatParseCase with expected failure.
// expected_value is wrapped in a HexFloat and stored in the case; the tests in
// this file only compare it when success is expected.
1030 template <typename T>
1031 FloatParseCase<T> BadFloatParseCase(std::string literal, bool negate_value,
1033 HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
// The third field is expect_success, set to false here.
1034 return FloatParseCase<T>{literal, negate_value, false, proxy_expected_value};
1037 // Returns a FloatParseCase that should successfully parse to a given value.
// expected_value is wrapped in a HexFloat so the test can compare it with the
// parse result.
1038 template <typename T>
1039 FloatParseCase<T> GoodFloatParseCase(std::string literal, bool negate_value,
1041 HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
// The third field is expect_success, set to true here.
1042 return FloatParseCase<T>{literal, negate_value, true, proxy_expected_value};
// Test cases for ParseNormalFloatTest. The arguments of each helper call are
// (literal, negate_value, expected value).
1045 INSTANTIATE_TEST_CASE_P(
1046 FloatParse, ParseNormalFloatTest,
1047 ::testing::ValuesIn(std::vector<FloatParseCase<float>>{
1048 // Failing cases due to trivially incorrect syntax.
1049 BadFloatParseCase("abc", false, 0.0f),
1050 BadFloatParseCase("abc", true, 0.0f),
// Well-formed literals with negate_value false; a leading sign is accepted.
1053 GoodFloatParseCase("0", false, 0.0f),
1054 GoodFloatParseCase("0.0", false, 0.0f),
1055 GoodFloatParseCase("-0.0", false, -0.0f),
1056 GoodFloatParseCase("2.0", false, 2.0f),
1057 GoodFloatParseCase("-2.0", false, -2.0f),
1058 GoodFloatParseCase("+2.0", false, 2.0f),
1059 // Cases with negate_value being true.
1060 GoodFloatParseCase("0.0", true, -0.0f),
1061 GoodFloatParseCase("2.0", true, -2.0f),
1063 // When negate_value is true, we should not accept a
1064 // leading minus or plus.
1065 BadFloatParseCase("-0.0", true, 0.0f),
1066 BadFloatParseCase("-2.0", true, 0.0f),
1067 BadFloatParseCase("+0.0", true, 0.0f),
1068 BadFloatParseCase("+2.0", true, 0.0f),
1070 // Overflow is an error for 32-bit float parsing.
1071 BadFloatParseCase("1e40", false, FLT_MAX),
1072 BadFloatParseCase("1e40", true, -FLT_MAX),
1073 BadFloatParseCase("-1e40", false, -FLT_MAX),
1074 // There is no case for -1e40 with negate_value == true, since
1075 // that would represent an original literal of "--1e40".
// Parameterized fixture for ParseNormalFloat() on 16-bit floats.
1079 using ParseNormalFloat16Test =
1080 ::testing::TestWithParam<FloatParseCase<Float16>>;
// Same checks as the 32-bit Samples test, with a Float16 target: the stream's
// fail state must be the opposite of expect_success, and on expected success
// the parsed value must equal expected_value.
1082 TEST_P(ParseNormalFloat16Test, Samples) {
1083 std::stringstream input(GetParam().literal);
1084 HexFloat<FloatProxy<Float16>> parsed_value(0);
1085 ParseNormalFloat(input, GetParam().negate_value, parsed_value);
// expect_success == true requires fail() == false, hence EXPECT_NE.
1086 EXPECT_NE(GetParam().expect_success, input.fail())
1087 << " literal: " << GetParam().literal
1088 << " negate: " << GetParam().negate_value;
// The parsed value is only checked for cases that should succeed.
1089 if (GetParam().expect_success) {
1090 EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
1091 << " literal: " << GetParam().literal
1092 << " negate: " << GetParam().negate_value;
// Test cases for ParseNormalFloat16Test. Expected values are given as 16-bit
// patterns (uint16_t).
1096 INSTANTIATE_TEST_CASE_P(
1097 Float16Parse, ParseNormalFloat16Test,
1098 ::testing::ValuesIn(std::vector<FloatParseCase<Float16>>{
1099 // Failing cases due to trivially incorrect syntax.
1100 BadFloatParseCase<Float16>("abc", false, uint16_t{0}),
1101 BadFloatParseCase<Float16>("abc", true, uint16_t{0}),
// Well-formed literals with negate_value false; a leading sign is accepted.
1104 GoodFloatParseCase<Float16>("0", false, uint16_t{0}),
1105 GoodFloatParseCase<Float16>("0.0", false, uint16_t{0}),
1106 GoodFloatParseCase<Float16>("-0.0", false, uint16_t{0x8000}),
1107 GoodFloatParseCase<Float16>("2.0", false, uint16_t{0x4000}),
1108 GoodFloatParseCase<Float16>("-2.0", false, uint16_t{0xc000}),
1109 GoodFloatParseCase<Float16>("+2.0", false, uint16_t{0x4000}),
1110 // Cases with negate_value being true.
1111 GoodFloatParseCase<Float16>("0.0", true, uint16_t{0x8000}),
1112 GoodFloatParseCase<Float16>("2.0", true, uint16_t{0xc000}),
1114 // When negate_value is true, we should not accept a leading minus or plus.
1116 BadFloatParseCase<Float16>("-0.0", true, uint16_t{0}),
1117 BadFloatParseCase<Float16>("-2.0", true, uint16_t{0}),
1118 BadFloatParseCase<Float16>("+0.0", true, uint16_t{0}),
1119 BadFloatParseCase<Float16>("+2.0", true, uint16_t{0}),
1122 // A test case for detecting infinities.
// The tests using this struct read three members: input, expect_success and
// expected_value.
1123 template <typename T>
1124 struct OverflowParseCase {
// Whether the input stream is expected to be in a non-failed state afterwards.
1126 bool expect_success;
// Parameterized fixture for overflow handling with a 32-bit float target.
1130 using FloatProxyParseOverflowFloatTest =
1131 ::testing::TestWithParam<OverflowParseCase<float>>;
// Checks that the input stream's fail state is the opposite of expect_success
// and, when success is expected, that the resulting float equals expected_value.
1133 TEST_P(FloatProxyParseOverflowFloatTest, Sample) {
1134 std::istringstream input(GetParam().input);
1135 HexFloat<FloatProxy<float>> value(0.0f);
1137 EXPECT_NE(GetParam().expect_success, input.fail());
1138 if (GetParam().expect_success) {
// NOTE(review): the double and Float16 variants of this test wrap the expected
// value in Eq(); here it is passed to EXPECT_THAT directly. Confirm intended.
1139 EXPECT_THAT(value.value().getAsFloat(), GetParam().expected_value);
// Test cases for FloatProxyParseOverflowFloatTest. Each entry is
// {input text, expect_success, expected_value}.
1143 INSTANTIATE_TEST_CASE_P(
1144 FloatOverflow, FloatProxyParseOverflowFloatTest,
1145 ::testing::ValuesIn(std::vector<OverflowParseCase<float>>({
// Inputs expected to parse successfully, up to a magnitude of 1e38.
1147 {"0.0", true, 0.0f},
1148 {"1.0", true, 1.0f},
1149 {"1e38", true, 1e38f},
1150 {"-1e38", true, -1e38f},
// Magnitudes of 1e40 and 1e400 are expected to fail for a 32-bit float.
1151 {"1e40", false, FLT_MAX},
1152 {"-1e40", false, -FLT_MAX},
1153 {"1e400", false, FLT_MAX},
1154 {"-1e400", false, -FLT_MAX},
// Parameterized fixture for overflow handling with a 64-bit double target.
1157 using FloatProxyParseOverflowDoubleTest =
1158 ::testing::TestWithParam<OverflowParseCase<double>>;
// Checks that the input stream's fail state is the opposite of expect_success
// and, when success is expected, that the resulting double equals
// expected_value.
1160 TEST_P(FloatProxyParseOverflowDoubleTest, Sample) {
1161 std::istringstream input(GetParam().input);
1162 HexFloat<FloatProxy<double>> value(0.0);
1164 EXPECT_NE(GetParam().expect_success, input.fail());
1165 if (GetParam().expect_success) {
1166 EXPECT_THAT(value.value().getAsFloat(), Eq(GetParam().expected_value));
// Test cases for FloatProxyParseOverflowDoubleTest. Each entry is
// {input text, expect_success, expected_value}.
1170 INSTANTIATE_TEST_CASE_P(
1171 DoubleOverflow, FloatProxyParseOverflowDoubleTest,
1172 ::testing::ValuesIn(std::vector<OverflowParseCase<double>>({
// For a double, 1e38 and 1e40 are expected to succeed.
1176 {"1e38", true, 1e38},
1177 {"-1e38", true, -1e38},
1178 {"1e40", true, 1e40},
1179 {"-1e40", true, -1e40},
// A magnitude of 1e400 is expected to fail.
1180 {"1e400", false, DBL_MAX},
1181 {"-1e400", false, -DBL_MAX},
// Parameterized fixture for overflow handling with a 16-bit float target. The
// expected value is carried as a raw uint16_t.
1184 using FloatProxyParseOverflowFloat16Test =
1185 ::testing::TestWithParam<OverflowParseCase<uint16_t>>;
// Checks that the input stream's fail state is the opposite of expect_success
// and, when success is expected, that value.value().data() equals
// expected_value.
1187 TEST_P(FloatProxyParseOverflowFloat16Test, Sample) {
1188 std::istringstream input(GetParam().input);
1189 HexFloat<FloatProxy<Float16>> value(0);
1191 EXPECT_NE(GetParam().expect_success, input.fail()) << " literal: "
1192 << GetParam().input;
1193 if (GetParam().expect_success) {
1194 EXPECT_THAT(value.value().data(), Eq(GetParam().expected_value))
1195 << " literal: " << GetParam().input;
// Test cases for FloatProxyParseOverflowFloat16Test. Each entry is
// {input text, expect_success, expected 16-bit pattern}.
1199 INSTANTIATE_TEST_CASE_P(
1200 Float16Overflow, FloatProxyParseOverflowFloat16Test,
1201 ::testing::ValuesIn(std::vector<OverflowParseCase<uint16_t>>({
1202 {"0", true, uint16_t{0}},
1203 {"0.0", true, uint16_t{0}},
1204 {"1.0", true, uint16_t{0x3c00}},
1205 // Overflow for 16-bit float is an error, and returns max or lowest value.
1207 {"1e38", false, uint16_t{0x7bff}},
1208 {"1e40", false, uint16_t{0x7bff}},
1209 {"1e400", false, uint16_t{0x7bff}},
1210 {"-1e38", false, uint16_t{0xfbff}},
1211 {"-1e40", false, uint16_t{0xfbff}},
1212 {"-1e400", false, uint16_t{0xfbff}},
// FloatProxy<T>::max() must be 0x7bff for Float16 and must match
// std::numeric_limits<T>::max() for float and double.
1215 TEST(FloatProxy, Max) {
1216 EXPECT_THAT(FloatProxy<Float16>::max().getAsFloat().get_value(),
1217 Eq(uint16_t{0x7bff}));
1218 EXPECT_THAT(FloatProxy<float>::max().getAsFloat(),
1219 Eq(std::numeric_limits<float>::max()));
1220 EXPECT_THAT(FloatProxy<double>::max().getAsFloat(),
1221 Eq(std::numeric_limits<double>::max()));
// FloatProxy<T>::lowest() must be 0xfbff for Float16 and must match
// std::numeric_limits<T>::lowest() for float and double.
1224 TEST(FloatProxy, Lowest) {
1225 EXPECT_THAT(FloatProxy<Float16>::lowest().getAsFloat().get_value(),
1226 Eq(uint16_t{0xfbff}));
1227 EXPECT_THAT(FloatProxy<float>::lowest().getAsFloat(),
1228 Eq(std::numeric_limits<float>::lowest()));
1229 EXPECT_THAT(FloatProxy<double>::lowest().getAsFloat(),
1230 Eq(std::numeric_limits<double>::lowest()));
1233 // TODO(awoloszyn): Add fp16 tests and HexFloatTraits.
1234 } // anonymous namespace