Fix subnormal floats in extended image usage tests
author Ricardo Garcia <rgarcia@igalia.com>
Thu, 3 Oct 2019 15:51:07 +0000 (17:51 +0200)
committer Alexander Galazin <Alexander.Galazin@arm.com>
Fri, 11 Oct 2019 09:32:30 +0000 (05:32 -0400)
When transcoding images in the extended image usage bit tests, Inf and
NaN were correctly taken into account and removed from input data.
However, subnormal floats were only being taken into account in the
32-bit float case.

The Vulkan spec regarding the precision and operation of SPIR-V
instructions mentions subnormal values may be flushed to zero in any
shader instruction, including the shader code to transcode images, and
have to be taken into account for half floats too.

Affected tests:
dEQP-VK.image.extended_usage_bit.*

Components: Vulkan
VK-GL-CTS issue: 1747

Change-Id: I74a5d27b40f773f8d85e86a0a5d04eb0b4f16194
(cherry picked from commit 300b27043f8c93dcce4b3c788b33688ab59867af)

external/vulkancts/modules/vulkan/image/vktImageTranscodingSupportTests.cpp

index b283236..1139bd3 100644 (file)
@@ -51,6 +51,7 @@
 #include "tcuTestLog.hpp"
 #include "tcuRGBA.hpp"
 #include "tcuSurface.hpp"
+#include "tcuFloat.hpp"
 
 #include <vector>
 #include <iomanip>
@@ -121,6 +122,37 @@ BasicTranscodingTestInstance::BasicTranscodingTestInstance (Context& context, co
 {
 }
 
+// The templated functions below work with specializations of tcu::Float as class T. See "tcuFloat.hpp".
+
+// Return the smallest normal (non-denormal) floating point value of type T, preserving the sign of value.
+// Its encoding has a zeroed mantissa and an exponent field of 1 (though constructBits() expects the unbiased exponent, hence -(EXPONENT_BIAS - 1)).
+template <class T>
+inline T SmallestFloat (T value)
+{
+       return T::constructBits(value.sign(), -(T::EXPONENT_BIAS - 1), typename T::StorageType(0u));
+}
+
+// Return the largest normal value of type T with the same sign as value: all mantissa bits set and exponent EXPONENT_BIAS (see constructBits()).
+// The mantissa mask is computed in T::StorageType rather than int so the shift cannot overflow for formats with 31 or more mantissa bits.
+template <class T>
+inline T LargestFloat (T value)
+{
+       return T::constructBits(value.sign(), T::EXPONENT_BIAS, typename T::StorageType((typename T::StorageType(1u)<<T::MANTISSA_BITS)-1u));
+}
+
+// Sanitize, in place, the value of type T stored at ptr_: Infs and NaNs become the largest normal value and
+// denormals become the smallest normal value, both keeping the original sign; every other value is left untouched.
+// NOTE(review): ptr_ is reinterpreted as T*, so it presumably must be suitably aligned for T -- confirm against callers.
+template <class T>
+void fixFloatIfNeeded(deUint8* ptr_)
+{
+       T* ptr = reinterpret_cast<T*>(ptr_);
+       if (ptr->isInf() || ptr->isNaN())
+               *ptr = LargestFloat<T>(*ptr);
+       else if (ptr->isDenorm())
+               *ptr = SmallestFloat<T>(*ptr);
+}
+
 void BasicTranscodingTestInstance::generateData (deUint8* toFill, size_t size, const VkFormat format)
 {
        const deUint8 pattern[] =
@@ -227,31 +259,15 @@ void BasicTranscodingTestInstance::generateData (deUint8* toFill, size_t size, c
                        if (textureFormat.type == tcu::TextureFormat::HALF_FLOAT)
                        {
                                for (size_t i = 0; i < size; i += 2)
-                               {
-                                       // HALF_FLOAT fix: remove INF and NaN
-                                       if ((toFill[i+1] & 0x7C) == 0x7C)
-                                               toFill[i+1] = 0x00;
-                               }
+                                       fixFloatIfNeeded<tcu::Float16>(toFill + i);
                        }
                        else if (textureFormat.type == tcu::TextureFormat::FLOAT)
                        {
                                for (size_t i = 0; i < size; i += 4)
-                               {
-                                       // HALF_FLOAT fix: remove INF and NaN
-                                       if ((toFill[i+1] & 0x7C) == 0x7C)
-                                               toFill[i+1] = 0x00;
-                               }
+                                       fixFloatIfNeeded<tcu::Float16>(toFill + i);
 
                                for (size_t i = 0; i < size; i += 4)
-                               {
-                                       // FLOAT fix: remove INF, NaN, and denorm
-                                       // Little endian fix
-                                       if (((toFill[i+3] & 0x7F) == 0x7F && (toFill[i+2] & 0x80) == 0x80) || ((toFill[i+3] & 0x7F) == 0x00 && (toFill[i+2] & 0x80) == 0x00))
-                                               toFill[i+3] = 0x01;
-                                       // Big endian fix
-                                       if (((toFill[i+0] & 0x7F) == 0x7F && (toFill[i+1] & 0x80) == 0x80) || ((toFill[i+0] & 0x7F) == 0x00 && (toFill[i+1] & 0x80) == 0x00))
-                                               toFill[i+0] = 0x01;
-                               }
+                                       fixFloatIfNeeded<tcu::Float32>(toFill + i);
                        }
                }
        }