Added tests for NMin, NMax and NClamp instructions
author Jari Komppa <jari.komppa@siru.fi>
Tue, 4 Jul 2017 06:05:05 +0000 (09:05 +0300)
committer Alexander Galazin <Alexander.Galazin@arm.com>
Fri, 14 Jul 2017 11:43:58 +0000 (07:43 -0400)
The NMin, NMax and NClamp instructions are neither generated by glslang
nor tested by spirv-assembly.

Due to the fact that the vulkan spec allows implementations that do
not implement NaN handling, the tests have to be somewhat relaxed;
either the NaN is handled correctly, or NaNs are considered huge
values.

All three instructions return NaN in all cases if all parameters are
NaN.

NMin always returns the other value if paired with NaN: if NaNs are
handled, the instruction should pick the other value. If NaNs are
not handled, the other value will always be smaller.

NMax may return either the other value (if NaNs are handled), or NaN
(if NaNs are not handled).

NClamp is only defined if the min and max values are not NaN, so those
cases are not tested, except for the case where all three values are
NaN. If the clamped value is NaN, the result may either be the min
value (if NaNs are handled) or max value (if NaNs are not handled).

In addition to the NaN case testing, various random non-NaN values are
tested for all three instructions.

Affects:

dEQP-VK.spirv_assembly.instruction.compute.opnmin.all
dEQP-VK.spirv_assembly.instruction.compute.opnmax.all
dEQP-VK.spirv_assembly.instruction.compute.opnclamp.all

Components: Vulkan

VK-GL-CTS issue: 171

Change-Id: I59e884d51dac4eee047af2e52eebee3318e57776

android/cts/master/vk-master.txt
external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsmInstructionTests.cpp
external/vulkancts/mustpass/1.0.3/vk-default.txt

index 06e4af2..ffee8d1 100644 (file)
@@ -161821,6 +161821,9 @@ dEQP-VK.spirv_assembly.instruction.compute.shader_default_output.uint.initialize
 dEQP-VK.spirv_assembly.instruction.compute.shader_default_output.uint.uninitialized
 dEQP-VK.spirv_assembly.instruction.compute.shader_default_output.float.initialized
 dEQP-VK.spirv_assembly.instruction.compute.shader_default_output.float.uninitialized
+dEQP-VK.spirv_assembly.instruction.compute.opnmin.all
+dEQP-VK.spirv_assembly.instruction.compute.opnmax.all
+dEQP-VK.spirv_assembly.instruction.compute.opnclamp.all
 dEQP-VK.spirv_assembly.instruction.compute.android.opsrem.positive
 dEQP-VK.spirv_assembly.instruction.compute.android.opsrem.all
 dEQP-VK.spirv_assembly.instruction.compute.android.opsmod.positive
index 131643a..57c9bfd 100644 (file)
@@ -31,6 +31,7 @@
 #include "tcuStringTemplate.hpp"
 #include "tcuTestLog.hpp"
 #include "tcuVectorUtil.hpp"
+#include "tcuInterval.hpp"
 
 #include "vkDefs.hpp"
 #include "vkDeviceUtil.hpp"
@@ -878,6 +879,400 @@ tcu::TestCaseGroup* createOpFRemGroup (tcu::TestContext& testCtx)
        return group.release();
 }
 
+bool compareNMin (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
+{
+       if (outputAllocs.size() != 1)
+               return false;
+
+       const BufferSp&         expectedOutput                  = expectedOutputs[0];
+       const float* const      expectedOutputAsFloat   = static_cast<const float*>(expectedOutput->data());
+       const float* const      outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+
+       for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float); ++idx)
+       {
+               const float f0 = expectedOutputAsFloat[idx];
+               const float f1 = outputAsFloat[idx];
+
+               // For NMin, we accept NaN as output if both inputs were NaN.
+               // Otherwise the NaN is the wrong choise, as on architectures that
+               // do not handle NaN, those are huge values.
+               if (!(tcu::Float32(f1).isNaN() && tcu::Float32(f0).isNaN()) && deFloatAbs(f1 - f0) > 0.00001f)
+                       return false;
+       }
+
+       return true;
+}
+
+tcu::TestCaseGroup* createOpNMinGroup (tcu::TestContext& testCtx)
+{
+       de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnmin", "Test the OpNMin instruction"));
+       ComputeShaderSpec                               spec;
+       de::Random                                              rnd                             (deStringHash(group->getName()));
+       const int                                               numElements             = 200;
+       vector<float>                                   inputFloats1    (numElements, 0);
+       vector<float>                                   inputFloats2    (numElements, 0);
+       vector<float>                                   outputFloats    (numElements, 0);
+
+       fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
+       fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
+
+       // Make the first case a full-NAN case.
+       inputFloats1[0] = TCU_NAN;
+       inputFloats2[0] = TCU_NAN;
+
+       for (size_t ndx = 0; ndx < numElements; ++ndx)
+       {
+               // By default, pick the smallest
+               outputFloats[ndx] = std::min(inputFloats1[ndx], inputFloats2[ndx]);
+
+               // Make half of the cases NaN cases
+               if ((ndx & 1) == 0)
+               {
+                       // Alternate between the NaN operand
+                       if ((ndx & 2) == 0)
+                       {
+                               outputFloats[ndx] = inputFloats2[ndx];
+                               inputFloats1[ndx] = TCU_NAN;
+                       }
+                       else
+                       {
+                               outputFloats[ndx] = inputFloats1[ndx];
+                               inputFloats2[ndx] = TCU_NAN;
+                       }
+               }
+       }
+
+       spec.assembly =
+               "OpCapability Shader\n"
+               "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
+               "OpMemoryModel Logical GLSL450\n"
+               "OpEntryPoint GLCompute %main \"main\" %id\n"
+               "OpExecutionMode %main LocalSize 1 1 1\n"
+
+               "OpName %main           \"main\"\n"
+               "OpName %id             \"gl_GlobalInvocationID\"\n"
+
+               "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+               "OpDecorate %buf BufferBlock\n"
+               "OpDecorate %indata1 DescriptorSet 0\n"
+               "OpDecorate %indata1 Binding 0\n"
+               "OpDecorate %indata2 DescriptorSet 0\n"
+               "OpDecorate %indata2 Binding 1\n"
+               "OpDecorate %outdata DescriptorSet 0\n"
+               "OpDecorate %outdata Binding 2\n"
+               "OpDecorate %f32arr ArrayStride 4\n"
+               "OpMemberDecorate %buf 0 Offset 0\n"
+
+               + string(getComputeAsmCommonTypes()) +
+
+               "%buf        = OpTypeStruct %f32arr\n"
+               "%bufptr     = OpTypePointer Uniform %buf\n"
+               "%indata1    = OpVariable %bufptr Uniform\n"
+               "%indata2    = OpVariable %bufptr Uniform\n"
+               "%outdata    = OpVariable %bufptr Uniform\n"
+
+               "%id        = OpVariable %uvec3ptr Input\n"
+               "%zero      = OpConstant %i32 0\n"
+
+               "%main      = OpFunction %void None %voidf\n"
+               "%label     = OpLabel\n"
+               "%idval     = OpLoad %uvec3 %id\n"
+               "%x         = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
+               "%inval1    = OpLoad %f32 %inloc1\n"
+               "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
+               "%inval2    = OpLoad %f32 %inloc2\n"
+               "%rem       = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
+               "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "             OpStore %outloc %rem\n"
+               "             OpReturn\n"
+               "             OpFunctionEnd\n";
+
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
+       spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+       spec.numWorkGroups = IVec3(numElements, 1, 1);
+       spec.verifyIO = &compareNMin;
+
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+
+       return group.release();
+}
+
+bool compareNMax (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
+{
+       if (outputAllocs.size() != 1)
+               return false;
+
+       const BufferSp&         expectedOutput                  = expectedOutputs[0];
+       const float* const      expectedOutputAsFloat   = static_cast<const float*>(expectedOutput->data());
+       const float* const      outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+
+       for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float); ++idx)
+       {
+               const float f0 = expectedOutputAsFloat[idx];
+               const float f1 = outputAsFloat[idx];
+
+               // For NMax, NaN is considered acceptable result, since in
+               // architectures that do not handle NaNs, those are huge values.
+               if (!tcu::Float32(f1).isNaN() && deFloatAbs(f1 - f0) > 0.00001f)
+                       return false;
+       }
+
+       return true;
+}
+
+tcu::TestCaseGroup* createOpNMaxGroup (tcu::TestContext& testCtx)
+{
+       de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnmax", "Test the OpNMax instruction"));
+       ComputeShaderSpec                               spec;
+       de::Random                                              rnd                             (deStringHash(group->getName()));
+       const int                                               numElements             = 200;
+       vector<float>                                   inputFloats1    (numElements, 0);
+       vector<float>                                   inputFloats2    (numElements, 0);
+       vector<float>                                   outputFloats    (numElements, 0);
+
+       fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
+       fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
+
+       // Make the first case a full-NAN case.
+       inputFloats1[0] = TCU_NAN;
+       inputFloats2[0] = TCU_NAN;
+
+       for (size_t ndx = 0; ndx < numElements; ++ndx)
+       {
+               // By default, pick the biggest
+               outputFloats[ndx] = std::max(inputFloats1[ndx], inputFloats2[ndx]);
+
+               // Make half of the cases NaN cases
+               if ((ndx & 1) == 0)
+               {
+                       // Alternate between the NaN operand
+                       if ((ndx & 2) == 0)
+                       {
+                               outputFloats[ndx] = inputFloats2[ndx];
+                               inputFloats1[ndx] = TCU_NAN;
+                       }
+                       else
+                       {
+                               outputFloats[ndx] = inputFloats1[ndx];
+                               inputFloats2[ndx] = TCU_NAN;
+                       }
+               }
+       }
+
+       spec.assembly =
+               "OpCapability Shader\n"
+               "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
+               "OpMemoryModel Logical GLSL450\n"
+               "OpEntryPoint GLCompute %main \"main\" %id\n"
+               "OpExecutionMode %main LocalSize 1 1 1\n"
+
+               "OpName %main           \"main\"\n"
+               "OpName %id             \"gl_GlobalInvocationID\"\n"
+
+               "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+               "OpDecorate %buf BufferBlock\n"
+               "OpDecorate %indata1 DescriptorSet 0\n"
+               "OpDecorate %indata1 Binding 0\n"
+               "OpDecorate %indata2 DescriptorSet 0\n"
+               "OpDecorate %indata2 Binding 1\n"
+               "OpDecorate %outdata DescriptorSet 0\n"
+               "OpDecorate %outdata Binding 2\n"
+               "OpDecorate %f32arr ArrayStride 4\n"
+               "OpMemberDecorate %buf 0 Offset 0\n"
+
+               + string(getComputeAsmCommonTypes()) +
+
+               "%buf        = OpTypeStruct %f32arr\n"
+               "%bufptr     = OpTypePointer Uniform %buf\n"
+               "%indata1    = OpVariable %bufptr Uniform\n"
+               "%indata2    = OpVariable %bufptr Uniform\n"
+               "%outdata    = OpVariable %bufptr Uniform\n"
+
+               "%id        = OpVariable %uvec3ptr Input\n"
+               "%zero      = OpConstant %i32 0\n"
+
+               "%main      = OpFunction %void None %voidf\n"
+               "%label     = OpLabel\n"
+               "%idval     = OpLoad %uvec3 %id\n"
+               "%x         = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
+               "%inval1    = OpLoad %f32 %inloc1\n"
+               "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
+               "%inval2    = OpLoad %f32 %inloc2\n"
+               "%rem       = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
+               "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "             OpStore %outloc %rem\n"
+               "             OpReturn\n"
+               "             OpFunctionEnd\n";
+
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
+       spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+       spec.numWorkGroups = IVec3(numElements, 1, 1);
+       spec.verifyIO = &compareNMax;
+
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+
+       return group.release();
+}
+
+bool compareNClamp (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
+{
+       if (outputAllocs.size() != 1)
+               return false;
+
+       const BufferSp&         expectedOutput                  = expectedOutputs[0];
+       const float* const      expectedOutputAsFloat   = static_cast<const float*>(expectedOutput->data());
+       const float* const      outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+
+       for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float) / 2; ++idx)
+       {
+               const float e0 = expectedOutputAsFloat[idx * 2];
+               const float e1 = expectedOutputAsFloat[idx * 2 + 1];
+               const float res = outputAsFloat[idx];
+
+               // For NClamp, we have two possible outcomes based on
+               // whether NaNs are handled or not.
+               // If either min or max value is NaN, the result is undefined,
+               // so this test doesn't stress those. If the clamped value is
+               // NaN, and NaNs are handled, the result is min; if NaNs are not
+               // handled, they are big values that result in max.
+               // If all three parameters are NaN, the result should be NaN.
+               if (!((tcu::Float32(e0).isNaN() && tcu::Float32(res).isNaN()) ||
+                        (deFloatAbs(e0 - res) < 0.00001f) ||
+                        (deFloatAbs(e1 - res) < 0.00001f)))
+                       return false;
+       }
+
+       return true;
+}
+
+tcu::TestCaseGroup* createOpNClampGroup (tcu::TestContext& testCtx)
+{
+       de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnclamp", "Test the OpNClamp instruction"));
+       ComputeShaderSpec                               spec;
+       de::Random                                              rnd                             (deStringHash(group->getName()));
+       const int                                               numElements             = 200;
+       vector<float>                                   inputFloats1    (numElements, 0);
+       vector<float>                                   inputFloats2    (numElements, 0);
+       vector<float>                                   inputFloats3    (numElements, 0);
+       vector<float>                                   outputFloats    (numElements * 2, 0);
+
+       fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
+       fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
+       fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats3[0], numElements);
+
+       for (size_t ndx = 0; ndx < numElements; ++ndx)
+       {
+               // Results are only defined if max value is bigger than min value.
+               if (inputFloats2[ndx] > inputFloats3[ndx])
+               {
+                       float t = inputFloats2[ndx];
+                       inputFloats2[ndx] = inputFloats3[ndx];
+                       inputFloats3[ndx] = t;
+               }
+
+               // By default, do the clamp, setting both possible answers
+               float defaultRes = std::min(std::max(inputFloats1[ndx], inputFloats2[ndx]), inputFloats3[ndx]);
+
+               float maxResA = std::max(inputFloats1[ndx], inputFloats2[ndx]);
+               float maxResB = maxResA;
+
+               // Alternate between the NaN cases
+               if (ndx & 1)
+               {
+                       inputFloats1[ndx] = TCU_NAN;
+                       // If NaN is handled, the result should be same as the clamp minimum.
+                       // If NaN is not handled, the result should clamp to the clamp maximum.
+                       maxResA = inputFloats2[ndx];
+                       maxResB = inputFloats3[ndx];
+               }
+               else
+               {
+                       // Not a NaN case - only one legal result.
+                       maxResA = defaultRes;
+                       maxResB = defaultRes;
+               }
+
+               outputFloats[ndx * 2] = maxResA;
+               outputFloats[ndx * 2 + 1] = maxResB;
+       }
+
+       // Make the first case a full-NAN case.
+       inputFloats1[0] = TCU_NAN;
+       inputFloats2[0] = TCU_NAN;
+       inputFloats3[0] = TCU_NAN;
+       outputFloats[0] = TCU_NAN;
+       outputFloats[1] = TCU_NAN;
+
+       spec.assembly =
+               "OpCapability Shader\n"
+               "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
+               "OpMemoryModel Logical GLSL450\n"
+               "OpEntryPoint GLCompute %main \"main\" %id\n"
+               "OpExecutionMode %main LocalSize 1 1 1\n"
+
+               "OpName %main           \"main\"\n"
+               "OpName %id             \"gl_GlobalInvocationID\"\n"
+
+               "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+               "OpDecorate %buf BufferBlock\n"
+               "OpDecorate %indata1 DescriptorSet 0\n"
+               "OpDecorate %indata1 Binding 0\n"
+               "OpDecorate %indata2 DescriptorSet 0\n"
+               "OpDecorate %indata2 Binding 1\n"
+               "OpDecorate %indata3 DescriptorSet 0\n"
+               "OpDecorate %indata3 Binding 2\n"
+               "OpDecorate %outdata DescriptorSet 0\n"
+               "OpDecorate %outdata Binding 3\n"
+               "OpDecorate %f32arr ArrayStride 4\n"
+               "OpMemberDecorate %buf 0 Offset 0\n"
+
+               + string(getComputeAsmCommonTypes()) +
+
+               "%buf        = OpTypeStruct %f32arr\n"
+               "%bufptr     = OpTypePointer Uniform %buf\n"
+               "%indata1    = OpVariable %bufptr Uniform\n"
+               "%indata2    = OpVariable %bufptr Uniform\n"
+               "%indata3    = OpVariable %bufptr Uniform\n"
+               "%outdata    = OpVariable %bufptr Uniform\n"
+
+               "%id        = OpVariable %uvec3ptr Input\n"
+               "%zero      = OpConstant %i32 0\n"
+
+               "%main      = OpFunction %void None %voidf\n"
+               "%label     = OpLabel\n"
+               "%idval     = OpLoad %uvec3 %id\n"
+               "%x         = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
+               "%inval1    = OpLoad %f32 %inloc1\n"
+               "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
+               "%inval2    = OpLoad %f32 %inloc2\n"
+               "%inloc3    = OpAccessChain %f32ptr %indata3 %zero %x\n"
+               "%inval3    = OpLoad %f32 %inloc3\n"
+               "%rem       = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
+               "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "             OpStore %outloc %rem\n"
+               "             OpReturn\n"
+               "             OpFunctionEnd\n";
+
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
+       spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+       spec.numWorkGroups = IVec3(numElements, 1, 1);
+       spec.verifyIO = &compareNClamp;
+
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+
+       return group.release();
+}
+
 tcu::TestCaseGroup* createOpSRemComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
 {
        de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsrem", "Test the OpSRem instruction"));
@@ -7441,7 +7836,9 @@ tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
        computeTests->addChild(createOpCompositeInsertGroup(testCtx));
        computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
        computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
-
+       computeTests->addChild(createOpNMinGroup(testCtx));
+       computeTests->addChild(createOpNMaxGroup(testCtx));
+       computeTests->addChild(createOpNClampGroup(testCtx));
        {
                de::MovePtr<tcu::TestCaseGroup> computeAndroidTests     (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
 
index ff26b59..0051f0e 100644 (file)
@@ -161818,6 +161818,9 @@ dEQP-VK.spirv_assembly.instruction.compute.shader_default_output.uint.initialize
 dEQP-VK.spirv_assembly.instruction.compute.shader_default_output.uint.uninitialized
 dEQP-VK.spirv_assembly.instruction.compute.shader_default_output.float.initialized
 dEQP-VK.spirv_assembly.instruction.compute.shader_default_output.float.uninitialized
+dEQP-VK.spirv_assembly.instruction.compute.opnmin.all
+dEQP-VK.spirv_assembly.instruction.compute.opnmax.all
+dEQP-VK.spirv_assembly.instruction.compute.opnclamp.all
 dEQP-VK.spirv_assembly.instruction.compute.16bit_storage.uniform_32_to_16.uniform_buffer_block_scalar_float_rtz
 dEQP-VK.spirv_assembly.instruction.compute.16bit_storage.uniform_32_to_16.uniform_buffer_block_scalar_float_rte
 dEQP-VK.spirv_assembly.instruction.compute.16bit_storage.uniform_32_to_16.uniform_buffer_block_scalar_float_unspecified_rnd_mode