VK_KHR_16bit_storage: granularity stress test
author Arkadiusz Sarwa <arkadiusz.sarwa@mobica.com>
Fri, 20 Oct 2017 10:47:28 +0000 (12:47 +0200)
committer Alexander Galazin <Alexander.Galazin@arm.com>
Mon, 11 Dec 2017 14:23:07 +0000 (09:23 -0500)
New tests:
dEQP-VK.spirv_assembly.instruction.compute.16bit_storage.uniform_16_to_16.stress_test

Components: Vulkan

VK-GL-CTS issue: 419

Change-Id: Id1a2d5d726e3fa293b07d9385d548fdfe071a681

android/cts/master/vk-master.txt
external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsm16bitStorageTests.cpp
external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsmComputeShaderCase.cpp
external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsmComputeShaderTestUtil.hpp
external/vulkancts/mustpass/1.1.0/vk-default.txt

index 61c0731..a9632f3 100755 (executable)
@@ -173273,6 +173273,7 @@ dEQP-VK.spirv_assembly.instruction.compute.16bit_storage.push_constant_16_to_32.
 dEQP-VK.spirv_assembly.instruction.compute.16bit_storage.push_constant_16_to_32.scalar_uint
 dEQP-VK.spirv_assembly.instruction.compute.16bit_storage.push_constant_16_to_32.vector_sint
 dEQP-VK.spirv_assembly.instruction.compute.16bit_storage.push_constant_16_to_32.vector_uint
+dEQP-VK.spirv_assembly.instruction.compute.16bit_storage.uniform_16_to_16.stress_test
 dEQP-VK.spirv_assembly.instruction.compute.ubo_padding.mat2x2
 dEQP-VK.spirv_assembly.instruction.compute.conditional_branch.same_labels_true
 dEQP-VK.spirv_assembly.instruction.compute.conditional_branch.same_labels_false
index e6e0b2d..ba2078e 100644 (file)
@@ -162,6 +162,16 @@ bool graphicsCheck16BitFloats (const std::vector<Resource>&        originalFloats,
        return true;
 }
 
+bool computeCheckBuffersFloats (const std::vector<BufferSp>&   originalFloats,
+                                                               const vector<AllocationSp>&             outputAllocs,
+                                                               const std::vector<BufferSp>&    /*expectedOutputs*/,
+                                                               tcu::TestLog&                                   /*log*/)
+{
+       std::vector<deUint8> result;
+       originalFloats.front()->getBytes(result);
+       return deMemCmp(&result[0], outputAllocs.front()->getHostPtr(), result.size()) == 0;
+}
+
 template<RoundingModeFlags RoundingMode>
 bool computeCheck16BitFloats (const std::vector<BufferSp>&     originalFloats,
                                                          const vector<AllocationSp>&   outputAllocs,
@@ -1178,6 +1188,87 @@ void addGraphics16BitStorageUniformInt32To16Group (tcu::TestCaseGroup* testGroup
                        }
 }
 
+void addCompute16bitStorageUniform16To16Group (tcu::TestCaseGroup* group)
+{
+       tcu::TestContext&               testCtx                         = group->getTestContext();
+       de::Random                              rnd                                     (deStringHash(group->getName()));
+       const int                               numElements                     = 128;
+       const vector<deFloat16> float16Data                     = getFloat16s(rnd, numElements);
+       const vector<deFloat16> float16DummyData        (numElements, 0);
+       ComputeShaderSpec               spec;
+
+       std::ostringstream              shaderTemplate;
+               shaderTemplate<<"OpCapability Shader\n"
+                       << "OpCapability StorageUniformBufferBlock16\n"
+                       << "OpExtension \"SPV_KHR_16bit_storage\"\n"
+                       << "OpMemoryModel Logical GLSL450\n"
+                       << "OpEntryPoint GLCompute %main \"main\" %id\n"
+                       << "OpExecutionMode %main LocalSize 1 1 1\n"
+                       << "OpDecorate %id BuiltIn GlobalInvocationId\n"
+                       << "OpDecorate %f16arr ArrayStride 2\n"
+                       << "OpMemberDecorate %SSBO_IN 0 Coherent\n"
+                       << "OpMemberDecorate %SSBO_OUT 0 Coherent\n"
+                       << "OpMemberDecorate %SSBO_IN 0 Offset 0\n"
+                       << "OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
+                       << "OpDecorate %SSBO_IN BufferBlock\n"
+                       << "OpDecorate %SSBO_OUT BufferBlock\n"
+                       << "OpDecorate %ssboIN DescriptorSet 0\n"
+                       << "OpDecorate %ssboOUT DescriptorSet 0\n"
+                       << "OpDecorate %ssboIN Binding 0\n"
+                       << "OpDecorate %ssboOUT Binding 1\n"
+                       << "\n"
+                       << "%bool      = OpTypeBool\n"
+                       << "%void      = OpTypeVoid\n"
+                       << "%voidf     = OpTypeFunction %void\n"
+                       << "%u32       = OpTypeInt 32 0\n"
+                       << "%i32       = OpTypeInt 32 1\n"
+                       << "%uvec3     = OpTypeVector %u32 3\n"
+                       << "%uvec3ptr  = OpTypePointer Input %uvec3\n"
+                       << "%f16       = OpTypeFloat 16\n"
+                       << "%f16ptr    = OpTypePointer Uniform %f16\n"
+                       << "\n"
+                       << "%zero      = OpConstant %i32 0\n"
+                       << "%c_size    = OpConstant %i32 " << numElements << "\n"
+                       << "\n"
+                       << "%f16arr    = OpTypeArray %f16 %c_size\n"
+                       << "%SSBO_IN   = OpTypeStruct %f16arr\n"
+                       << "%SSBO_OUT  = OpTypeStruct %f16arr\n"
+                       << "%up_SSBOIN = OpTypePointer Uniform %SSBO_IN\n"
+                       << "%up_SSBOOUT = OpTypePointer Uniform %SSBO_OUT\n"
+                       << "%ssboIN    = OpVariable %up_SSBOIN Uniform\n"
+                       << "%ssboOUT   = OpVariable %up_SSBOOUT Uniform\n"
+                       << "\n"
+                       << "%id        = OpVariable %uvec3ptr Input\n"
+                       << "%main      = OpFunction %void None %voidf\n"
+                       << "%label     = OpLabel\n"
+                       << "%idval     = OpLoad %uvec3 %id\n"
+                       << "%x         = OpCompositeExtract %u32 %idval 0\n"
+                       << "%y         = OpCompositeExtract %u32 %idval 1\n"
+                       << "\n"
+                       << "%inlocx     = OpAccessChain %f16ptr %ssboIN %zero %x \n"
+                       << "%valx       = OpLoad %f16 %inlocx\n"
+                       << "%outlocx    = OpAccessChain %f16ptr %ssboOUT %zero %x \n"
+                       << "             OpStore %outlocx %valx\n"
+
+                       << "%inlocy    = OpAccessChain %f16ptr %ssboIN %zero %y \n"
+                       << "%valy      = OpLoad %f16 %inlocy\n"
+                       << "%outlocy   = OpAccessChain %f16ptr %ssboOUT %zero %y \n"
+                       << "             OpStore %outlocy %valy\n"
+                       << "\n"
+                       << "             OpReturn\n"
+                       << "             OpFunctionEnd\n";
+
+       spec.assembly                   = shaderTemplate.str();
+       spec.numWorkGroups              = IVec3(numElements, numElements, 1);
+       spec.verifyIO                   = computeCheckBuffersFloats;
+       spec.coherentMemory             = true;
+       spec.inputs.push_back(BufferSp(new Float16Buffer(float16Data)));
+       spec.outputs.push_back(BufferSp(new Float16Buffer(float16DummyData)));
+       spec.extensions.push_back("VK_KHR_16bit_storage");
+
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "stress_test", "Granularity stress test", spec));
+}
+
 void addCompute16bitStorageUniform32To16Group (tcu::TestCaseGroup* group)
 {
        tcu::TestContext&                               testCtx                 = group->getTestContext();
@@ -3194,6 +3285,8 @@ tcu::TestCaseGroup* create16BitStorageComputeGroup (tcu::TestContext& testCtx)
        addTestGroup(group.get(), "uniform_16_to_32", "16bit floats/ints to 32bit tests under capability StorageUniform{|BufferBlock}", addCompute16bitStorageUniform16To32Group);
        addTestGroup(group.get(), "push_constant_16_to_32", "16bit floats/ints to 32bit tests under capability StoragePushConstant16", addCompute16bitStoragePushConstant16To32Group);
 
+       addTestGroup(group.get(), "uniform_16_to_16", "16bit floats/ints to 16bit tests under capability StoragePushConstant16", addCompute16bitStorageUniform16To16Group);
+
        return group.release();
 }
 
index 43c8fd0..2845c44 100644 (file)
@@ -60,7 +60,13 @@ typedef de::SharedPtr<SamplerHandleUp>                               SamplerHandleSp;
  * The memory is created as host visible and passed back as a vk::Allocation
  * instance via outMemory.
  *//*--------------------------------------------------------------------*/
-Move<VkBuffer> createBufferAndBindMemory (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorType dtype, Allocator& allocator, size_t numBytes, AllocationMp* outMemory)
+Move<VkBuffer> createBufferAndBindMemory (const DeviceInterface&       vkdi,
+                                                                                 const VkDevice&                       device,
+                                                                                 VkDescriptorType                      dtype,
+                                                                                 Allocator&                            allocator,
+                                                                                 size_t                                        numBytes,
+                                                                                 AllocationMp*                         outMemory,
+                                                                                 bool                                          coherent = false)
 {
        VkBufferUsageFlags                      usageBit                        = (VkBufferUsageFlags)0;
 
@@ -86,9 +92,9 @@ Move<VkBuffer> createBufferAndBindMemory (const DeviceInterface& vkdi, const VkD
                DE_NULL,                                                                // pQueueFamilyIndices
        };
 
-       Move<VkBuffer>                          buffer                          (createBuffer(vkdi, device, &bufferCreateInfo));
-       const VkMemoryRequirements      requirements            = getBufferMemoryRequirements(vkdi, device, *buffer);
-       AllocationMp                            bufferMemory            = allocator.allocate(requirements, MemoryRequirement::HostVisible);
+       Move<VkBuffer>                          buffer                  (createBuffer(vkdi, device, &bufferCreateInfo));
+       const VkMemoryRequirements      requirements    = getBufferMemoryRequirements(vkdi, device, *buffer);
+       AllocationMp                            bufferMemory    = allocator.allocate(requirements, coherent ? MemoryRequirement::Coherent | MemoryRequirement::HostVisible : MemoryRequirement::HostVisible);
 
        VK_CHECK(vkdi.bindBufferMemory(device, *buffer, bufferMemory->getMemory(), bufferMemory->getOffset()));
        *outMemory = bufferMemory;
@@ -244,25 +250,30 @@ void copyBufferToImage (const DeviceInterface& vkdi, const VkDevice& device, con
        }
 }
 
-void setMemory (const DeviceInterface& vkdi, const VkDevice& device, Allocation* destAlloc, size_t numBytes, const void* data)
+void setMemory (const DeviceInterface& vkdi, const VkDevice& device, Allocation* destAlloc, size_t numBytes, const void* data, bool coherent = false)
 {
        void* const hostPtr = destAlloc->getHostPtr();
 
        deMemcpy((deUint8*)hostPtr, data, numBytes);
-       flushMappedMemoryRange(vkdi, device, destAlloc->getMemory(), destAlloc->getOffset(), numBytes);
+
+       if (!coherent)
+               flushMappedMemoryRange(vkdi, device, destAlloc->getMemory(), destAlloc->getOffset(), numBytes);
 }
 
-void fillMemoryWithValue (const DeviceInterface& vkdi, const VkDevice& device, Allocation* destAlloc, size_t numBytes, deUint8 value)
+void fillMemoryWithValue (const DeviceInterface& vkdi, const VkDevice& device, Allocation* destAlloc, size_t numBytes, deUint8 value, bool coherent = false)
 {
        void* const hostPtr = destAlloc->getHostPtr();
 
        deMemset((deUint8*)hostPtr, value, numBytes);
-       flushMappedMemoryRange(vkdi, device, destAlloc->getMemory(), destAlloc->getOffset(), numBytes);
+
+       if (!coherent)
+               flushMappedMemoryRange(vkdi, device, destAlloc->getMemory(), destAlloc->getOffset(), numBytes);
 }
 
-void invalidateMemory (const DeviceInterface& vkdi, const VkDevice& device, Allocation* srcAlloc, size_t numBytes)
+void invalidateMemory (const DeviceInterface& vkdi, const VkDevice& device, Allocation* srcAlloc, size_t numBytes, bool coherent = false)
 {
-       invalidateMappedMemoryRange(vkdi, device, srcAlloc->getMemory(), srcAlloc->getOffset(), numBytes);
+       if (!coherent)
+               invalidateMappedMemoryRange(vkdi, device, srcAlloc->getMemory(), srcAlloc->getOffset(), numBytes);
 }
 
 /*--------------------------------------------------------------------*//*!
@@ -577,9 +588,9 @@ tcu::TestStatus SpvAsmComputeShaderInstance::iterate (void)
                        const size_t            numBytes                = inputBytes.size();
 
                        AllocationMp            bufferAlloc;
-                       BufferHandleUp*         buffer                  = new BufferHandleUp(createBufferAndBindMemory(vkdi, device, descType, allocator, numBytes, &bufferAlloc));
+                       BufferHandleUp*         buffer                  = new BufferHandleUp(createBufferAndBindMemory(vkdi, device, descType, allocator, numBytes, &bufferAlloc, m_shaderSpec.coherentMemory));
 
-                       setMemory(vkdi, device, &*bufferAlloc, numBytes, &inputBytes.front());
+                       setMemory(vkdi, device, &*bufferAlloc, numBytes, &inputBytes.front(), m_shaderSpec.coherentMemory);
                        inputBuffers.push_back(BufferHandleSp(buffer));
                        inputAllocs.push_back(de::SharedPtr<Allocation>(bufferAlloc.release()));
                }
@@ -757,9 +768,9 @@ tcu::TestStatus SpvAsmComputeShaderInstance::iterate (void)
                output->getBytes(outputBytes);
 
                const size_t            numBytes        = outputBytes.size();
-               BufferHandleUp*         buffer          = new BufferHandleUp(createBufferAndBindMemory(vkdi, device, descriptorTypes.back(), allocator, numBytes, &alloc));
+               BufferHandleUp*         buffer          = new BufferHandleUp(createBufferAndBindMemory(vkdi, device, descriptorTypes.back(), allocator, numBytes, &alloc, m_shaderSpec.coherentMemory));
 
-               fillMemoryWithValue(vkdi, device, &*alloc, numBytes, 0xff);
+               fillMemoryWithValue(vkdi, device, &*alloc, numBytes, 0xff, m_shaderSpec.coherentMemory);
                descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes));
                outputBuffers.push_back(BufferHandleSp(buffer));
                outputAllocs.push_back(de::SharedPtr<Allocation>(alloc.release()));
@@ -783,6 +794,8 @@ tcu::TestStatus SpvAsmComputeShaderInstance::iterate (void)
 
        Unique<VkPipeline>                                      computePipeline         (createComputePipeline(vkdi, device, *pipelineLayout, *module, m_shaderSpec.entryPoint.c_str(), m_shaderSpec.specConstants));
 
+       // Create command buffer and record commands
+
        const VkCommandBufferBeginInfo          cmdBufferBeginInfo      =
        {
                VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,    // sType
@@ -832,7 +845,7 @@ tcu::TestStatus SpvAsmComputeShaderInstance::iterate (void)
        // Invalidate output memory ranges before checking on host.
        for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
        {
-               invalidateMemory(vkdi, device, outputAllocs[outputNdx].get(), m_shaderSpec.outputs[outputNdx]->getByteSize());
+               invalidateMemory(vkdi, device, outputAllocs[outputNdx].get(), m_shaderSpec.outputs[outputNdx]->getByteSize(), m_shaderSpec.coherentMemory);
        }
 
        // Check output.
index ffe0804..179eca2 100644 (file)
@@ -234,6 +234,7 @@ struct ComputeShaderSpec
        ComputeVerifyIOFunc                                             verifyIO;
        ComputeVerifyBinaryFunc                                 verifyBinary;
        SpirvVersion                                                    spirvVersion;
+       bool                                                                    coherentMemory;
 
                                                                                        ComputeShaderSpec (void)
                                                                                                : entryPoint                                    ("main")
@@ -244,6 +245,7 @@ struct ComputeShaderSpec
                                                                                                , verifyIO                                              (DE_NULL)
                                                                                                , verifyBinary                                  (DE_NULL)
                                                                                                , spirvVersion                                  (SPIRV_VERSION_1_0)
+                                                                                               , coherentMemory                                (false)
                                                                                        {}
 };
 
index c157166..fb63f94 100755 (executable)
@@ -173274,6 +173274,7 @@ dEQP-VK.spirv_assembly.instruction.compute.16bit_storage.push_constant_16_to_32.
 dEQP-VK.spirv_assembly.instruction.compute.16bit_storage.push_constant_16_to_32.scalar_uint
 dEQP-VK.spirv_assembly.instruction.compute.16bit_storage.push_constant_16_to_32.vector_sint
 dEQP-VK.spirv_assembly.instruction.compute.16bit_storage.push_constant_16_to_32.vector_uint
+dEQP-VK.spirv_assembly.instruction.compute.16bit_storage.uniform_16_to_16.stress_test
 dEQP-VK.spirv_assembly.instruction.compute.ubo_padding.mat2x2
 dEQP-VK.spirv_assembly.instruction.compute.conditional_branch.same_labels_true
 dEQP-VK.spirv_assembly.instruction.compute.conditional_branch.same_labels_false