Correct few pipeline_barrier tests
authorPiotr Byszewski <piotr.byszewski@mobica.com>
Thu, 21 May 2020 16:45:23 +0000 (18:45 +0200)
committerAlexander Galazin <Alexander.Galazin@arm.com>
Mon, 1 Jun 2020 11:24:26 +0000 (07:24 -0400)
In pipeline barrier tests there is findMaxBufferSize util function.
It consists of two parts. First part checks if we can create buffer
that requires as much space as we want to test, and second, used when
first part fails, searches for maximal buffer that we can create
within that limit. Since test values are multiples of 1024, the second
part of this function was never hit by most implementations.
It was hit by an implementation that over-allocates buffers, and this
revealed that classes operating on ubo and ssbo were not handling
buffer sizes that are not multiple of 16 in case of ssbo and multiple
of 1024 in case of ubo (the rest of the RenderPassCommands handle that
case). Note that this can be tested by just commenting out the first part
of the findMaxBufferSize function.
This change makes sure that ubo and ssbo tests work on smaller buffers,
but with the limitation that the trailing (size modulo 16) values of those
buffers are not used in tests.

Components: Vulkan

VK-GL-CTS public issue: 206

Affects:
dEQP-VK.memory.pipeline_barrier.*uniform_buffer.*
dEQP-VK.memory.pipeline_barrier.*storage_buffer.*

Change-Id: Ia7d09500116783ac9ea047d18690efa329d625e7

external/vulkancts/modules/vulkan/memory/vktMemoryPipelineBarrierTests.cpp

index 4094d85..9951b04 100644 (file)
@@ -4901,6 +4901,10 @@ public:
        void                                                    submit                                          (SubmitContext& context);
        void                                                    verify                                          (VerifyRenderPassContext&, size_t);
 
+protected:
+
+       deUint32                                                calculateBufferPartSize         (size_t descriptorSetNdx) const;
+
 private:
        PipelineResources                               m_resources;
        vk::Move<vk::VkDescriptorPool>  m_descriptorPool;
@@ -4933,7 +4937,9 @@ void RenderVertexUniformBuffer::prepare (PrepareRenderPassContext& context)
        const vk::Unique<vk::VkShaderModule>            fragmentShaderModule    (vk::createShaderModule(vkd, device, context.getBinaryCollection().get("render-white.frag"), 0));
        vector<vk::VkDescriptorSetLayoutBinding>        bindings;
 
+       // make sure buffer size is multiple of 16 (in glsl we use uvec4 to store 16 values)
        m_bufferSize = context.getBufferSize();
+       m_bufferSize = static_cast<vk::VkDeviceSize>(m_bufferSize / 16u) * 16u;
 
        {
                const vk::VkDescriptorSetLayoutBinding binding =
@@ -4993,9 +4999,7 @@ void RenderVertexUniformBuffer::prepare (PrepareRenderPassContext& context)
                        {
                                context.getBuffer(),
                                (vk::VkDeviceSize)(descriptorSetNdx * (size_t)MAX_UNIFORM_BUFFER_SIZE),
-                               m_bufferSize < (descriptorSetNdx + 1) * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
-                                       ? m_bufferSize - descriptorSetNdx * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
-                                       : (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
+                               calculateBufferPartSize(descriptorSetNdx)
                        };
                        const vk::VkWriteDescriptorSet                  write           =
                        {
@@ -5025,9 +5029,7 @@ void RenderVertexUniformBuffer::submit (SubmitContext& context)
 
        for (size_t descriptorSetNdx = 0; descriptorSetNdx < m_descriptorSets.size(); descriptorSetNdx++)
        {
-               const size_t    size    = (size_t)(m_bufferSize < (descriptorSetNdx + 1) * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
-                                                               ? m_bufferSize - descriptorSetNdx * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
-                                                               : (size_t)MAX_UNIFORM_BUFFER_SIZE);
+               const size_t    size    = calculateBufferPartSize(descriptorSetNdx);
                const deUint32  count   = (deUint32)(size / 2);
 
                vkd.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_GRAPHICS, *m_resources.pipelineLayout, 0u, 1u, &m_descriptorSets[descriptorSetNdx], 0u, DE_NULL);
@@ -5040,9 +5042,7 @@ void RenderVertexUniformBuffer::verify (VerifyRenderPassContext& context, size_t
        for (size_t descriptorSetNdx = 0; descriptorSetNdx < m_descriptorSets.size(); descriptorSetNdx++)
        {
                const size_t    offset  = descriptorSetNdx * MAX_UNIFORM_BUFFER_SIZE;
-               const size_t    size    = (size_t)(m_bufferSize < (descriptorSetNdx + 1) * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
-                                                               ? m_bufferSize - descriptorSetNdx * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
-                                                               : (size_t)MAX_UNIFORM_BUFFER_SIZE);
+               const size_t    size    = calculateBufferPartSize(descriptorSetNdx);
                const size_t    count   = size / 2;
 
                for (size_t pos = 0; pos < count; pos++)
@@ -5055,6 +5055,14 @@ void RenderVertexUniformBuffer::verify (VerifyRenderPassContext& context, size_t
        }
 }
 
+deUint32 RenderVertexUniformBuffer::calculateBufferPartSize(size_t descriptorSetNdx) const
+{
+       deUint32 size = static_cast<deUint32>(m_bufferSize) - static_cast<deUint32>(descriptorSetNdx) * MAX_UNIFORM_BUFFER_SIZE;
+       if (size < MAX_UNIFORM_BUFFER_SIZE)
+               return size;
+       return MAX_UNIFORM_BUFFER_SIZE;
+}
+
 class RenderVertexUniformTexelBuffer : public RenderPassCommand
 {
 public:
@@ -5985,6 +5993,10 @@ public:
        void                                                    submit                                                  (SubmitContext& context);
        void                                                    verify                                                  (VerifyRenderPassContext&, size_t);
 
+protected:
+
+       deUint32                                                calculateBufferPartSize                 (size_t descriptorSetNdx) const;
+
 private:
        PipelineResources                               m_resources;
        vk::Move<vk::VkDescriptorPool>  m_descriptorPool;
@@ -5993,6 +6005,7 @@ private:
        vk::VkDeviceSize                                m_bufferSize;
        size_t                                                  m_targetWidth;
        size_t                                                  m_targetHeight;
+       deUint32                                                m_valuesPerPixel;
 };
 
 RenderFragmentUniformBuffer::~RenderFragmentUniformBuffer (void)
@@ -6019,7 +6032,9 @@ void RenderFragmentUniformBuffer::prepare (PrepareRenderPassContext& context)
        const vk::Unique<vk::VkShaderModule>            fragmentShaderModule    (vk::createShaderModule(vkd, device, context.getBinaryCollection().get("uniform-buffer.frag"), 0));
        vector<vk::VkDescriptorSetLayoutBinding>        bindings;
 
+       // make sure buffer is smaller than MAX_SIZE and is multiple of 16 (in glsl we use uvec4 to store 16 values)
        m_bufferSize    = de::min(context.getBufferSize(), (vk::VkDeviceSize)MAX_SIZE);
+       m_bufferSize    = static_cast<vk::VkDeviceSize>(m_bufferSize / 16u) * 16u;
        m_targetWidth   = context.getTargetWidth();
        m_targetHeight  = context.getTargetHeight();
 
@@ -6039,7 +6054,7 @@ void RenderFragmentUniformBuffer::prepare (PrepareRenderPassContext& context)
        {
                vk::VK_SHADER_STAGE_FRAGMENT_BIT,
                0u,
-               8u
+               12u
        };
 
        createPipelineWithResources(vkd, device, renderPass, subpass, *vertexShaderModule, *fragmentShaderModule, context.getTargetWidth(), context.getTargetHeight(),
@@ -6065,6 +6080,8 @@ void RenderFragmentUniformBuffer::prepare (PrepareRenderPassContext& context)
 
                m_descriptorPool = vk::createDescriptorPool(vkd, device, &createInfo);
                m_descriptorSets.resize(descriptorCount);
+
+               m_valuesPerPixel = (deUint32)divRoundUp<size_t>(descriptorCount * de::min<size_t>((size_t)m_bufferSize / 4, MAX_UNIFORM_BUFFER_SIZE / 4), m_targetWidth * m_targetHeight);
        }
 
        for (size_t descriptorSetNdx = 0; descriptorSetNdx < m_descriptorSets.size(); descriptorSetNdx++)
@@ -6087,9 +6104,7 @@ void RenderFragmentUniformBuffer::prepare (PrepareRenderPassContext& context)
                        {
                                context.getBuffer(),
                                (vk::VkDeviceSize)(descriptorSetNdx * (size_t)MAX_UNIFORM_BUFFER_SIZE),
-                               m_bufferSize < (descriptorSetNdx + 1) * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
-                                       ? m_bufferSize - descriptorSetNdx * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
-                                       : (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
+                               calculateBufferPartSize(descriptorSetNdx)
                        };
                        const vk::VkWriteDescriptorSet                  write           =
                        {
@@ -6123,10 +6138,12 @@ void RenderFragmentUniformBuffer::submit (SubmitContext& context)
                {
                        const deUint32  callId;
                        const deUint32  valuesPerPixel;
+                       const deUint32  bufferSize;
                } callParams =
                {
                        (deUint32)descriptorSetNdx,
-                       (deUint32)divRoundUp<size_t>(m_descriptorSets.size() * (MAX_UNIFORM_BUFFER_SIZE / 4), m_targetWidth * m_targetHeight)
+                       m_valuesPerPixel,
+                       calculateBufferPartSize(descriptorSetNdx) / 16u
                };
 
                vkd.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_GRAPHICS, *m_resources.pipelineLayout, 0u, 1u, &m_descriptorSets[descriptorSetNdx], 0u, DE_NULL);
@@ -6137,35 +6154,34 @@ void RenderFragmentUniformBuffer::submit (SubmitContext& context)
 
 void RenderFragmentUniformBuffer::verify (VerifyRenderPassContext& context, size_t)
 {
-       const deUint32  valuesPerPixel  = (deUint32)divRoundUp<size_t>(m_descriptorSets.size() * (MAX_UNIFORM_BUFFER_SIZE / 4), m_targetWidth * m_targetHeight);
-       const size_t    arraySize               = MAX_UNIFORM_BUFFER_SIZE / (sizeof(deUint32) * 4);
-       const size_t    arrayIntSize    = arraySize * 4;
+       const size_t    arrayIntSize    = MAX_UNIFORM_BUFFER_SIZE / sizeof(deUint32);
 
        for (int y = 0; y < context.getReferenceTarget().getSize().y(); y++)
        for (int x = 0; x < context.getReferenceTarget().getSize().x(); x++)
        {
-               const size_t firstDescriptorSetNdx = de::min<size_t>((y * 256u + x) / (arrayIntSize / valuesPerPixel), m_descriptorSets.size() - 1);
+               const deUint32  id                                              = (deUint32)y * 256u + (deUint32)x;
+               const size_t    firstDescriptorSetNdx   = de::min<size_t>(id / (arrayIntSize / m_valuesPerPixel), m_descriptorSets.size() - 1);
 
                for (size_t descriptorSetNdx = firstDescriptorSetNdx; descriptorSetNdx < m_descriptorSets.size(); descriptorSetNdx++)
                {
                        const size_t    offset  = descriptorSetNdx * MAX_UNIFORM_BUFFER_SIZE;
                        const deUint32  callId  = (deUint32)descriptorSetNdx;
+                       const deUint32  count   = calculateBufferPartSize(descriptorSetNdx) / 16u;
 
-                       const deUint32  id              = callId * ((deUint32)arrayIntSize / valuesPerPixel) + (deUint32)y * 256u + (deUint32)x;
-
-                       if (y * 256u + x < callId * (arrayIntSize / valuesPerPixel))
+                       if (id < callId * (arrayIntSize / m_valuesPerPixel))
                                continue;
                        else
                        {
                                deUint32 value = id;
 
-                               for (deUint32 i = 0; i < valuesPerPixel; i++)
+                               for (deUint32 i = 0; i < m_valuesPerPixel; i++)
                                {
-                                       value   = ((deUint32)context.getReference().get(offset + (value % (MAX_UNIFORM_BUFFER_SIZE / sizeof(deUint32))) * 4 + 0))
-                                                       | (((deUint32)context.getReference().get(offset + (value % (MAX_UNIFORM_BUFFER_SIZE / sizeof(deUint32))) * 4 + 1)) << 8u)
-                                                       | (((deUint32)context.getReference().get(offset + (value % (MAX_UNIFORM_BUFFER_SIZE / sizeof(deUint32))) * 4 + 2)) << 16u)
-                                                       | (((deUint32)context.getReference().get(offset + (value % (MAX_UNIFORM_BUFFER_SIZE / sizeof(deUint32))) * 4 + 3)) << 24u);
-
+                                       // in shader UBO has up to 64 items of uvec4, each uvec4 contains 16 values
+                                       size_t index = offset + size_t((value % count) * 16u) + size_t((value % 4u) * 4u);
+                                       value   = (((deUint32)context.getReference().get(index + 0)))
+                                                       | (((deUint32)context.getReference().get(index + 1)) << 8u)
+                                                       | (((deUint32)context.getReference().get(index + 2)) << 16u)
+                                                       | (((deUint32)context.getReference().get(index + 3)) << 24u);
                                }
                                const UVec4     vec     ((value >>  0u) & 0xFFu,
                                                                 (value >>  8u) & 0xFFu,
@@ -6178,6 +6194,14 @@ void RenderFragmentUniformBuffer::verify (VerifyRenderPassContext& context, size
        }
 }
 
+deUint32 RenderFragmentUniformBuffer::calculateBufferPartSize(size_t descriptorSetNdx) const
+{
+       deUint32 size = static_cast<deUint32>(m_bufferSize) - static_cast<deUint32>(descriptorSetNdx) * MAX_UNIFORM_BUFFER_SIZE;
+       if (size < MAX_UNIFORM_BUFFER_SIZE)
+               return size;
+       return MAX_UNIFORM_BUFFER_SIZE;
+}
+
 class RenderFragmentStorageBuffer : public RenderPassCommand
 {
 public:
@@ -6225,7 +6249,9 @@ void RenderFragmentStorageBuffer::prepare (PrepareRenderPassContext& context)
        const vk::Unique<vk::VkShaderModule>            fragmentShaderModule    (vk::createShaderModule(vkd, device, context.getBinaryCollection().get("storage-buffer.frag"), 0));
        vector<vk::VkDescriptorSetLayoutBinding>        bindings;
 
+       // make sure buffer size is multiple of 16 (in glsl we use uvec4 to store 16 values)
        m_bufferSize    = context.getBufferSize();
+       m_bufferSize    = static_cast<vk::VkDeviceSize>(m_bufferSize / 16u) * 16u;
        m_targetWidth   = context.getTargetWidth();
        m_targetHeight  = context.getTargetHeight();
 
@@ -6583,7 +6609,7 @@ void RenderFragmentUniformTexelBuffer::verify (VerifyRenderPassContext& context,
 
                                for (deUint32 i = 0; i < valuesPerPixel; i++)
                                {
-                                       value   = ((deUint32)context.getReference().get( offset + (value % count) * 4 + 0))
+                                       value   =  ((deUint32)context.getReference().get(offset + (value % count) * 4 + 0))
                                                        | (((deUint32)context.getReference().get(offset + (value % count) * 4 + 1)) << 8u)
                                                        | (((deUint32)context.getReference().get(offset + (value % count) * 4 + 2)) << 16u)
                                                        | (((deUint32)context.getReference().get(offset + (value % count) * 4 + 3)) << 24u);
@@ -9672,15 +9698,16 @@ struct AddPrograms
                                        "{\n"
                                        "\tuint callId;\n"
                                        "\tuint valuesPerPixel;\n"
+                                       "\tuint bufferSize;\n"
                                        "} pushC;\n"
                                        "void main (void) {\n"
-                                       "\thighp uint id = pushC.callId * (" << arrayIntSize << "u / pushC.valuesPerPixel) + uint(gl_FragCoord.y) * 256u + uint(gl_FragCoord.x);\n"
+                                       "\thighp uint id = uint(gl_FragCoord.y) * 256u + uint(gl_FragCoord.x);\n"
                                        "\tif (uint(gl_FragCoord.y) * 256u + uint(gl_FragCoord.x) < pushC.callId * (" << arrayIntSize  << "u / pushC.valuesPerPixel))\n"
                                        "\t\tdiscard;\n"
                                        "\thighp uint value = id;\n"
                                        "\tfor (uint i = 0u; i < pushC.valuesPerPixel; i++)\n"
                                        "\t{\n"
-                                       "\t\thighp uvec4 vecVal = block.values[(value / 4u) % " << arraySize << "u];\n"
+                                       "\t\thighp uvec4 vecVal = block.values[value % pushC.bufferSize];\n"
                                        "\t\tif ((value % 4u) == 0u)\n"
                                        "\t\t\tvalue = vecVal.x;\n"
                                        "\t\telse if ((value % 4u) == 1u)\n"