void submit (SubmitContext& context);
void verify (VerifyRenderPassContext&, size_t);
+protected:
+
+ deUint32 calculateBufferPartSize (size_t descriptorSetNdx) const;
+
private:
PipelineResources m_resources;
vk::Move<vk::VkDescriptorPool> m_descriptorPool;
const vk::Unique<vk::VkShaderModule> fragmentShaderModule (vk::createShaderModule(vkd, device, context.getBinaryCollection().get("render-white.frag"), 0));
vector<vk::VkDescriptorSetLayoutBinding> bindings;
+ // make sure the buffer size is a multiple of 16 (in GLSL we use uvec4 to store 16 values)
m_bufferSize = context.getBufferSize();
+ m_bufferSize = static_cast<vk::VkDeviceSize>(m_bufferSize / 16u) * 16u;
{
const vk::VkDescriptorSetLayoutBinding binding =
{
context.getBuffer(),
(vk::VkDeviceSize)(descriptorSetNdx * (size_t)MAX_UNIFORM_BUFFER_SIZE),
- m_bufferSize < (descriptorSetNdx + 1) * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
- ? m_bufferSize - descriptorSetNdx * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
- : (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
+ calculateBufferPartSize(descriptorSetNdx)
};
const vk::VkWriteDescriptorSet write =
{
for (size_t descriptorSetNdx = 0; descriptorSetNdx < m_descriptorSets.size(); descriptorSetNdx++)
{
- const size_t size = (size_t)(m_bufferSize < (descriptorSetNdx + 1) * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
- ? m_bufferSize - descriptorSetNdx * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
- : (size_t)MAX_UNIFORM_BUFFER_SIZE);
+ const size_t size = calculateBufferPartSize(descriptorSetNdx);
const deUint32 count = (deUint32)(size / 2);
vkd.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_GRAPHICS, *m_resources.pipelineLayout, 0u, 1u, &m_descriptorSets[descriptorSetNdx], 0u, DE_NULL);
for (size_t descriptorSetNdx = 0; descriptorSetNdx < m_descriptorSets.size(); descriptorSetNdx++)
{
const size_t offset = descriptorSetNdx * MAX_UNIFORM_BUFFER_SIZE;
- const size_t size = (size_t)(m_bufferSize < (descriptorSetNdx + 1) * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
- ? m_bufferSize - descriptorSetNdx * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
- : (size_t)MAX_UNIFORM_BUFFER_SIZE);
+ const size_t size = calculateBufferPartSize(descriptorSetNdx);
const size_t count = size / 2;
for (size_t pos = 0; pos < count; pos++)
}
}
+// Returns the size in bytes of the part of the buffer that belongs to descriptor set
+// descriptorSetNdx. Parts start at descriptorSetNdx * MAX_UNIFORM_BUFFER_SIZE; each part is
+// MAX_UNIFORM_BUFFER_SIZE bytes long except the last one, which gets what is left of m_bufferSize.
+deUint32 RenderVertexUniformBuffer::calculateBufferPartSize(size_t descriptorSetNdx) const
+{
+	// Do the arithmetic in VkDeviceSize: m_bufferSize is a VkDeviceSize, and narrowing it to
+	// 32 bits before subtracting would silently truncate buffers of 4 GiB or more.
+	const vk::VkDeviceSize	maxPartSize	= static_cast<vk::VkDeviceSize>(MAX_UNIFORM_BUFFER_SIZE);
+	const vk::VkDeviceSize	partOffset	= static_cast<vk::VkDeviceSize>(descriptorSetNdx) * maxPartSize;
+	// NOTE(review): assumes partOffset <= m_bufferSize (presumably the number of descriptor
+	// sets is derived from the buffer size) - confirm; otherwise this subtraction wraps.
+	const vk::VkDeviceSize	remaining	= m_bufferSize - partOffset;
+
+	// The result never exceeds MAX_UNIFORM_BUFFER_SIZE, so narrowing it here loses nothing.
+	return static_cast<deUint32>(remaining < maxPartSize ? remaining : maxPartSize);
+}
+
class RenderVertexUniformTexelBuffer : public RenderPassCommand
{
public:
void submit (SubmitContext& context);
void verify (VerifyRenderPassContext&, size_t);
+protected:
+
+ deUint32 calculateBufferPartSize (size_t descriptorSetNdx) const;
+
private:
PipelineResources m_resources;
vk::Move<vk::VkDescriptorPool> m_descriptorPool;
vk::VkDeviceSize m_bufferSize;
size_t m_targetWidth;
size_t m_targetHeight;
+ deUint32 m_valuesPerPixel;
};
RenderFragmentUniformBuffer::~RenderFragmentUniformBuffer (void)
const vk::Unique<vk::VkShaderModule> fragmentShaderModule (vk::createShaderModule(vkd, device, context.getBinaryCollection().get("uniform-buffer.frag"), 0));
vector<vk::VkDescriptorSetLayoutBinding> bindings;
+ // make sure the buffer is no larger than MAX_SIZE and its size is a multiple of 16 (in GLSL we use uvec4 to store 16 values)
m_bufferSize = de::min(context.getBufferSize(), (vk::VkDeviceSize)MAX_SIZE);
+ m_bufferSize = static_cast<vk::VkDeviceSize>(m_bufferSize / 16u) * 16u;
m_targetWidth = context.getTargetWidth();
m_targetHeight = context.getTargetHeight();
{
vk::VK_SHADER_STAGE_FRAGMENT_BIT,
0u,
- 8u
+ 12u
};
createPipelineWithResources(vkd, device, renderPass, subpass, *vertexShaderModule, *fragmentShaderModule, context.getTargetWidth(), context.getTargetHeight(),
m_descriptorPool = vk::createDescriptorPool(vkd, device, &createInfo);
m_descriptorSets.resize(descriptorCount);
+
+ m_valuesPerPixel = (deUint32)divRoundUp<size_t>(descriptorCount * de::min<size_t>((size_t)m_bufferSize / 4, MAX_UNIFORM_BUFFER_SIZE / 4), m_targetWidth * m_targetHeight);
}
for (size_t descriptorSetNdx = 0; descriptorSetNdx < m_descriptorSets.size(); descriptorSetNdx++)
{
context.getBuffer(),
(vk::VkDeviceSize)(descriptorSetNdx * (size_t)MAX_UNIFORM_BUFFER_SIZE),
- m_bufferSize < (descriptorSetNdx + 1) * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
- ? m_bufferSize - descriptorSetNdx * (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
- : (vk::VkDeviceSize)MAX_UNIFORM_BUFFER_SIZE
+ calculateBufferPartSize(descriptorSetNdx)
};
const vk::VkWriteDescriptorSet write =
{
{
const deUint32 callId;
const deUint32 valuesPerPixel;
+ const deUint32 bufferSize;
} callParams =
{
(deUint32)descriptorSetNdx,
- (deUint32)divRoundUp<size_t>(m_descriptorSets.size() * (MAX_UNIFORM_BUFFER_SIZE / 4), m_targetWidth * m_targetHeight)
+ m_valuesPerPixel,
+ calculateBufferPartSize(descriptorSetNdx) / 16u
};
vkd.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_GRAPHICS, *m_resources.pipelineLayout, 0u, 1u, &m_descriptorSets[descriptorSetNdx], 0u, DE_NULL);
void RenderFragmentUniformBuffer::verify (VerifyRenderPassContext& context, size_t)
{
- const deUint32 valuesPerPixel = (deUint32)divRoundUp<size_t>(m_descriptorSets.size() * (MAX_UNIFORM_BUFFER_SIZE / 4), m_targetWidth * m_targetHeight);
- const size_t arraySize = MAX_UNIFORM_BUFFER_SIZE / (sizeof(deUint32) * 4);
- const size_t arrayIntSize = arraySize * 4;
+ const size_t arrayIntSize = MAX_UNIFORM_BUFFER_SIZE / sizeof(deUint32);
for (int y = 0; y < context.getReferenceTarget().getSize().y(); y++)
for (int x = 0; x < context.getReferenceTarget().getSize().x(); x++)
{
- const size_t firstDescriptorSetNdx = de::min<size_t>((y * 256u + x) / (arrayIntSize / valuesPerPixel), m_descriptorSets.size() - 1);
+ const deUint32 id = (deUint32)y * 256u + (deUint32)x;
+ const size_t firstDescriptorSetNdx = de::min<size_t>(id / (arrayIntSize / m_valuesPerPixel), m_descriptorSets.size() - 1);
for (size_t descriptorSetNdx = firstDescriptorSetNdx; descriptorSetNdx < m_descriptorSets.size(); descriptorSetNdx++)
{
const size_t offset = descriptorSetNdx * MAX_UNIFORM_BUFFER_SIZE;
const deUint32 callId = (deUint32)descriptorSetNdx;
+ const deUint32 count = calculateBufferPartSize(descriptorSetNdx) / 16u;
- const deUint32 id = callId * ((deUint32)arrayIntSize / valuesPerPixel) + (deUint32)y * 256u + (deUint32)x;
-
- if (y * 256u + x < callId * (arrayIntSize / valuesPerPixel))
+ if (id < callId * (arrayIntSize / m_valuesPerPixel))
continue;
else
{
deUint32 value = id;
- for (deUint32 i = 0; i < valuesPerPixel; i++)
+ for (deUint32 i = 0; i < m_valuesPerPixel; i++)
{
- value = ((deUint32)context.getReference().get(offset + (value % (MAX_UNIFORM_BUFFER_SIZE / sizeof(deUint32))) * 4 + 0))
- | (((deUint32)context.getReference().get(offset + (value % (MAX_UNIFORM_BUFFER_SIZE / sizeof(deUint32))) * 4 + 1)) << 8u)
- | (((deUint32)context.getReference().get(offset + (value % (MAX_UNIFORM_BUFFER_SIZE / sizeof(deUint32))) * 4 + 2)) << 16u)
- | (((deUint32)context.getReference().get(offset + (value % (MAX_UNIFORM_BUFFER_SIZE / sizeof(deUint32))) * 4 + 3)) << 24u);
-
+ // in shader UBO has up to 64 items of uvec4, each uvec4 contains 16 values
+ size_t index = offset + size_t((value % count) * 16u) + size_t((value % 4u) * 4u);
+ value = (((deUint32)context.getReference().get(index + 0)))
+ | (((deUint32)context.getReference().get(index + 1)) << 8u)
+ | (((deUint32)context.getReference().get(index + 2)) << 16u)
+ | (((deUint32)context.getReference().get(index + 3)) << 24u);
}
const UVec4 vec ((value >> 0u) & 0xFFu,
(value >> 8u) & 0xFFu,
}
}
+// Returns the size in bytes of the part of the buffer that belongs to descriptor set
+// descriptorSetNdx. Parts start at descriptorSetNdx * MAX_UNIFORM_BUFFER_SIZE; each part is
+// MAX_UNIFORM_BUFFER_SIZE bytes long except the last one, which gets what is left of m_bufferSize.
+deUint32 RenderFragmentUniformBuffer::calculateBufferPartSize(size_t descriptorSetNdx) const
+{
+	// Do the arithmetic in VkDeviceSize (the type of m_bufferSize) and narrow only the final,
+	// bounded result, so the computation does not depend on m_bufferSize fitting in 32 bits.
+	const vk::VkDeviceSize	maxPartSize	= static_cast<vk::VkDeviceSize>(MAX_UNIFORM_BUFFER_SIZE);
+	const vk::VkDeviceSize	partOffset	= static_cast<vk::VkDeviceSize>(descriptorSetNdx) * maxPartSize;
+	// NOTE(review): assumes partOffset <= m_bufferSize (presumably the number of descriptor
+	// sets is derived from the buffer size) - confirm; otherwise this subtraction wraps.
+	const vk::VkDeviceSize	remaining	= m_bufferSize - partOffset;
+
+	// The result never exceeds MAX_UNIFORM_BUFFER_SIZE, so narrowing it here loses nothing.
+	return static_cast<deUint32>(remaining < maxPartSize ? remaining : maxPartSize);
+}
+
class RenderFragmentStorageBuffer : public RenderPassCommand
{
public:
const vk::Unique<vk::VkShaderModule> fragmentShaderModule (vk::createShaderModule(vkd, device, context.getBinaryCollection().get("storage-buffer.frag"), 0));
vector<vk::VkDescriptorSetLayoutBinding> bindings;
+ // make sure the buffer size is a multiple of 16 (in GLSL we use uvec4 to store 16 values)
m_bufferSize = context.getBufferSize();
+ m_bufferSize = static_cast<vk::VkDeviceSize>(m_bufferSize / 16u) * 16u;
m_targetWidth = context.getTargetWidth();
m_targetHeight = context.getTargetHeight();
for (deUint32 i = 0; i < valuesPerPixel; i++)
{
- value = ((deUint32)context.getReference().get( offset + (value % count) * 4 + 0))
+ value = ((deUint32)context.getReference().get(offset + (value % count) * 4 + 0))
| (((deUint32)context.getReference().get(offset + (value % count) * 4 + 1)) << 8u)
| (((deUint32)context.getReference().get(offset + (value % count) * 4 + 2)) << 16u)
| (((deUint32)context.getReference().get(offset + (value % count) * 4 + 3)) << 24u);
"{\n"
"\tuint callId;\n"
"\tuint valuesPerPixel;\n"
+ "\tuint bufferSize;\n"
"} pushC;\n"
"void main (void) {\n"
- "\thighp uint id = pushC.callId * (" << arrayIntSize << "u / pushC.valuesPerPixel) + uint(gl_FragCoord.y) * 256u + uint(gl_FragCoord.x);\n"
+ "\thighp uint id = uint(gl_FragCoord.y) * 256u + uint(gl_FragCoord.x);\n"
"\tif (uint(gl_FragCoord.y) * 256u + uint(gl_FragCoord.x) < pushC.callId * (" << arrayIntSize << "u / pushC.valuesPerPixel))\n"
"\t\tdiscard;\n"
"\thighp uint value = id;\n"
"\tfor (uint i = 0u; i < pushC.valuesPerPixel; i++)\n"
"\t{\n"
- "\t\thighp uvec4 vecVal = block.values[(value / 4u) % " << arraySize << "u];\n"
+ "\t\thighp uvec4 vecVal = block.values[value % pushC.bufferSize];\n"
"\t\tif ((value % 4u) == 0u)\n"
"\t\t\tvalue = vecVal.x;\n"
"\t\telse if ((value % 4u) == 1u)\n"