"OpDecorate %id BuiltIn GlobalInvocationId\n"
- + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
- + string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
++ + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
+ "%uvec2 = OpTypeVector %u32 2\n"
+ "%bvec3 = OpTypeVector %bool 3\n"
+ "%fvec4 = OpTypeVector %f32 4\n"
+ "%fmat33 = OpTypeMatrix %fvec3 3\n"
+ "%const100 = OpConstant %u32 100\n"
+ "%uarr100 = OpTypeArray %i32 %const100\n"
+ "%struct = OpTypeStruct %f32 %i32 %u32\n"
+ "%pointer = OpTypePointer Function %i32\n"
- + string(s_InputOutputBuffer) +
++ + string(getComputeAsmInputOutputBuffer()) +
- "${TYPE}\n"
- "%null = OpConstantNull %type\n"
+ "%null = OpConstantNull ${TYPE}\n"
"%id = OpVariable %uvec3ptr Input\n"
"%zero = OpConstant %i32 0\n"
"OpDecorate %id BuiltIn GlobalInvocationId\n"
- + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
-
- "${TYPE}\n"
-
- + string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
++ + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
+ "%uvec2 = OpTypeVector %u32 2\n"
+ "%fvec4 = OpTypeVector %f32 4\n"
+ "%fmat33 = OpTypeMatrix %fvec3 3\n"
+ "%image = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
+ "%sampler = OpTypeSampler\n"
+ "%simage = OpTypeSampledImage %image\n"
+ "%const100 = OpConstant %u32 100\n"
+ "%uarr100 = OpTypeArray %i32 %const100\n"
+ "%struct = OpTypeStruct %f32 %i32 %u32\n"
+ "%pointer = OpTypePointer Function %i32\n"
- + string(s_InputOutputBuffer) +
++ + string(getComputeAsmInputOutputBuffer()) +
"%id = OpVariable %uvec3ptr Input\n"
"%zero = OpConstant %i32 0\n"
vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
+ // Add barriers for shader writes to storage buffers before host access
+ std::vector<vk::VkBufferMemoryBarrier> barriers;
+ if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
+ {
+ for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
+ {
+ const vk::VkBufferMemoryBarrier barrier =
+ {
+ vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ DE_NULL,
+ vk::VK_ACCESS_SHADER_WRITE_BIT,
+ vk::VK_ACCESS_HOST_READ_BIT,
+ VK_QUEUE_FAMILY_IGNORED,
+ VK_QUEUE_FAMILY_IGNORED,
+ static_cast<vk::VkBuffer>(*m_uniformBuffers[blockNdx]),
+ 0u,
+ static_cast<vk::VkDeviceSize>(bufferSizes[blockNdx])
+ };
+ barriers.push_back(barrier);
+ }
+ }
+ else
+ {
+ vk::VkDeviceSize totalSize = 0;
+ for (size_t bufferNdx = 0; bufferNdx < bufferSizes.size(); bufferNdx++)
+ totalSize += bufferSizes[bufferNdx];
+
+ const vk::VkBufferMemoryBarrier barrier =
+ {
+ vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ DE_NULL,
+ vk::VK_ACCESS_SHADER_WRITE_BIT,
+ vk::VK_ACCESS_HOST_READ_BIT,
+ VK_QUEUE_FAMILY_IGNORED,
+ VK_QUEUE_FAMILY_IGNORED,
+ static_cast<vk::VkBuffer>(*m_uniformBuffers[0]),
+ 0u,
+ totalSize
+ };
+ barriers.push_back(barrier);
+ }
+ vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
+ 0u, DE_NULL, static_cast<deUint32>(barriers.size()), &barriers[0], 0u, DE_NULL);
+
VK_CHECK(vk.endCommandBuffer(*cmdBuffer));
- const vk::VkFenceCreateInfo fenceParams =
- {
- vk::VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, // VkStructureType sType;
- DE_NULL, // const void* pNext;
- 0u, // VkFenceCreateFlags flags;
- };
- vk::Move<vk::VkFence> fence (createFence(vk, device, &fenceParams));
+ vk::Move<vk::VkFence> fence (createFence(vk, device));
const vk::VkSubmitInfo submitInfo =
{