/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 * Copyright (c) 2017 The Khronos Group Inc.
 * Copyright (c) 2018 NVIDIA Corporation
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *      http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * \brief Vulkan Memory Model tests
 *//*--------------------------------------------------------------------*/
#include "vktMemoryModelTests.hpp"

#include "vkBufferWithMemory.hpp"
#include "vkImageWithMemory.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vktTestGroupUtil.hpp"
#include "vktTestCase.hpp"

#include "deSharedPtr.hpp"

#include "tcuTestCase.hpp"
#include "tcuTestLog.hpp"
TT_MP = 0, // message passing
TT_WAR, // write-after-read hazard
ST_CONTROL_AND_MEMORY_BARRIER,
const VkFlags allShaderStages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
const VkFlags allPipelineStages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
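// These masks cover every shader stage these tests can run in (compute,
// vertex, fragment); they are used for the descriptor set layout bindings
// and the transfer<->shader pipeline barriers further below.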
bool payloadMemLocal;
StorageClass payloadSC;
StorageClass guardSC;
class MemoryModelTestInstance : public TestInstance
MemoryModelTestInstance (Context& context, const CaseDef& data);
~MemoryModelTestInstance (void);
tcu::TestStatus iterate (void);
MemoryModelTestInstance::MemoryModelTestInstance (Context& context, const CaseDef& data)
: vkt::TestInstance (context)
MemoryModelTestInstance::~MemoryModelTestInstance (void)
class MemoryModelTestCase : public TestCase
MemoryModelTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data);
~MemoryModelTestCase (void);
virtual void initPrograms (SourceCollections& programCollection) const;
virtual TestInstance* createInstance (Context& context) const;
virtual void checkSupport (Context& context) const;
MemoryModelTestCase::MemoryModelTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data)
: vkt::TestCase (context, name, desc)
MemoryModelTestCase::~MemoryModelTestCase (void)
void MemoryModelTestCase::checkSupport(Context& context) const
if (!context.contextSupports(vk::ApiVersion(1, 1, 0)))
TCU_THROW(NotSupportedError, "Vulkan 1.1 not supported");
if (!context.getVulkanMemoryModelFeatures().vulkanMemoryModel)
TCU_THROW(NotSupportedError, "vulkanMemoryModel not supported");
if (m_data.scope == SCOPE_DEVICE && !context.getVulkanMemoryModelFeatures().vulkanMemoryModelDeviceScope)
TCU_THROW(NotSupportedError, "vulkanMemoryModelDeviceScope not supported");
if (m_data.scope == SCOPE_SUBGROUP)
// Check for subgroup support for scope_subgroup tests.
VkPhysicalDeviceSubgroupProperties subgroupProperties;
subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
subgroupProperties.pNext = DE_NULL;
subgroupProperties.supportedOperations = 0;
VkPhysicalDeviceProperties2 properties;
properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
properties.pNext = &subgroupProperties;
context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
if (!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) ||
!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) ||
!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
TCU_THROW(NotSupportedError, "Subgroup features not supported");
if (m_data.dataType == DATA_TYPE_UINT64)
if (!context.getDeviceFeatures().shaderInt64)
TCU_THROW(NotSupportedError, "64-bit integer in shaders not supported");
if (!context.getShaderAtomicInt64Features().shaderBufferInt64Atomics &&
m_data.guardSC == SC_BUFFER)
TCU_THROW(NotSupportedError, "64-bit integer buffer atomics not supported");
if (!context.getShaderAtomicInt64Features().shaderSharedInt64Atomics &&
m_data.guardSC == SC_WORKGROUP)
TCU_THROW(NotSupportedError, "64-bit integer shared atomics not supported");
void MemoryModelTestCase::initPrograms (SourceCollections& programCollection) const
Scope invocationMapping = m_data.scope;
if ((m_data.scope == SCOPE_DEVICE || m_data.scope == SCOPE_QUEUEFAMILY) &&
(m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP))
invocationMapping = SCOPE_WORKGROUP;
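// Shared (workgroup) memory is only visible within a single workgroup, so
// when either variable lives there the invocations are paired up within one
// workgroup even for device/queuefamily-scope tests. Note the scope string
// passed to the barriers and atomics below still uses the requested scope.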
const char *scopeStr;
switch (m_data.scope)
default: DE_ASSERT(0); // fall through
case SCOPE_DEVICE: scopeStr = "gl_ScopeDevice"; break;
case SCOPE_QUEUEFAMILY: scopeStr = "gl_ScopeQueueFamily"; break;
case SCOPE_WORKGROUP: scopeStr = "gl_ScopeWorkgroup"; break;
case SCOPE_SUBGROUP: scopeStr = "gl_ScopeSubgroup"; break;
const char *typeStr = m_data.dataType == DATA_TYPE_UINT64 ? "uint64_t" : "uint";
// Construct storageSemantics strings. Both release and acquire
// always have the payload storage class. They only include the
// guard storage class if they're using FENCE for that side of the
// synchronization.
std::stringstream storageSemanticsRelease;
switch (m_data.payloadSC)
default: DE_ASSERT(0); // fall through
case SC_BUFFER: storageSemanticsRelease << "gl_StorageSemanticsBuffer"; break;
case SC_IMAGE: storageSemanticsRelease << "gl_StorageSemanticsImage"; break;
case SC_WORKGROUP: storageSemanticsRelease << "gl_StorageSemanticsShared"; break;
std::stringstream storageSemanticsAcquire;
storageSemanticsAcquire << storageSemanticsRelease.str();
if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
switch (m_data.guardSC)
default: DE_ASSERT(0); // fall through
case SC_BUFFER: storageSemanticsRelease << " | gl_StorageSemanticsBuffer"; break;
case SC_IMAGE: storageSemanticsRelease << " | gl_StorageSemanticsImage"; break;
case SC_WORKGROUP: storageSemanticsRelease << " | gl_StorageSemanticsShared"; break;
if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
switch (m_data.guardSC)
default: DE_ASSERT(0); // fall through
case SC_BUFFER: storageSemanticsAcquire << " | gl_StorageSemanticsBuffer"; break;
case SC_IMAGE: storageSemanticsAcquire << " | gl_StorageSemanticsImage"; break;
case SC_WORKGROUP: storageSemanticsAcquire << " | gl_StorageSemanticsShared"; break;
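// As an illustration (not emitted verbatim anywhere): for payloadSC ==
// SC_BUFFER, guardSC == SC_IMAGE, and syncType == ST_FENCE_FENCE, the
// strings built above end up as
//   storageSemanticsRelease: "gl_StorageSemanticsBuffer | gl_StorageSemanticsImage"
//   storageSemanticsAcquire: "gl_StorageSemanticsBuffer | gl_StorageSemanticsImage"
// while for ST_ATOMIC_ATOMIC both are just "gl_StorageSemanticsBuffer".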
std::stringstream semanticsRelease, semanticsAcquire, semanticsAcquireRelease;
semanticsRelease << "gl_SemanticsRelease";
semanticsAcquire << "gl_SemanticsAcquire";
semanticsAcquireRelease << "gl_SemanticsAcquireRelease";
if (!m_data.coherent && m_data.testType != TT_WAR)
DE_ASSERT(!m_data.core11);
semanticsRelease << " | gl_SemanticsMakeAvailable";
semanticsAcquire << " | gl_SemanticsMakeVisible";
semanticsAcquireRelease << " | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible";
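// Without the coherent qualifier, writes must be made available and reads
// made visible explicitly. E.g. a noncoherent message-passing case turns
// the release semantics into "gl_SemanticsRelease | gl_SemanticsMakeAvailable".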
std::stringstream css;
css << "#version 450 core\n";
css << "#pragma use_vulkan_memory_model\n";
"#extension GL_KHR_shader_subgroup_basic : enable\n"
"#extension GL_KHR_shader_subgroup_shuffle : enable\n"
"#extension GL_KHR_shader_subgroup_ballot : enable\n"
"#extension GL_KHR_memory_scope_semantics : enable\n"
"#extension GL_ARB_gpu_shader_int64 : enable\n"
"// DIM/NUM_WORKGROUP_EACH_DIM overridden by spec constants\n"
"layout(constant_id = 0) const int DIM = 1;\n"
"layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
"struct S { " << typeStr << " x[DIM*DIM]; };\n";
if (m_data.stage == STAGE_COMPUTE)
css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
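// Note both local_size_x_id and local_size_y_id alias constant_id 0 (DIM),
// giving a DIM x DIM x 1 workgroup.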
const char *memqual = "";
// Vulkan 1.1 only has "coherent", use it regardless of scope
memqual = "coherent";
switch (m_data.scope)
default: DE_ASSERT(0); // fall through
case SCOPE_DEVICE: memqual = "devicecoherent"; break;
case SCOPE_QUEUEFAMILY: memqual = "queuefamilycoherent"; break;
case SCOPE_WORKGROUP: memqual = "workgroupcoherent"; break;
case SCOPE_SUBGROUP: memqual = "subgroupcoherent"; break;
DE_ASSERT(!m_data.core11);
memqual = "nonprivate";
// Declare payload, guard, and fail resources
switch (m_data.payloadSC)
default: DE_ASSERT(0); // fall through
case SC_BUFFER: css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break;
case SC_IMAGE: css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n"; break;
case SC_WORKGROUP: css << "shared S payload;\n"; break;
if (m_data.syncType != ST_CONTROL_AND_MEMORY_BARRIER && m_data.syncType != ST_CONTROL_BARRIER)
// The guard variable is only accessed with atomics and need not be declared coherent.
switch (m_data.guardSC)
default: DE_ASSERT(0); // fall through
case SC_BUFFER: css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break;
case SC_IMAGE: css << "layout(set=0, binding=1, r32ui) uniform uimage2D guard;\n"; break;
case SC_WORKGROUP: css << "shared S guard;\n"; break;
css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";
" bool pass = true;\n"
" bool skip = false;\n";
if (m_data.stage == STAGE_FRAGMENT)
// Kill helper invocations so they don't load outside the bounds of the SSBO.
// Helper pixels are also initially "active" and if a thread gets one as its
// partner in SCOPE_SUBGROUP mode, it can't run the test.
css << " if (gl_HelperInvocation) { return; }\n";
// Compute coordinates based on the storage class and scope.
// For workgroup scope, we pair up LocalInvocationID and DIM-1-LocalInvocationID.
// For device scope, we pair up GlobalInvocationID and DIM*NUM_WORKGROUP_EACH_DIM-1-GlobalInvocationID.
// For subgroup scope, we pair up each invocation with the one at gl_SubgroupInvocationID ^ (gl_SubgroupSize-1).
switch (invocationMapping)
default: DE_ASSERT(0); // fall through
// If the partner invocation isn't active, the shuffle below will be undefined. Bail.
css << " uvec4 ballot = subgroupBallot(true);\n"
" if (!subgroupBallotBitExtract(ballot, gl_SubgroupInvocationID^(gl_SubgroupSize-1))) { return; }\n";
switch (m_data.stage)
default: DE_ASSERT(0); // fall through
" ivec2 localId = ivec2(gl_LocalInvocationID.xy);\n"
" ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
" uint sharedCoord = localId.y * DIM + localId.x;\n"
" uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
" uint bufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
" uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
" ivec2 imageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
" ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
" uint bufferCoord = gl_VertexIndex;\n"
" uint partnerBufferCoord = subgroupShuffleXor(gl_VertexIndex, gl_SubgroupSize-1);\n"
" ivec2 imageCoord = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
" ivec2 partnerImageCoord = subgroupShuffleXor(imageCoord, gl_SubgroupSize-1);\n"
" gl_PointSize = 1.0f;\n"
" gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
" ivec2 localId = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
" ivec2 groupId = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
" ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
" ivec2 partnerGroupId = subgroupShuffleXor(groupId, gl_SubgroupSize-1);\n"
" uint sharedCoord = localId.y * DIM + localId.x;\n"
" uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
" uint bufferCoord = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
" uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
" ivec2 imageCoord = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
" ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
case SCOPE_WORKGROUP:
" ivec2 localId = ivec2(gl_LocalInvocationID.xy);\n"
" ivec2 partnerLocalId = ivec2(DIM-1)-ivec2(gl_LocalInvocationID.xy);\n"
" uint sharedCoord = localId.y * DIM + localId.x;\n"
" uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
" uint bufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
" uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
" ivec2 imageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
" ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
case SCOPE_QUEUEFAMILY:
switch (m_data.stage)
default: DE_ASSERT(0); // fall through
" ivec2 globalId = ivec2(gl_GlobalInvocationID.xy);\n"
" ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
" uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
" uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
" ivec2 imageCoord = globalId;\n"
" ivec2 partnerImageCoord = partnerGlobalId;\n";
" ivec2 globalId = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
" ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - globalId;\n"
" uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
" uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
" ivec2 imageCoord = globalId;\n"
" ivec2 partnerImageCoord = partnerGlobalId;\n"
" gl_PointSize = 1.0f;\n"
" gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
" ivec2 localId = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
" ivec2 groupId = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
" ivec2 partnerLocalId = ivec2(DIM-1)-localId;\n"
" ivec2 partnerGroupId = groupId;\n"
" uint sharedCoord = localId.y * DIM + localId.x;\n"
" uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
" uint bufferCoord = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
" uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
" ivec2 imageCoord = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
" ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
// Initialize shared memory, followed by a barrier
if (m_data.payloadSC == SC_WORKGROUP)
css << " payload.x[sharedCoord] = 0;\n";
if (m_data.guardSC == SC_WORKGROUP)
css << " guard.x[sharedCoord] = 0;\n";
if (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP)
switch (invocationMapping)
default: DE_ASSERT(0); // fall through
case SCOPE_SUBGROUP: css << " subgroupBarrier();\n"; break;
case SCOPE_WORKGROUP: css << " barrier();\n"; break;
if (m_data.testType == TT_MP)
switch (m_data.payloadSC)
default: DE_ASSERT(0); // fall through
case SC_BUFFER: css << " payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break;
case SC_IMAGE: css << " imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break;
case SC_WORKGROUP: css << " payload.x[sharedCoord] = bufferCoord + (payload.x[partnerSharedCoord]>>31);\n"; break;
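// The partner load shifted right by 31 presumably always contributes zero
// (the values stored are buffer coordinates, well below 2^31); folding it
// into the store forces a real read of the partner's payload without
// changing the value written.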
DE_ASSERT(m_data.testType == TT_WAR);
switch (m_data.payloadSC)
default: DE_ASSERT(0); // fall through
case SC_BUFFER: css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
case SC_IMAGE: css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
case SC_WORKGROUP: css << " " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
if (m_data.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
// Acquire and release separate from control barrier
css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n"
" controlBarrier(" << scopeStr << ", gl_ScopeInvocation, 0, 0);\n"
" memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
else if (m_data.syncType == ST_CONTROL_BARRIER)
// Control barrier performs both acquire and release
css << " controlBarrier(" << scopeStr << ", " << scopeStr << ", "
<< storageSemanticsRelease.str() << " | " << storageSemanticsAcquire.str() << ", "
<< semanticsAcquireRelease.str() << ");\n";
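// For example, a coherent workgroup-scope ST_CONTROL_AND_MEMORY_BARRIER
// case with a buffer payload would emit approximately:
//   memoryBarrier(gl_ScopeWorkgroup, gl_StorageSemanticsBuffer, gl_SemanticsRelease);
//   controlBarrier(gl_ScopeWorkgroup, gl_ScopeInvocation, 0, 0);
//   memoryBarrier(gl_ScopeWorkgroup, gl_StorageSemanticsBuffer, gl_SemanticsAcquire);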
std::stringstream atomicReleaseSemantics;
if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n";
atomicReleaseSemantics << ", 0, 0";
atomicReleaseSemantics << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str();
// Atomic store guard
if (m_data.atomicRMW)
switch (m_data.guardSC)
default: DE_ASSERT(0); // fall through
case SC_BUFFER: css << " atomicExchange(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
case SC_IMAGE: css << " imageAtomicExchange(guard, imageCoord, (1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
case SC_WORKGROUP: css << " atomicExchange(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
switch (m_data.guardSC)
default: DE_ASSERT(0); // fall through
case SC_BUFFER: css << " atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
case SC_IMAGE: css << " imageAtomicStore(guard, imageCoord, (1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
case SC_WORKGROUP: css << " atomicStore(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
std::stringstream atomicAcquireSemantics;
if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
atomicAcquireSemantics << ", 0, 0";
atomicAcquireSemantics << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str();
if (m_data.atomicRMW)
switch (m_data.guardSC)
default: DE_ASSERT(0); // fall through
case SC_BUFFER: css << " skip = atomicExchange(guard.x[partnerBufferCoord], 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
case SC_IMAGE: css << " skip = imageAtomicExchange(guard, partnerImageCoord, 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
case SC_WORKGROUP: css << " skip = atomicExchange(guard.x[partnerSharedCoord], 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
switch (m_data.guardSC)
default: DE_ASSERT(0); // fall through
case SC_BUFFER: css << " skip = atomicLoad(guard.x[partnerBufferCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
case SC_IMAGE: css << " skip = imageAtomicLoad(guard, partnerImageCoord, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
case SC_WORKGROUP: css << " skip = atomicLoad(guard.x[partnerSharedCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
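// Putting the pieces together, a coherent device-scope message-passing test
// with buffer payload and guard, using ST_ATOMIC_ATOMIC and atomic writes,
// would emit roughly this GLSL:
//   payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);
//   atomicStore(guard.x[bufferCoord], uint(1u), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelease);
//   skip = atomicLoad(guard.x[partnerBufferCoord], gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsAcquire) == 0;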
if (m_data.testType == TT_MP)
switch (m_data.payloadSC)
default: DE_ASSERT(0); // fall through
case SC_BUFFER: css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
case SC_IMAGE: css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
case SC_WORKGROUP: css << " " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
" if (!skip && r != partnerBufferCoord) { fail.x[bufferCoord] = 1; }\n"
DE_ASSERT(m_data.testType == TT_WAR);
// Store payload, only if the partner invocation has already done its read
css << " if (!skip) {\n ";
switch (m_data.payloadSC)
default: DE_ASSERT(0); // fall through
case SC_BUFFER: css << " payload.x[bufferCoord] = bufferCoord;\n"; break;
case SC_IMAGE: css << " imageStore(payload, imageCoord, uvec4(bufferCoord, 0, 0, 0));\n"; break;
case SC_WORKGROUP: css << " payload.x[sharedCoord] = bufferCoord;\n"; break;
" if (r != 0) { fail.x[bufferCoord] = 1; }\n"
// Draw a fullscreen triangle strip based on gl_VertexIndex
std::stringstream vss;
"#version 450 core\n"
"vec2 coords[4] = {vec2(-1,-1), vec2(-1, 1), vec2(1, -1), vec2(1, 1)};\n"
"void main() { gl_Position = vec4(coords[gl_VertexIndex], 0, 1); }\n";
const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
switch (m_data.stage)
default: DE_ASSERT(0); // fall through
programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
programCollection.glslSources.add("test") << glu::VertexSource(css.str()) << buildOptions;
programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());
programCollection.glslSources.add("test") << glu::FragmentSource(css.str()) << buildOptions;
TestInstance* MemoryModelTestCase::createInstance (Context& context) const
return new MemoryModelTestInstance(context, m_data);
VkBufferCreateInfo makeBufferCreateInfo (const VkDeviceSize bufferSize,
const VkBufferUsageFlags usage)
const VkBufferCreateInfo bufferCreateInfo =
VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
DE_NULL, // const void* pNext;
(VkBufferCreateFlags)0, // VkBufferCreateFlags flags;
bufferSize, // VkDeviceSize size;
usage, // VkBufferUsageFlags usage;
VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
0u, // deUint32 queueFamilyIndexCount;
DE_NULL, // const deUint32* pQueueFamilyIndices;
return bufferCreateInfo;
Move<VkDescriptorSet> makeDescriptorSet (const DeviceInterface& vk,
const VkDevice device,
const VkDescriptorPool descriptorPool,
const VkDescriptorSetLayout setLayout)
const VkDescriptorSetAllocateInfo allocateParams =
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // VkStructureType sType;
DE_NULL, // const void* pNext;
descriptorPool, // VkDescriptorPool descriptorPool;
1u, // deUint32 descriptorSetCount;
&setLayout, // const VkDescriptorSetLayout* pSetLayouts;
return allocateDescriptorSet(vk, device, &allocateParams);
tcu::TestStatus MemoryModelTestInstance::iterate (void)
const DeviceInterface& vk = m_context.getDeviceInterface();
const VkDevice device = m_context.getDevice();
Allocator& allocator = m_context.getDefaultAllocator();
VkPhysicalDeviceProperties2 properties;
properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
properties.pNext = DE_NULL;
m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);
deUint32 NUM_WORKGROUP_EACH_DIM = 8;
// If necessary, shrink workgroup size to fit HW limits
if (DIM*DIM > properties.properties.limits.maxComputeWorkGroupInvocations)
DIM = (deUint32)deFloatSqrt((float)properties.properties.limits.maxComputeWorkGroupInvocations);
deUint32 NUM_INVOCATIONS = (DIM * DIM * NUM_WORKGROUP_EACH_DIM * NUM_WORKGROUP_EACH_DIM);
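// For instance, with NUM_WORKGROUP_EACH_DIM = 8 and a (hypothetical) DIM of
// 8 after clamping, this gives 8*8*8*8 = 4096 invocations, each checking the
// hazard against a distinct partner.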
VkDeviceSize bufferSizes[3];
de::MovePtr<BufferWithMemory> buffers[3];
vk::VkDescriptorBufferInfo bufferDescriptors[3];
de::MovePtr<BufferWithMemory> copyBuffer;
for (deUint32 i = 0; i < 3; ++i)
size_t elementSize = m_data.dataType == DATA_TYPE_UINT64 ? sizeof(deUint64) : sizeof(deUint32);
// buffer2 is the "fail" buffer, and is always uint
elementSize = sizeof(deUint32);
bufferSizes[i] = NUM_INVOCATIONS * elementSize;
default: DE_ASSERT(0); // fall through
if (m_data.payloadSC != SC_BUFFER)
local = m_data.payloadMemLocal;
if (m_data.guardSC != SC_BUFFER)
local = m_data.guardMemLocal;
case 2: local = true; break;
buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
vk, device, allocator, makeBufferCreateInfo(bufferSizes[i], VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
catch (const tcu::NotSupportedError&)
TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, bufferSizes[i]);
// Try to use cached host memory for the buffer the CPU will read from, else fall back to plain host-visible memory.
copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible | MemoryRequirement::Cached));
catch (const tcu::NotSupportedError&)
copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));
const VkImageCreateInfo imageCreateInfo =
VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
DE_NULL, // const void* pNext;
(VkImageCreateFlags)0u, // VkImageCreateFlags flags;
VK_IMAGE_TYPE_2D, // VkImageType imageType;
VK_FORMAT_R32_UINT, // VkFormat format;
DIM*NUM_WORKGROUP_EACH_DIM, // deUint32 width;
DIM*NUM_WORKGROUP_EACH_DIM, // deUint32 height;
1u // deUint32 depth;
}, // VkExtent3D extent;
1u, // deUint32 mipLevels;
1u, // deUint32 arrayLayers;
VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
VK_IMAGE_USAGE_STORAGE_BIT
| VK_IMAGE_USAGE_TRANSFER_SRC_BIT
| VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
0u, // deUint32 queueFamilyIndexCount;
DE_NULL, // const deUint32* pQueueFamilyIndices;
VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
VkImageViewCreateInfo imageViewCreateInfo =
VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
DE_NULL, // const void* pNext;
(VkImageViewCreateFlags)0u, // VkImageViewCreateFlags flags;
DE_NULL, // VkImage image;
VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
VK_FORMAT_R32_UINT, // VkFormat format;
VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
}, // VkComponentMapping components;
VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
0u, // deUint32 baseMipLevel;
1u, // deUint32 levelCount;
0u, // deUint32 baseArrayLayer;
1u // deUint32 layerCount;
} // VkImageSubresourceRange subresourceRange;
de::MovePtr<ImageWithMemory> images[2];
Move<VkImageView> imageViews[2];
vk::VkDescriptorImageInfo imageDescriptors[2];
for (deUint32 i = 0; i < 2; ++i)
default: DE_ASSERT(0); // fall through
if (m_data.payloadSC != SC_IMAGE)
local = m_data.payloadMemLocal;
if (m_data.guardSC != SC_IMAGE)
local = m_data.guardMemLocal;
images[i] = de::MovePtr<ImageWithMemory>(new ImageWithMemory(
vk, device, allocator, imageCreateInfo, local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
catch (const tcu::NotSupportedError&)
TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
imageViewCreateInfo.image = **images[i];
imageViews[i] = createImageView(vk, device, &imageViewCreateInfo, DE_NULL);
imageDescriptors[i] = makeDescriptorImageInfo(DE_NULL, *imageViews[i], VK_IMAGE_LAYOUT_GENERAL);
vk::DescriptorSetLayoutBuilder layoutBuilder;
switch (m_data.payloadSC)
case SC_BUFFER: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
case SC_IMAGE: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
switch (m_data.guardSC)
case SC_BUFFER: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
case SC_IMAGE: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);
vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
vk::Unique<vk::VkDescriptorPool> descriptorPool(vk::DescriptorPoolBuilder()
.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3u)
.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3u)
.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
vk::Unique<vk::VkDescriptorSet> descriptorSet (makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
vk::DescriptorSetUpdateBuilder setUpdateBuilder;
switch (m_data.payloadSC)
default: DE_ASSERT(0); // fall through
setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[0]);
switch (m_data.guardSC)
default: DE_ASSERT(0); // fall through
setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[1]);
setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[1]);
setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(2),
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[2]);
setUpdateBuilder.update(vk, device);
const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
(VkPipelineLayoutCreateFlags)0,
&descriptorSetLayout.get(), // pSetLayouts
0u, // pushConstantRangeCount
DE_NULL, // pPushConstantRanges
Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, DE_NULL);
Move<VkPipeline> pipeline;
Move<VkRenderPass> renderPass;
Move<VkFramebuffer> framebuffer;
VkPipelineBindPoint bindPoint = m_data.stage == STAGE_COMPUTE ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
const deUint32 specData[2] = {DIM, NUM_WORKGROUP_EACH_DIM};
const vk::VkSpecializationMapEntry entries[2] =
{0, sizeof(deUint32) * 0, sizeof(deUint32)},
{1, sizeof(deUint32) * 1, sizeof(deUint32)},
const vk::VkSpecializationInfo specInfo =
entries, // pMapEntries
sizeof(specData), // dataSize
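// Map entry 0 feeds DIM (constant_id 0, which also sets the workgroup x/y
// size in the compute shader) and entry 1 feeds NUM_WORKGROUP_EACH_DIM
// (constant_id 1).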
if (m_data.stage == STAGE_COMPUTE)
const Unique<VkShaderModule> shader (createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));
const VkPipelineShaderStageCreateInfo shaderCreateInfo =
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
(VkPipelineShaderStageCreateFlags)0,
VK_SHADER_STAGE_COMPUTE_BIT, // stage
&specInfo, // pSpecializationInfo
const VkComputePipelineCreateInfo pipelineCreateInfo =
VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
shaderCreateInfo, // cs
*pipelineLayout, // layout
(vk::VkPipeline)0, // basePipelineHandle
0u, // basePipelineIndex
pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, DE_NULL);
const vk::VkSubpassDescription subpassDesc =
(vk::VkSubpassDescriptionFlags)0,
vk::VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint
DE_NULL, // pInputAttachments
DE_NULL, // pColorAttachments
DE_NULL, // pResolveAttachments
DE_NULL, // depthStencilAttachment
0u, // preserveCount
DE_NULL, // pPreserveAttachments
const vk::VkRenderPassCreateInfo renderPassParams =
vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // sType
(vk::VkRenderPassCreateFlags)0,
0u, // attachmentCount
DE_NULL, // pAttachments
&subpassDesc, // pSubpasses
0u, // dependencyCount
DE_NULL, // pDependencies
renderPass = createRenderPass(vk, device, &renderPassParams);
const vk::VkFramebufferCreateInfo framebufferParams =
vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // sType
(vk::VkFramebufferCreateFlags)0,
*renderPass, // renderPass
0u, // attachmentCount
DE_NULL, // pAttachments
DIM*NUM_WORKGROUP_EACH_DIM, // width
DIM*NUM_WORKGROUP_EACH_DIM, // height
framebuffer = createFramebuffer(vk, device, &framebufferParams);
const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
DE_NULL, // const void* pNext;
(VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
0u, // deUint32 vertexBindingDescriptionCount;
DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
0u, // deUint32 vertexAttributeDescriptionCount;
DE_NULL // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo =
VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType;
DE_NULL, // const void* pNext;
(VkPipelineInputAssemblyStateCreateFlags)0, // VkPipelineInputAssemblyStateCreateFlags flags;
(m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology topology;
VK_FALSE // VkBool32 primitiveRestartEnable;
const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo =
VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
DE_NULL, // const void* pNext;
(VkPipelineRasterizationStateCreateFlags)0, // VkPipelineRasterizationStateCreateFlags flags;
VK_FALSE, // VkBool32 depthClampEnable;
(m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE, // VkBool32 rasterizerDiscardEnable;
VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode;
VK_CULL_MODE_NONE, // VkCullModeFlags cullMode;
VK_FRONT_FACE_CLOCKWISE, // VkFrontFace frontFace;
VK_FALSE, // VkBool32 depthBiasEnable;
0.0f, // float depthBiasConstantFactor;
0.0f, // float depthBiasClamp;
0.0f, // float depthBiasSlopeFactor;
1.0f // float lineWidth;
const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo =
VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
DE_NULL, // const void* pNext
0u, // VkPipelineMultisampleStateCreateFlags flags
VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples
VK_FALSE, // VkBool32 sampleShadingEnable
1.0f, // float minSampleShading
DE_NULL, // const VkSampleMask* pSampleMask
VK_FALSE, // VkBool32 alphaToCoverageEnable
VK_FALSE // VkBool32 alphaToOneEnable
VkViewport viewport = makeViewport(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
VkRect2D scissor = makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
const VkPipelineViewportStateCreateInfo viewportStateCreateInfo =
VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
DE_NULL, // const void* pNext
(VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
1u, // deUint32 viewportCount
&viewport, // const VkViewport* pViewports
1u, // deUint32 scissorCount
&scissor // const VkRect2D* pScissors
Move<VkShaderModule> fs;
Move<VkShaderModule> vs;
if (m_data.stage == STAGE_VERTEX)
vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); // bogus
vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
const VkPipelineShaderStageCreateInfo shaderCreateInfo[2] = {
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
(VkPipelineShaderStageCreateFlags)0,
VK_SHADER_STAGE_VERTEX_BIT, // stage
&specInfo, // pSpecializationInfo
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
(VkPipelineShaderStageCreateFlags)0,
VK_SHADER_STAGE_FRAGMENT_BIT, // stage
&specInfo, // pSpecializationInfo
const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo =
VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType;
DE_NULL, // const void* pNext;
(VkPipelineCreateFlags)0, // VkPipelineCreateFlags flags;
numStages, // deUint32 stageCount;
&shaderCreateInfo[0], // const VkPipelineShaderStageCreateInfo* pStages;
&vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
&inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState;
&viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState;
&rasterizationStateCreateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
&multisampleStateCreateInfo, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
DE_NULL, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
DE_NULL, // const VkPipelineDynamicStateCreateInfo* pDynamicState;
pipelineLayout.get(), // VkPipelineLayout layout;
renderPass.get(), // VkRenderPass renderPass;
0u, // deUint32 subpass;
DE_NULL, // VkPipeline basePipelineHandle;
0 // int basePipelineIndex;
pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
const VkQueue queue = m_context.getUniversalQueue();
Move<VkCommandPool> cmdPool = createCommandPool(vk, device, 0, m_context.getUniversalQueueFamilyIndex());
Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
beginCommandBuffer(vk, *cmdBuffer, 0u);
vk.cmdFillBuffer(*cmdBuffer, **buffers[2], 0, bufferSizes[2], 0);
for (deUint32 i = 0; i < 2; ++i)
const VkImageMemoryBarrier imageBarrier =
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType
DE_NULL, // const void* pNext
0u, // VkAccessFlags srcAccessMask
VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags dstAccessMask
VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout oldLayout
VK_IMAGE_LAYOUT_GENERAL, // VkImageLayout newLayout
VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex
VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex
**images[i], // VkImage image
VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask
0u, // uint32_t baseMipLevel
1u, // uint32_t levelCount
0u, // uint32_t baseArrayLayer
1u // uint32_t layerCount
vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
(VkDependencyFlags)0,
0, (const VkMemoryBarrier*)DE_NULL,
0, (const VkBufferMemoryBarrier*)DE_NULL,
vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
VkClearValue clearColor = makeClearValueColorU32(0,0,0,0);
VkMemoryBarrier memBarrier =
VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
0u, // srcAccessMask
0u, // dstAccessMask
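// Record many clear + execute + barrier iterations back-to-back in one
// command buffer; racy failures are intermittent, so repetition gives a
// broken implementation more chances to show up.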
for (deUint32 iters = 0; iters < 200; ++iters)
for (deUint32 i = 0; i < 2; ++i)
vk.cmdFillBuffer(*cmdBuffer, **buffers[i], 0, bufferSizes[i], 0);
vk.cmdClearColorImage(*cmdBuffer, **images[i], VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);
memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages,
0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
if (m_data.stage == STAGE_COMPUTE)
vk.cmdDispatch(*cmdBuffer, NUM_WORKGROUP_EACH_DIM, NUM_WORKGROUP_EACH_DIM, 1);
beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM),
0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE);
// Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing
if (m_data.stage == STAGE_VERTEX)
vk.cmdDraw(*cmdBuffer, DIM*DIM*NUM_WORKGROUP_EACH_DIM*NUM_WORKGROUP_EACH_DIM, 1u, 0u, 0u);
vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u);
endRenderPass(vk, *cmdBuffer);
memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
const VkBufferCopy copyParams =
(VkDeviceSize)0u, // srcOffset
(VkDeviceSize)0u, // dstOffset
bufferSizes[2] // size
vk.cmdCopyBuffer(*cmdBuffer, **buffers[2], **copyBuffer, 1, &copyParams);
endCommandBuffer(vk, *cmdBuffer);
submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
tcu::TestLog& log = m_context.getTestContext().getLog();
deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr();
invalidateMappedMemoryRange(vk, device, copyBuffer->getAllocation().getMemory(), copyBuffer->getAllocation().getOffset(), bufferSizes[2]);
qpTestResult res = QP_TEST_RESULT_PASS;
deUint32 numErrors = 0;
for (deUint32 i = 0; i < NUM_INVOCATIONS; ++i)
if (numErrors < 256)
log << tcu::TestLog::Message << "Failed invocation: " << i << tcu::TestLog::EndMessage;
res = QP_TEST_RESULT_FAIL;
log << tcu::TestLog::Message << "Total Errors: " << numErrors << tcu::TestLog::EndMessage;
return tcu::TestStatus(res, qpGetTestResultName(res));
tcu::TestCaseGroup* createTests (tcu::TestContext& testCtx)
de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
testCtx, "memory_model", "Memory model tests"));
const char* description;
TestGroupCase ttCases[] =
{ TT_MP, "message_passing", "message passing" },
{ TT_WAR, "write_after_read", "write after read" },
TestGroupCase core11Cases[] =
{ 1, "core11", "Supported by Vulkan 1.1" },
{ 0, "ext", "Requires VK_KHR_vulkan_memory_model extension" },
TestGroupCase dtCases[] =
{ DATA_TYPE_UINT, "u32", "uint32_t atomics" },
{ DATA_TYPE_UINT64, "u64", "uint64_t atomics" },
TestGroupCase cohCases[] =
{ 1, "coherent", "coherent payload variable" },
{ 0, "noncoherent", "noncoherent payload variable" },
TestGroupCase stCases[] =
{ ST_FENCE_FENCE, "fence_fence", "release fence, acquire fence" },
{ ST_FENCE_ATOMIC, "fence_atomic", "release fence, atomic acquire" },
{ ST_ATOMIC_FENCE, "atomic_fence", "atomic release, acquire fence" },
{ ST_ATOMIC_ATOMIC, "atomic_atomic", "atomic release, atomic acquire" },
{ ST_CONTROL_BARRIER, "control_barrier", "control barrier" },
{ ST_CONTROL_AND_MEMORY_BARRIER, "control_and_memory_barrier", "control barrier with release/acquire" },
TestGroupCase rmwCases[] =
{ 0, "atomicwrite", "atomic write" },
{ 1, "atomicrmw", "atomic rmw" },
TestGroupCase scopeCases[] =
{ SCOPE_DEVICE, "device", "device scope" },
{ SCOPE_QUEUEFAMILY, "queuefamily", "queuefamily scope" },
{ SCOPE_WORKGROUP, "workgroup", "workgroup scope" },
{ SCOPE_SUBGROUP, "subgroup", "subgroup scope" },
TestGroupCase plCases[] =
{ 0, "payload_nonlocal", "payload variable in non-local memory" },
{ 1, "payload_local", "payload variable in local memory" },
TestGroupCase pscCases[] =
{ SC_BUFFER, "buffer", "payload variable in buffer memory" },
{ SC_IMAGE, "image", "payload variable in image memory" },
{ SC_WORKGROUP, "workgroup", "payload variable in workgroup memory" },
TestGroupCase glCases[] =
{ 0, "guard_nonlocal", "guard variable in non-local memory" },
{ 1, "guard_local", "guard variable in local memory" },
TestGroupCase gscCases[] =
{ SC_BUFFER, "buffer", "guard variable in buffer memory" },
{ SC_IMAGE, "image", "guard variable in image memory" },
{ SC_WORKGROUP, "workgroup", "guard variable in workgroup memory" },
TestGroupCase stageCases[] =
{ STAGE_COMPUTE, "comp", "compute shader" },
{ STAGE_VERTEX, "vert", "vertex shader" },
{ STAGE_FRAGMENT, "frag", "fragment shader" },
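// The nested loops below mirror this table order, producing test names of
// the form (for example):
//   memory_model.message_passing.ext.u32.coherent.atomic_atomic.atomicwrite.device.payload_local.buffer.guard_local.buffer.comp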
for (int ttNdx = 0; ttNdx < DE_LENGTH_OF_ARRAY(ttCases); ttNdx++)
de::MovePtr<tcu::TestCaseGroup> ttGroup(new tcu::TestCaseGroup(testCtx, ttCases[ttNdx].name, ttCases[ttNdx].description));
for (int core11Ndx = 0; core11Ndx < DE_LENGTH_OF_ARRAY(core11Cases); core11Ndx++)
de::MovePtr<tcu::TestCaseGroup> core11Group(new tcu::TestCaseGroup(testCtx, core11Cases[core11Ndx].name, core11Cases[core11Ndx].description));
for (int dtNdx = 0; dtNdx < DE_LENGTH_OF_ARRAY(dtCases); dtNdx++)
de::MovePtr<tcu::TestCaseGroup> dtGroup(new tcu::TestCaseGroup(testCtx, dtCases[dtNdx].name, dtCases[dtNdx].description));
for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name, cohCases[cohNdx].description));
for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name, stCases[stNdx].description));
for (int rmwNdx = 0; rmwNdx < DE_LENGTH_OF_ARRAY(rmwCases); rmwNdx++)
de::MovePtr<tcu::TestCaseGroup> rmwGroup(new tcu::TestCaseGroup(testCtx, rmwCases[rmwNdx].name, rmwCases[rmwNdx].description));
for (int scopeNdx = 0; scopeNdx < DE_LENGTH_OF_ARRAY(scopeCases); scopeNdx++)
de::MovePtr<tcu::TestCaseGroup> scopeGroup(new tcu::TestCaseGroup(testCtx, scopeCases[scopeNdx].name, scopeCases[scopeNdx].description));
for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name, plCases[plNdx].description));
for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name, pscCases[pscNdx].description));
for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name, glCases[glNdx].description));
for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name, gscCases[gscNdx].description));
for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases); stageNdx++)
!!plCases[plNdx].value, // bool payloadMemLocal;
!!glCases[glNdx].value, // bool guardMemLocal;
!!cohCases[cohNdx].value, // bool coherent;
!!core11Cases[core11Ndx].value, // bool core11;
!!rmwCases[rmwNdx].value, // bool atomicRMW;
(TestType)ttCases[ttNdx].value, // TestType testType;
(StorageClass)pscCases[pscNdx].value, // StorageClass payloadSC;
(StorageClass)gscCases[gscNdx].value, // StorageClass guardSC;
(Scope)scopeCases[scopeNdx].value, // Scope scope;
(SyncType)stCases[stNdx].value, // SyncType syncType;
(Stage)stageCases[stageNdx].value, // Stage stage;
(DataType)dtCases[dtNdx].value, // DataType dataType;
// Mustpass11 tests should only exercise things we expect to work on
// existing implementations. Exclude noncoherent tests which require
// new extensions, and assume atomic synchronization wouldn't work
// (i.e. atomics may be implemented as relaxed atomics). Exclude
// queuefamily scope which doesn't exist in Vulkan 1.1.
c.syncType == ST_FENCE_ATOMIC ||
c.syncType == ST_ATOMIC_FENCE ||
c.syncType == ST_ATOMIC_ATOMIC ||
c.dataType == DATA_TYPE_UINT64 ||
c.scope == SCOPE_QUEUEFAMILY))
if (c.stage != STAGE_COMPUTE &&
c.scope == SCOPE_WORKGROUP)
// Don't exercise local and non-local for workgroup memory
// Also don't exercise workgroup memory for non-compute stages
if (c.payloadSC == SC_WORKGROUP && (c.payloadMemLocal != 0 || c.stage != STAGE_COMPUTE))
if (c.guardSC == SC_WORKGROUP && (c.guardMemLocal != 0 || c.stage != STAGE_COMPUTE))
// Can't do control barrier with larger than workgroup scope, or non-compute stages
if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
(c.scope == SCOPE_DEVICE || c.scope == SCOPE_QUEUEFAMILY || c.stage != STAGE_COMPUTE))
// Limit RMW atomics to ST_ATOMIC_ATOMIC, just to reduce # of test cases
if (c.atomicRMW && c.syncType != ST_ATOMIC_ATOMIC)
// uint64 testing is primarily for atomics, so only test it for ST_ATOMIC_ATOMIC
if (c.dataType == DATA_TYPE_UINT64 && c.syncType != ST_ATOMIC_ATOMIC)
// No 64-bit image types, so skip tests with both payload and guard in image memory
if (c.dataType == DATA_TYPE_UINT64 && c.payloadSC == SC_IMAGE && c.guardSC == SC_IMAGE)
// Control barrier tests don't use a guard variable, so only run them with gsc,gl==0
if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
(c.guardSC != 0 || c.guardMemLocal != 0))
gscGroup->addChild(new MemoryModelTestCase(testCtx, stageCases[stageNdx].name, stageCases[stageNdx].description, c));
glGroup->addChild(gscGroup.release());
pscGroup->addChild(glGroup.release());
plGroup->addChild(pscGroup.release());
scopeGroup->addChild(plGroup.release());
rmwGroup->addChild(scopeGroup.release());
stGroup->addChild(rmwGroup.release());
cohGroup->addChild(stGroup.release());
dtGroup->addChild(cohGroup.release());
core11Group->addChild(dtGroup.release());
ttGroup->addChild(core11Group.release());
group->addChild(ttGroup.release());
return group.release();