Check for required features in memory model tests
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / memory_model / vktMemoryModelMessagePassing.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017 The Khronos Group Inc.
6  * Copyright (c) 2018 NVIDIA Corporation
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *        http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Vulkan Memory Model tests
23  *//*--------------------------------------------------------------------*/
24
25 #include "vktMemoryModelTests.hpp"
26
27 #include "vkBufferWithMemory.hpp"
28 #include "vkImageWithMemory.hpp"
29 #include "vkQueryUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vktTestGroupUtil.hpp"
34 #include "vktTestCase.hpp"
35
36 #include "deDefs.h"
37 #include "deMath.h"
38 #include "deSharedPtr.hpp"
39 #include "deString.h"
40
41 #include "tcuTestCase.hpp"
42 #include "tcuTestLog.hpp"
43
44 #include <string>
45 #include <sstream>
46
47 namespace vkt
48 {
49 namespace MemoryModel
50 {
51 namespace
52 {
53 using namespace vk;
54 using namespace std;
55
// Overall hazard pattern a test case exercises.
typedef enum
{
	TT_MP = 0,  // message passing
	TT_WAR, // write-after-read hazard
} TestType;

// How the release/acquire halves of the synchronization are expressed in the
// generated shader (see initPrograms): a standalone memoryBarrier() fence,
// semantics attached to the guard atomic itself, or a control barrier.
typedef enum
{
	ST_FENCE_FENCE = 0,             // release fence + acquire fence
	ST_FENCE_ATOMIC,                // release fence; acquire semantics on the guard atomic
	ST_ATOMIC_FENCE,                // release semantics on the guard atomic; acquire fence
	ST_ATOMIC_ATOMIC,               // release/acquire semantics on the guard atomics
	ST_CONTROL_BARRIER,             // one controlBarrier performs both acquire and release
	ST_CONTROL_AND_MEMORY_BARRIER,  // controlBarrier with separate memoryBarrier fences around it
} SyncType;

// Where the payload/guard variables live: SSBO, storage image, or
// workgroup-shared memory.
typedef enum
{
	SC_BUFFER = 0,
	SC_IMAGE,
	SC_WORKGROUP,
} StorageClass;

// Memory/execution scope used for the atomics and barriers
// (maps to gl_Scope* in the shader).
typedef enum
{
	SCOPE_DEVICE = 0,
	SCOPE_QUEUEFAMILY,
	SCOPE_WORKGROUP,
	SCOPE_SUBGROUP,
} Scope;

// Shader stage the test invocations run in.
typedef enum
{
	STAGE_COMPUTE = 0,
	STAGE_VERTEX,
	STAGE_FRAGMENT,
} Stage;

// Data type of the payload/guard values ("uint" or "uint64_t" in GLSL).
typedef enum
{
	DATA_TYPE_UINT = 0,
	DATA_TYPE_UINT64,
} DataType;

// Union of every shader/pipeline stage any test variant can run in.
const VkFlags allShaderStages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
const VkFlags allPipelineStages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
102
// Full parameterization of one memory-model test case; consumed by
// initPrograms/checkSupport and carried into the test instance.
struct CaseDef
{
	bool payloadMemLocal;   // payload backed by device-local memory -- presumably; allocation code not in this view
	bool guardMemLocal;     // guard backed by device-local memory -- presumably; allocation code not in this view
	bool coherent;          // declare payload with a coherent qualifier (else "nonprivate" + Make{Available,Visible})
	bool core11;            // restrict to core Vulkan 1.1 (no vulkan_memory_model pragma/features)
	bool atomicRMW;         // use atomicExchange instead of atomicStore/atomicLoad on the guard
	TestType testType;      // message passing vs. write-after-read
	StorageClass payloadSC; // storage class of the payload variable
	StorageClass guardSC;   // storage class of the guard variable
	Scope scope;            // scope for atomics/barriers
	SyncType syncType;      // how release/acquire are expressed
	Stage stage;            // shader stage to run in
	DataType dataType;      // 32- or 64-bit payload/guard values
};
118
// Runtime side of a single test case. iterate() performs the work and
// returns the verdict; its definition is not part of this chunk.
class MemoryModelTestInstance : public TestInstance
{
public:
						MemoryModelTestInstance (Context& context, const CaseDef& data);
						~MemoryModelTestInstance        (void);
	tcu::TestStatus         iterate                         (void);
private:
	CaseDef                 m_data; // parameters of this case (copied at construction)

	enum
	{
		// 256x256 dimensions -- presumably the render-target/dispatch extent;
		// the code that uses these is outside this view.
		WIDTH = 256,
		HEIGHT = 256
	};
};
134
135 MemoryModelTestInstance::MemoryModelTestInstance (Context& context, const CaseDef& data)
136         : vkt::TestInstance             (context)
137         , m_data                                (data)
138 {
139 }
140
141 MemoryModelTestInstance::~MemoryModelTestInstance (void)
142 {
143 }
144
// Test-hierarchy node for one case: builds the shaders (initPrograms),
// verifies device support (checkSupport), and creates the runtime
// instance (createInstance).
class MemoryModelTestCase : public TestCase
{
	public:
								MemoryModelTestCase             (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data);
								~MemoryModelTestCase    (void);
	virtual void                            initPrograms            (SourceCollections& programCollection) const;
	virtual TestInstance*           createInstance          (Context& context) const;
	virtual void                            checkSupport            (Context& context) const;

private:
	CaseDef                                 m_data; // parameters of this case (copied at construction)
};
157
158 MemoryModelTestCase::MemoryModelTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data)
159         : vkt::TestCase (context, name, desc)
160         , m_data                (data)
161 {
162 }
163
164 MemoryModelTestCase::~MemoryModelTestCase       (void)
165 {
166 }
167
// Throws NotSupportedError unless the device exposes every feature this
// case's CaseDef requires (memory model, subgroup ops, 64-bit atomics,
// stores/atomics in the graphics stages).
void MemoryModelTestCase::checkSupport(Context& context) const
{
	// Baseline requirement for all variants.
	if (!context.contextSupports(vk::ApiVersion(1, 1, 0)))
	{
		TCU_THROW(NotSupportedError, "Vulkan 1.1 not supported");
	}

	// Non-core11 variants emit "#pragma use_vulkan_memory_model" and the
	// Make{Available,Visible} semantics, so the memory-model feature is required.
	if (!m_data.core11)
	{
		if (!context.getVulkanMemoryModelFeatures().vulkanMemoryModel)
		{
			TCU_THROW(NotSupportedError, "vulkanMemoryModel not supported");
		}

		// Device scope is a separate optional feature of the memory model.
		if (m_data.scope == SCOPE_DEVICE && !context.getVulkanMemoryModelFeatures().vulkanMemoryModelDeviceScope)
		{
			TCU_THROW(NotSupportedError, "vulkanMemoryModelDeviceScope not supported");
		}
	}

	if (m_data.scope == SCOPE_SUBGROUP)
	{
		// Check for subgroup support for scope_subgroup tests.
		// The shaders use subgroupBallot/subgroupBallotBitExtract/subgroupShuffleXor,
		// so basic, ballot and shuffle operations must all be supported.
		VkPhysicalDeviceSubgroupProperties subgroupProperties;
		subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
		subgroupProperties.pNext = DE_NULL;
		subgroupProperties.supportedOperations = 0; // defensive init; overwritten by the query below

		VkPhysicalDeviceProperties2 properties;
		properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
		properties.pNext = &subgroupProperties;

		context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);

		if (!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) ||
			!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) ||
			!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
		{
			TCU_THROW(NotSupportedError, "Subgroup features not supported");
		}
		// NOTE(review): supportedStages is not checked -- presumably subgroup ops in
		// vertex/fragment stages also need the matching stage bit; confirm against spec.
	}
	// 64-bit values need shaderInt64, and guard atomics additionally need the
	// VK_KHR_shader_atomic_int64 feature matching the guard's storage class.
	if (m_data.dataType == DATA_TYPE_UINT64)
	{
		if (!context.getDeviceFeatures().shaderInt64)
		{
			TCU_THROW(NotSupportedError, "64-bit integer in shaders not supported");
		}
		if (!context.getShaderAtomicInt64Features().shaderBufferInt64Atomics &&
			m_data.guardSC == SC_BUFFER)
		{
			TCU_THROW(NotSupportedError, "64-bit integer buffer atomics not supported");
		}
		if (!context.getShaderAtomicInt64Features().shaderSharedInt64Atomics &&
			m_data.guardSC == SC_WORKGROUP)
		{
			TCU_THROW(NotSupportedError, "64-bit integer shared atomics not supported");
		}
	}
	// The graphics-stage variants store to SSBOs/images from those stages,
	// which is optional functionality.
	if (m_data.stage == STAGE_VERTEX)
	{
		if (!context.getDeviceFeatures().vertexPipelineStoresAndAtomics)
		{
			TCU_THROW(NotSupportedError, "vertexPipelineStoresAndAtomics not supported");
		}
	}
	if (m_data.stage == STAGE_FRAGMENT)
	{
		if (!context.getDeviceFeatures().fragmentStoresAndAtomics)
		{
			TCU_THROW(NotSupportedError, "fragmentStoresAndAtomics not supported");
		}
	}
}
241
242
243 void MemoryModelTestCase::initPrograms (SourceCollections& programCollection) const
244 {
245         Scope invocationMapping = m_data.scope;
246         if ((m_data.scope == SCOPE_DEVICE || m_data.scope == SCOPE_QUEUEFAMILY) &&
247                 (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP))
248         {
249                 invocationMapping = SCOPE_WORKGROUP;
250         }
251
252         const char *scopeStr;
253         switch (m_data.scope)
254         {
255         default: DE_ASSERT(0); // fall through
256         case SCOPE_DEVICE:              scopeStr = "gl_ScopeDevice"; break;
257         case SCOPE_QUEUEFAMILY: scopeStr = "gl_ScopeQueueFamily"; break;
258         case SCOPE_WORKGROUP:   scopeStr = "gl_ScopeWorkgroup"; break;
259         case SCOPE_SUBGROUP:    scopeStr = "gl_ScopeSubgroup"; break;
260         }
261
262         const char *typeStr = m_data.dataType == DATA_TYPE_UINT64 ? "uint64_t" : "uint";
263
264         // Construct storageSemantics strings. Both release and acquire
265         // always have the payload storage class. They only include the
266         // guard storage class if they're using FENCE for that side of the
267         // sync.
268         std::stringstream storageSemanticsRelease;
269         switch (m_data.payloadSC)
270         {
271         default: DE_ASSERT(0); // fall through
272         case SC_BUFFER:         storageSemanticsRelease << "gl_StorageSemanticsBuffer"; break;
273         case SC_IMAGE:          storageSemanticsRelease << "gl_StorageSemanticsImage"; break;
274         case SC_WORKGROUP:      storageSemanticsRelease << "gl_StorageSemanticsShared"; break;
275         }
276         std::stringstream storageSemanticsAcquire;
277         storageSemanticsAcquire << storageSemanticsRelease.str();
278         if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
279         {
280                 switch (m_data.guardSC)
281                 {
282                 default: DE_ASSERT(0); // fall through
283                 case SC_BUFFER:         storageSemanticsRelease << " | gl_StorageSemanticsBuffer"; break;
284                 case SC_IMAGE:          storageSemanticsRelease << " | gl_StorageSemanticsImage"; break;
285                 case SC_WORKGROUP:      storageSemanticsRelease << " | gl_StorageSemanticsShared"; break;
286                 }
287         }
288         if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
289         {
290                 switch (m_data.guardSC)
291                 {
292                 default: DE_ASSERT(0); // fall through
293                 case SC_BUFFER:         storageSemanticsAcquire << " | gl_StorageSemanticsBuffer"; break;
294                 case SC_IMAGE:          storageSemanticsAcquire << " | gl_StorageSemanticsImage"; break;
295                 case SC_WORKGROUP:      storageSemanticsAcquire << " | gl_StorageSemanticsShared"; break;
296                 }
297         }
298
299         std::stringstream semanticsRelease, semanticsAcquire, semanticsAcquireRelease;
300
301         semanticsRelease << "gl_SemanticsRelease";
302         semanticsAcquire << "gl_SemanticsAcquire";
303         semanticsAcquireRelease << "gl_SemanticsAcquireRelease";
304         if (!m_data.coherent && m_data.testType != TT_WAR)
305         {
306                 DE_ASSERT(!m_data.core11);
307                 semanticsRelease << " | gl_SemanticsMakeAvailable";
308                 semanticsAcquire << " | gl_SemanticsMakeVisible";
309                 semanticsAcquireRelease << " | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible";
310         }
311
312         std::stringstream css;
313         css << "#version 450 core\n";
314         if (!m_data.core11)
315         {
316                 css << "#pragma use_vulkan_memory_model\n";
317         }
318         css <<
319                 "#extension GL_KHR_shader_subgroup_basic : enable\n"
320                 "#extension GL_KHR_shader_subgroup_shuffle : enable\n"
321                 "#extension GL_KHR_shader_subgroup_ballot : enable\n"
322                 "#extension GL_KHR_memory_scope_semantics : enable\n"
323                 "#extension GL_ARB_gpu_shader_int64 : enable\n"
324                 "// DIM/NUM_WORKGROUP_EACH_DIM overriden by spec constants\n"
325                 "layout(constant_id = 0) const int DIM = 1;\n"
326                 "layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
327                 "struct S { " << typeStr << " x[DIM*DIM]; };\n";
328
329         if (m_data.stage == STAGE_COMPUTE)
330         {
331                 css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
332         }
333
334         const char *memqual = "";
335         if (m_data.coherent)
336         {
337                 if (m_data.core11)
338                 {
339                         // Vulkan 1.1 only has "coherent", use it regardless of scope
340                         memqual = "coherent";
341                 }
342                 else
343                 {
344                         switch (m_data.scope)
345                         {
346                         default: DE_ASSERT(0); // fall through
347                         case SCOPE_DEVICE:              memqual = "devicecoherent"; break;
348                         case SCOPE_QUEUEFAMILY: memqual = "queuefamilycoherent"; break;
349                         case SCOPE_WORKGROUP:   memqual = "workgroupcoherent"; break;
350                         case SCOPE_SUBGROUP:    memqual = "subgroupcoherent"; break;
351                         }
352                 }
353         }
354         else
355         {
356                 DE_ASSERT(!m_data.core11);
357                 memqual = "nonprivate";
358         }
359
360         // Declare payload, guard, and fail resources
361         switch (m_data.payloadSC)
362         {
363         default: DE_ASSERT(0); // fall through
364         case SC_BUFFER:         css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break;
365         case SC_IMAGE:          css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n"; break;
366         case SC_WORKGROUP:      css << "shared S payload;\n"; break;
367         }
368         if (m_data.syncType != ST_CONTROL_AND_MEMORY_BARRIER && m_data.syncType != ST_CONTROL_BARRIER)
369         {
370                 // The guard variable is only accessed with atomics and need not be declared coherent.
371                 switch (m_data.guardSC)
372                 {
373                 default: DE_ASSERT(0); // fall through
374                 case SC_BUFFER:         css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break;
375                 case SC_IMAGE:          css << "layout(set=0, binding=1, r32ui) uniform uimage2D guard;\n"; break;
376                 case SC_WORKGROUP:      css << "shared S guard;\n"; break;
377                 }
378         }
379
380         css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";
381
382         css <<
383                 "void main()\n"
384                 "{\n"
385                 "   bool pass = true;\n"
386                 "   bool skip = false;\n";
387
388         if (m_data.stage == STAGE_FRAGMENT)
389         {
390                 // Kill helper invocations so they don't load outside the bounds of the SSBO.
391                 // Helper pixels are also initially "active" and if a thread gets one as its
392                 // partner in SCOPE_SUBGROUP mode, it can't run the test.
393                 css << "   if (gl_HelperInvocation) { return; }\n";
394         }
395
396         // Compute coordinates based on the storage class and scope.
397         // For workgroup scope, we pair up LocalInvocationID and DIM-1-LocalInvocationID.
398         // For device scope, we pair up GlobalInvocationID and DIM*NUMWORKGROUPS-1-GlobalInvocationID.
399         // For subgroup scope, we pair up LocalInvocationID and LocalInvocationID from subgroupId^(subgroupSize-1)
400         switch (invocationMapping)
401         {
402         default: DE_ASSERT(0); // fall through
403         case SCOPE_SUBGROUP:
404                 // If the partner invocation isn't active, the shuffle below will be undefined. Bail.
405                 css << "   uvec4 ballot = subgroupBallot(true);\n"
406                            "   if (!subgroupBallotBitExtract(ballot, gl_SubgroupInvocationID^(gl_SubgroupSize-1))) { return; }\n";
407
408                 switch (m_data.stage)
409                 {
410                 default: DE_ASSERT(0); // fall through
411                 case STAGE_COMPUTE:
412                         css <<
413                         "   ivec2 localId           = ivec2(gl_LocalInvocationID.xy);\n"
414                         "   ivec2 partnerLocalId    = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
415                         "   uint sharedCoord        = localId.y * DIM + localId.x;\n"
416                         "   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
417                         "   uint bufferCoord        = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
418                         "   uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
419                         "   ivec2 imageCoord        = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
420                         "   ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
421                         break;
422                 case STAGE_VERTEX:
423                         css <<
424                         "   uint bufferCoord        = gl_VertexIndex;\n"
425                         "   uint partnerBufferCoord = subgroupShuffleXor(gl_VertexIndex, gl_SubgroupSize-1);\n"
426                         "   ivec2 imageCoord        = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
427                         "   ivec2 partnerImageCoord = subgroupShuffleXor(imageCoord, gl_SubgroupSize-1);\n"
428                         "   gl_PointSize            = 1.0f;\n"
429                         "   gl_Position             = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
430                         break;
431                 case STAGE_FRAGMENT:
432                         css <<
433                         "   ivec2 localId        = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
434                         "   ivec2 groupId        = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
435                         "   ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
436                         "   ivec2 partnerGroupId = subgroupShuffleXor(groupId, gl_SubgroupSize-1);\n"
437                         "   uint sharedCoord     = localId.y * DIM + localId.x;\n"
438                         "   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
439                         "   uint bufferCoord     = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
440                         "   uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
441                         "   ivec2 imageCoord     = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
442                         "   ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
443                         break;
444                 }
445                 break;
446         case SCOPE_WORKGROUP:
447                 css <<
448                 "   ivec2 localId           = ivec2(gl_LocalInvocationID.xy);\n"
449                 "   ivec2 partnerLocalId    = ivec2(DIM-1)-ivec2(gl_LocalInvocationID.xy);\n"
450                 "   uint sharedCoord        = localId.y * DIM + localId.x;\n"
451                 "   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
452                 "   uint bufferCoord        = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
453                 "   uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
454                 "   ivec2 imageCoord        = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
455                 "   ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
456                 break;
457         case SCOPE_QUEUEFAMILY:
458         case SCOPE_DEVICE:
459                 switch (m_data.stage)
460                 {
461                 default: DE_ASSERT(0); // fall through
462                 case STAGE_COMPUTE:
463                         css <<
464                         "   ivec2 globalId          = ivec2(gl_GlobalInvocationID.xy);\n"
465                         "   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
466                         "   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
467                         "   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
468                         "   ivec2 imageCoord        = globalId;\n"
469                         "   ivec2 partnerImageCoord = partnerGlobalId;\n";
470                         break;
471                 case STAGE_VERTEX:
472                         css <<
473                         "   ivec2 globalId          = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
474                         "   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - globalId;\n"
475                         "   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
476                         "   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
477                         "   ivec2 imageCoord        = globalId;\n"
478                         "   ivec2 partnerImageCoord = partnerGlobalId;\n"
479                         "   gl_PointSize            = 1.0f;\n"
480                         "   gl_Position             = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
481                         break;
482                 case STAGE_FRAGMENT:
483                         css <<
484                         "   ivec2 localId       = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
485                         "   ivec2 groupId       = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
486                         "   ivec2 partnerLocalId = ivec2(DIM-1)-localId;\n"
487                         "   ivec2 partnerGroupId = groupId;\n"
488                         "   uint sharedCoord    = localId.y * DIM + localId.x;\n"
489                         "   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
490                         "   uint bufferCoord    = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
491                         "   uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
492                         "   ivec2 imageCoord    = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
493                         "   ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
494                         break;
495                 }
496                 break;
497         }
498
499         // Initialize shared memory, followed by a barrier
500         if (m_data.payloadSC == SC_WORKGROUP)
501         {
502                 css << "   payload.x[sharedCoord] = 0;\n";
503         }
504         if (m_data.guardSC == SC_WORKGROUP)
505         {
506                 css << "   guard.x[sharedCoord] = 0;\n";
507         }
508         if (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP)
509         {
510                 switch (invocationMapping)
511                 {
512                 default: DE_ASSERT(0); // fall through
513                 case SCOPE_SUBGROUP:    css << "   subgroupBarrier();\n"; break;
514                 case SCOPE_WORKGROUP:   css << "   barrier();\n"; break;
515                 }
516         }
517
518         if (m_data.testType == TT_MP)
519         {
520                 // Store payload
521                 switch (m_data.payloadSC)
522                 {
523                 default: DE_ASSERT(0); // fall through
524                 case SC_BUFFER:         css << "   payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break;
525                 case SC_IMAGE:          css << "   imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break;
526                 case SC_WORKGROUP:      css << "   payload.x[sharedCoord] = bufferCoord + (payload.x[partnerSharedCoord]>>31);\n"; break;
527                 }
528         }
529         else
530         {
531                 DE_ASSERT(m_data.testType == TT_WAR);
532                 // Load payload
533                 switch (m_data.payloadSC)
534                 {
535                 default: DE_ASSERT(0); // fall through
536                 case SC_BUFFER:         css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
537                 case SC_IMAGE:          css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
538                 case SC_WORKGROUP:      css << "   " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
539                 }
540         }
541         if (m_data.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
542         {
543                 // Acquire and release separate from control barrier
544                 css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n"
545                            "   controlBarrier(" << scopeStr << ", gl_ScopeInvocation, 0, 0);\n"
546                            "   memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
547         }
548         else if (m_data.syncType == ST_CONTROL_BARRIER)
549         {
550                 // Control barrier performs both acquire and release
551                 css << "   controlBarrier(" << scopeStr << ", " << scopeStr << ", "
552                                                                         << storageSemanticsRelease.str() << " | " << storageSemanticsAcquire.str() << ", "
553                                                                         << semanticsAcquireRelease.str() << ");\n";
554         }
555         else
556         {
557                 // Release barrier
558                 std::stringstream atomicReleaseSemantics;
559                 if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
560                 {
561                         css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n";
562                         atomicReleaseSemantics << ", 0, 0";
563                 }
564                 else
565                 {
566                         atomicReleaseSemantics << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str();
567                 }
568                 // Atomic store guard
569                 if (m_data.atomicRMW)
570                 {
571                         switch (m_data.guardSC)
572                         {
573                         default: DE_ASSERT(0); // fall through
574                         case SC_BUFFER:         css << "   atomicExchange(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
575                         case SC_IMAGE:          css << "   imageAtomicExchange(guard, imageCoord, (1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
576                         case SC_WORKGROUP:      css << "   atomicExchange(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
577                         }
578                 }
579                 else
580                 {
581                         switch (m_data.guardSC)
582                         {
583                         default: DE_ASSERT(0); // fall through
584                         case SC_BUFFER:         css << "   atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
585                         case SC_IMAGE:          css << "   imageAtomicStore(guard, imageCoord, (1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
586                         case SC_WORKGROUP:      css << "   atomicStore(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
587                         }
588                 }
589
590                 std::stringstream atomicAcquireSemantics;
591                 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
592                 {
593                         atomicAcquireSemantics << ", 0, 0";
594                 }
595                 else
596                 {
597                         atomicAcquireSemantics << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str();
598                 }
599                 // Atomic load guard
600                 if (m_data.atomicRMW)
601                 {
602                         switch (m_data.guardSC)
603                         {
604                         default: DE_ASSERT(0); // fall through
605                         case SC_BUFFER:         css << "   skip = atomicExchange(guard.x[partnerBufferCoord], 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
606                         case SC_IMAGE:          css << "   skip = imageAtomicExchange(guard, partnerImageCoord, 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
607                         case SC_WORKGROUP:      css << "   skip = atomicExchange(guard.x[partnerSharedCoord], 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
608                         }
609                 } else
610                 {
611                         switch (m_data.guardSC)
612                         {
613                         default: DE_ASSERT(0); // fall through
614                         case SC_BUFFER:         css << "   skip = atomicLoad(guard.x[partnerBufferCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
615                         case SC_IMAGE:          css << "   skip = imageAtomicLoad(guard, partnerImageCoord, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
616                         case SC_WORKGROUP:      css << "   skip = atomicLoad(guard.x[partnerSharedCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
617                         }
618                 }
619                 // Acquire barrier
620                 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
621                 {
622                         css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
623                 }
624         }
625         if (m_data.testType == TT_MP)
626         {
627                 // Load payload
628                 switch (m_data.payloadSC)
629                 {
630                 default: DE_ASSERT(0); // fall through
631                 case SC_BUFFER:         css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
632                 case SC_IMAGE:          css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
633                 case SC_WORKGROUP:      css << "   " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
634                 }
635                 css <<
636                         "   if (!skip && r != partnerBufferCoord) { fail.x[bufferCoord] = 1; }\n"
637                         "}\n";
638         }
639         else
640         {
641                 DE_ASSERT(m_data.testType == TT_WAR);
642                 // Store payload, only if the partner invocation has already done its read
643                 css << "   if (!skip) {\n   ";
644                 switch (m_data.payloadSC)
645                 {
646                 default: DE_ASSERT(0); // fall through
647                 case SC_BUFFER:         css << "   payload.x[bufferCoord] = bufferCoord;\n"; break;
648                 case SC_IMAGE:          css << "   imageStore(payload, imageCoord, uvec4(bufferCoord, 0, 0, 0));\n"; break;
649                 case SC_WORKGROUP:      css << "   payload.x[sharedCoord] = bufferCoord;\n"; break;
650                 }
651                 css <<
652                         "   }\n"
653                         "   if (r != 0) { fail.x[bufferCoord] = 1; }\n"
654                         "}\n";
655         }
656
657         // Draw a fullscreen triangle strip based on gl_VertexIndex
658         std::stringstream vss;
659         vss <<
660                 "#version 450 core\n"
661                 "vec2 coords[4] = {ivec2(-1,-1), ivec2(-1, 1), ivec2(1, -1), ivec2(1, 1)};\n"
662                 "void main() { gl_Position = vec4(coords[gl_VertexIndex], 0, 1); }\n";
663
664         const vk::ShaderBuildOptions    buildOptions    (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
665
666         switch (m_data.stage)
667         {
668         default: DE_ASSERT(0); // fall through
669         case STAGE_COMPUTE:
670                 programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
671                 break;
672         case STAGE_VERTEX:
673                 programCollection.glslSources.add("test") << glu::VertexSource(css.str()) << buildOptions;
674                 break;
675         case STAGE_FRAGMENT:
676                 programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());
677                 programCollection.glslSources.add("test") << glu::FragmentSource(css.str()) << buildOptions;
678                 break;
679         }
680 }
681
682 TestInstance* MemoryModelTestCase::createInstance (Context& context) const
683 {
684         return new MemoryModelTestInstance(context, m_data);
685 }
686
687 VkBufferCreateInfo makeBufferCreateInfo (const VkDeviceSize                     bufferSize,
688                                                                                  const VkBufferUsageFlags       usage)
689 {
690         const VkBufferCreateInfo bufferCreateInfo =
691         {
692                 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,   // VkStructureType              sType;
693                 DE_NULL,                                                                // const void*                  pNext;
694                 (VkBufferCreateFlags)0,                                 // VkBufferCreateFlags  flags;
695                 bufferSize,                                                             // VkDeviceSize                 size;
696                 usage,                                                                  // VkBufferUsageFlags   usage;
697                 VK_SHARING_MODE_EXCLUSIVE,                              // VkSharingMode                sharingMode;
698                 0u,                                                                             // deUint32                             queueFamilyIndexCount;
699                 DE_NULL,                                                                // const deUint32*              pQueueFamilyIndices;
700         };
701         return bufferCreateInfo;
702 }
703
704 Move<VkDescriptorSet> makeDescriptorSet (const DeviceInterface&                 vk,
705                                                                                  const VkDevice                                 device,
706                                                                                  const VkDescriptorPool                 descriptorPool,
707                                                                                  const VkDescriptorSetLayout    setLayout)
708 {
709         const VkDescriptorSetAllocateInfo allocateParams =
710         {
711                 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,         // VkStructureType                              sType;
712                 DE_NULL,                                                                                        // const void*                                  pNext;
713                 descriptorPool,                                                                         // VkDescriptorPool                             descriptorPool;
714                 1u,                                                                                                     // deUint32                                             setLayoutCount;
715                 &setLayout,                                                                                     // const VkDescriptorSetLayout* pSetLayouts;
716         };
717         return allocateDescriptorSet(vk, device, &allocateParams);
718 }
719
720 tcu::TestStatus MemoryModelTestInstance::iterate (void)
721 {
722         const DeviceInterface&  vk                                              = m_context.getDeviceInterface();
723         const VkDevice                  device                                  = m_context.getDevice();
724         Allocator&                              allocator                               = m_context.getDefaultAllocator();
725
726         VkPhysicalDeviceProperties2 properties;
727         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
728         properties.pNext = NULL;
729
730         m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);
731
732         deUint32 DIM = 31;
733         deUint32 NUM_WORKGROUP_EACH_DIM = 8;
734         // If necessary, shrink workgroup size to fit HW limits
735         if (DIM*DIM > properties.properties.limits.maxComputeWorkGroupInvocations)
736         {
737                 DIM = (deUint32)deFloatSqrt((float)properties.properties.limits.maxComputeWorkGroupInvocations);
738         }
739         deUint32 NUM_INVOCATIONS = (DIM * DIM * NUM_WORKGROUP_EACH_DIM * NUM_WORKGROUP_EACH_DIM);
740
741         VkDeviceSize bufferSizes[3];
742         de::MovePtr<BufferWithMemory> buffers[3];
743         vk::VkDescriptorBufferInfo bufferDescriptors[3];
744         de::MovePtr<BufferWithMemory> copyBuffer;
745
746         for (deUint32 i = 0; i < 3; ++i)
747         {
748                 size_t elementSize = m_data.dataType == DATA_TYPE_UINT64 ? sizeof(deUint64) : sizeof(deUint32);
749                 // buffer2 is the "fail" buffer, and is always uint
750                 if (i == 2)
751                         elementSize = sizeof(deUint32);
752                 bufferSizes[i] = NUM_INVOCATIONS * elementSize;
753
754                 bool local;
755                 switch (i)
756                 {
757                 default: DE_ASSERT(0); // fall through
758                 case 0:
759                         if (m_data.payloadSC != SC_BUFFER)
760                                 continue;
761                         local = m_data.payloadMemLocal;
762                         break;
763                 case 1:
764                         if (m_data.guardSC != SC_BUFFER)
765                                 continue;
766                         local = m_data.guardMemLocal;
767                         break;
768                 case 2: local = true; break;
769                 }
770
771                 try
772                 {
773                         buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
774                                 vk, device, allocator, makeBufferCreateInfo(bufferSizes[i], VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
775                                 local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
776                 }
777                 catch (const tcu::NotSupportedError&)
778                 {
779                         if (!local)
780                         {
781                                 TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
782                         }
783                         throw;
784                 }
785                 bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, bufferSizes[i]);
786         }
787
788         // Try to use cached host memory for the buffer the CPU will read from, else fallback to host visible.
789         try
790         {
791                 copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
792                         vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible | MemoryRequirement::Cached));
793         }
794         catch (const tcu::NotSupportedError&)
795         {
796                 copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
797                         vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));
798         }
799
800         const VkImageCreateInfo                 imageCreateInfo                 =
801         {
802                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,    // VkStructureType                sType;
803                 DE_NULL,                                                                // const void*                    pNext;
804                 (VkImageCreateFlags)0u,                                 // VkImageCreateFlags      flags;
805                 VK_IMAGE_TYPE_2D,                                               // VkImageType                    imageType;
806                 VK_FORMAT_R32_UINT,                                             // VkFormat                              format;
807                 {
808                         DIM*NUM_WORKGROUP_EACH_DIM,     // deUint32     width;
809                         DIM*NUM_WORKGROUP_EACH_DIM,     // deUint32     height;
810                         1u              // deUint32     depth;
811                 },                                                                              // VkExtent3D                      extent;
812                 1u,                                                                             // deUint32                              mipLevels;
813                 1u,                                                                             // deUint32                              arrayLayers;
814                 VK_SAMPLE_COUNT_1_BIT,                                  // VkSampleCountFlagBits        samples;
815                 VK_IMAGE_TILING_OPTIMAL,                                // VkImageTiling                        tiling;
816                 VK_IMAGE_USAGE_STORAGE_BIT
817                 | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
818                 | VK_IMAGE_USAGE_TRANSFER_DST_BIT,              // VkImageUsageFlags            usage;
819                 VK_SHARING_MODE_EXCLUSIVE,                              // VkSharingMode                        sharingMode;
820                 0u,                                                                             // deUint32                              queueFamilyIndexCount;
821                 DE_NULL,                                                                // const deUint32*                pQueueFamilyIndices;
822                 VK_IMAGE_LAYOUT_UNDEFINED                               // VkImageLayout                        initialLayout;
823         };
824         VkImageViewCreateInfo           imageViewCreateInfo             =
825         {
826                 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,       // VkStructureType                      sType;
827                 DE_NULL,                                                                        // const void*                          pNext;
828                 (VkImageViewCreateFlags)0u,                                     // VkImageViewCreateFlags        flags;
829                 DE_NULL,                                                                        // VkImage                                      image;
830                 VK_IMAGE_VIEW_TYPE_2D,                                          // VkImageViewType                      viewType;
831                 VK_FORMAT_R32_UINT,                                                                             // VkFormat                                format;
832                 {
833                         VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle   r;
834                         VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle   g;
835                         VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle   b;
836                         VK_COMPONENT_SWIZZLE_A  // VkComponentSwizzle   a;
837                 },                                                                                      // VkComponentMapping            components;
838                 {
839                         VK_IMAGE_ASPECT_COLOR_BIT,      // VkImageAspectFlags   aspectMask;
840                         0u,                                                     // deUint32                       baseMipLevel;
841                         1u,                                                     // deUint32                       levelCount;
842                         0u,                                                     // deUint32                       baseArrayLayer;
843                         1u                                                      // deUint32                       layerCount;
844                 }                                                                                       // VkImageSubresourceRange      subresourceRange;
845         };
846
847
848         de::MovePtr<ImageWithMemory> images[2];
849         Move<VkImageView> imageViews[2];
850         vk::VkDescriptorImageInfo imageDescriptors[2];
851
852         for (deUint32 i = 0; i < 2; ++i)
853         {
854
855                 bool local;
856                 switch (i)
857                 {
858                 default: DE_ASSERT(0); // fall through
859                 case 0:
860                         if (m_data.payloadSC != SC_IMAGE)
861                                 continue;
862                         local = m_data.payloadMemLocal;
863                         break;
864                 case 1:
865                         if (m_data.guardSC != SC_IMAGE)
866                                 continue;
867                         local = m_data.guardMemLocal;
868                         break;
869                 }
870
871                 try
872                 {
873                         images[i] = de::MovePtr<ImageWithMemory>(new ImageWithMemory(
874                                 vk, device, allocator, imageCreateInfo, local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
875                 }
876                 catch (const tcu::NotSupportedError&)
877                 {
878                         if (!local)
879                         {
880                                 TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
881                         }
882                         throw;
883                 }
884                 imageViewCreateInfo.image = **images[i];
885                 imageViews[i] = createImageView(vk, device, &imageViewCreateInfo, NULL);
886
887                 imageDescriptors[i] = makeDescriptorImageInfo(DE_NULL, *imageViews[i], VK_IMAGE_LAYOUT_GENERAL);
888         }
889
890         vk::DescriptorSetLayoutBuilder layoutBuilder;
891
892         switch (m_data.payloadSC)
893         {
894         default:
895         case SC_BUFFER: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
896         case SC_IMAGE:  layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
897         }
898         switch (m_data.guardSC)
899         {
900         default:
901         case SC_BUFFER: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
902         case SC_IMAGE:  layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
903         }
904         layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);
905
906         vk::Unique<vk::VkDescriptorSetLayout>   descriptorSetLayout(layoutBuilder.build(vk, device));
907
908         vk::Unique<vk::VkDescriptorPool>                descriptorPool(vk::DescriptorPoolBuilder()
909                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3u)
910                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3u)
911                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
912         vk::Unique<vk::VkDescriptorSet>                 descriptorSet           (makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
913
914         vk::DescriptorSetUpdateBuilder setUpdateBuilder;
915         switch (m_data.payloadSC)
916         {
917         default: DE_ASSERT(0); // fall through
918         case SC_WORKGROUP:
919                 break;
920         case SC_BUFFER:
921                 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
922                         VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
923                 break;
924         case SC_IMAGE:
925                 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
926                         VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[0]);
927                 break;
928         }
929         switch (m_data.guardSC)
930         {
931         default: DE_ASSERT(0); // fall through
932         case SC_WORKGROUP:
933                 break;
934         case SC_BUFFER:
935                 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
936                         VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[1]);
937                 break;
938         case SC_IMAGE:
939                 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
940                         VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[1]);
941                 break;
942         }
943         setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(2),
944                 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[2]);
945
946         setUpdateBuilder.update(vk, device);
947
948
949         const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
950         {
951                 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,                          // sType
952                 DE_NULL,                                                                                                        // pNext
953                 (VkPipelineLayoutCreateFlags)0,
954                 1,                                                                                                                      // setLayoutCount
955                 &descriptorSetLayout.get(),                                                                     // pSetLayouts
956                 0u,                                                                                                                     // pushConstantRangeCount
957                 DE_NULL,                                                                                                        // pPushConstantRanges
958         };
959
960         Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
961
962         Move<VkPipeline> pipeline;
963         Move<VkRenderPass> renderPass;
964         Move<VkFramebuffer> framebuffer;
965
966         VkPipelineBindPoint bindPoint = m_data.stage == STAGE_COMPUTE ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
967
968         const deUint32 specData[2] = {DIM, NUM_WORKGROUP_EACH_DIM};
969
970         const vk::VkSpecializationMapEntry entries[3] =
971         {
972                 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
973                 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
974         };
975
976         const vk::VkSpecializationInfo specInfo =
977         {
978                 2,                                              // mapEntryCount
979                 entries,                                // pMapEntries
980                 sizeof(specData),               // dataSize
981                 specData                                // pData
982         };
983
984         if (m_data.stage == STAGE_COMPUTE)
985         {
986                 const Unique<VkShaderModule>    shader                                          (createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));
987
988                 const VkPipelineShaderStageCreateInfo   shaderCreateInfo =
989                 {
990                         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
991                         DE_NULL,
992                         (VkPipelineShaderStageCreateFlags)0,
993                         VK_SHADER_STAGE_COMPUTE_BIT,                                                            // stage
994                         *shader,                                                                                                        // shader
995                         "main",
996                         &specInfo,                                                                                                      // pSpecializationInfo
997                 };
998
999                 const VkComputePipelineCreateInfo               pipelineCreateInfo =
1000                 {
1001                         VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1002                         DE_NULL,
1003                         0u,                                                                                                                     // flags
1004                         shaderCreateInfo,                                                                                       // cs
1005                         *pipelineLayout,                                                                                        // layout
1006                         (vk::VkPipeline)0,                                                                                      // basePipelineHandle
1007                         0u,                                                                                                                     // basePipelineIndex
1008                 };
1009                 pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, NULL);
1010         }
1011         else
1012         {
1013
1014                 const vk::VkSubpassDescription          subpassDesc                     =
1015                 {
1016                         (vk::VkSubpassDescriptionFlags)0,
1017                         vk::VK_PIPELINE_BIND_POINT_GRAPHICS,                                    // pipelineBindPoint
1018                         0u,                                                                                                             // inputCount
1019                         DE_NULL,                                                                                                // pInputAttachments
1020                         0u,                                                                                                             // colorCount
1021                         DE_NULL,                                                                                                // pColorAttachments
1022                         DE_NULL,                                                                                                // pResolveAttachments
1023                         DE_NULL,                                                                                                // depthStencilAttachment
1024                         0u,                                                                                                             // preserveCount
1025                         DE_NULL,                                                                                                // pPreserveAttachments
1026
1027                 };
1028                 const vk::VkRenderPassCreateInfo        renderPassParams        =
1029                 {
1030                         vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,                  // sType
1031                         DE_NULL,                                                                                                // pNext
1032                         (vk::VkRenderPassCreateFlags)0,
1033                         0u,                                                                                                             // attachmentCount
1034                         DE_NULL,                                                                                                // pAttachments
1035                         1u,                                                                                                             // subpassCount
1036                         &subpassDesc,                                                                                   // pSubpasses
1037                         0u,                                                                                                             // dependencyCount
1038                         DE_NULL,                                                                                                // pDependencies
1039                 };
1040
1041                 renderPass = createRenderPass(vk, device, &renderPassParams);
1042
1043                 const vk::VkFramebufferCreateInfo       framebufferParams       =
1044                 {
1045                         vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,  // sType
1046                         DE_NULL,                                                                                // pNext
1047                         (vk::VkFramebufferCreateFlags)0,
1048                         *renderPass,                                                                    // renderPass
1049                         0u,                                                                                             // attachmentCount
1050                         DE_NULL,                                                                                // pAttachments
1051                         DIM*NUM_WORKGROUP_EACH_DIM,                                             // width
1052                         DIM*NUM_WORKGROUP_EACH_DIM,                                             // height
1053                         1u,                                                                                             // layers
1054                 };
1055
1056                 framebuffer = createFramebuffer(vk, device, &framebufferParams);
1057
1058                 const VkPipelineVertexInputStateCreateInfo              vertexInputStateCreateInfo              =
1059                 {
1060                         VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,      // VkStructureType                                                      sType;
1061                         DE_NULL,                                                                                                        // const void*                                                          pNext;
1062                         (VkPipelineVertexInputStateCreateFlags)0,                                       // VkPipelineVertexInputStateCreateFlags        flags;
1063                         0u,                                                                                                                     // deUint32                                                                     vertexBindingDescriptionCount;
1064                         DE_NULL,                                                                                                        // const VkVertexInputBindingDescription*       pVertexBindingDescriptions;
1065                         0u,                                                                                                                     // deUint32                                                                     vertexAttributeDescriptionCount;
1066                         DE_NULL                                                                                                         // const VkVertexInputAttributeDescription*     pVertexAttributeDescriptions;
1067                 };
1068
1069                 const VkPipelineInputAssemblyStateCreateInfo    inputAssemblyStateCreateInfo    =
1070                 {
1071                         VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,    // VkStructureType                                                      sType;
1072                         DE_NULL,                                                                                                                // const void*                                                          pNext;
1073                         (VkPipelineInputAssemblyStateCreateFlags)0,                                             // VkPipelineInputAssemblyStateCreateFlags      flags;
1074                         (m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology                                                topology;
1075                         VK_FALSE                                                                                                                // VkBool32                                                                     primitiveRestartEnable;
1076                 };
1077
1078                 // For vertex-stage tests all the work happens in the vertex shader, so
1078                 // rasterization is discarded entirely (no fragments are ever generated).
1078                 const VkPipelineRasterizationStateCreateInfo    rasterizationStateCreateInfo    =
1079                 {
1080                         VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,             // VkStructureType                                                      sType;
1081                         DE_NULL,                                                                                                                // const void*                                                          pNext;
1082                         (VkPipelineRasterizationStateCreateFlags)0,                                             // VkPipelineRasterizationStateCreateFlags      flags;
1083                         VK_FALSE,                                                                                                               // VkBool32                                                                     depthClampEnable;
1084                         (m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE,                    // VkBool32                                                                     rasterizerDiscardEnable;
1085                         VK_POLYGON_MODE_FILL,                                                                                   // VkPolygonMode                                                        polygonMode;
1086                         VK_CULL_MODE_NONE,                                                                                              // VkCullModeFlags                                                      cullMode;
1087                         VK_FRONT_FACE_CLOCKWISE,                                                                                // VkFrontFace                                                          frontFace;
1088                         VK_FALSE,                                                                                                               // VkBool32                                                                     depthBiasEnable;
1089                         0.0f,                                                                                                                   // float                                                                        depthBiasConstantFactor;
1090                         0.0f,                                                                                                                   // float                                                                        depthBiasClamp;
1091                         0.0f,                                                                                                                   // float                                                                        depthBiasSlopeFactor;
1092                         1.0f                                                                                                                    // float                                                                        lineWidth;
1093                 };
1094
1095                 const VkPipelineMultisampleStateCreateInfo              multisampleStateCreateInfo =
1096                 {
1097                         VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,       // VkStructureType                                                sType
1098                         DE_NULL,                                                                                                        // const void*                                                    pNext
1099                         0u,                                                                                                                     // VkPipelineMultisampleStateCreateFlags        flags
1100                         VK_SAMPLE_COUNT_1_BIT,                                                                          // VkSampleCountFlagBits                                        rasterizationSamples
1101                         VK_FALSE,                                                                                                       // VkBool32                                                              sampleShadingEnable
1102                         1.0f,                                                                                                           // float                                                                        minSampleShading
1103                         DE_NULL,                                                                                                        // const VkSampleMask*                                    pSampleMask
1104                         VK_FALSE,                                                                                                       // VkBool32                                                              alphaToCoverageEnable
1105                         VK_FALSE                                                                                                        // VkBool32                                                              alphaToOneEnable
1106                 };
1107
1108                 VkViewport viewport = makeViewport(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
1109                 VkRect2D scissor = makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
1110
1111                 const VkPipelineViewportStateCreateInfo                 viewportStateCreateInfo                         =
1112                 {
1113                         VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,  // VkStructureType                                                       sType
1114                         DE_NULL,                                                                                                // const void*                                                           pNext
1115                         (VkPipelineViewportStateCreateFlags)0,                                  // VkPipelineViewportStateCreateFlags             flags
1116                         1u,                                                                                                             // deUint32                                                                     viewportCount
1117                         &viewport,                                                                                              // const VkViewport*                                               pViewports
1118                         1u,                                                                                                             // deUint32                                                                     scissorCount
1119                         &scissor                                                                                                // const VkRect2D*                                                       pScissors
1120                 };
1121
1122                 // Select the shader modules feeding the pipeline. The memory-model test
1122                 // shader is registered under "test"; when the test targets the fragment
1122                 // stage, a pass-through vertex shader ("vert") is used in front of it.
1122                 Move<VkShaderModule> fs;
1123                 Move<VkShaderModule> vs;
1124
1125                 deUint32 numStages;
1126                 if (m_data.stage == STAGE_VERTEX)
1127                 {
1128                         vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1129                         // Placeholder only: numStages is 1 below, so the pipeline never
1129                         // consumes this fragment-stage entry.
1129                         fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); // bogus
1130                         numStages = 1u;
1131                 }
1132                 else
1133                 {
1134                         vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
1135                         fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1136                         numStages = 2u;
1137                 }
1138
1139                 // Stage descriptions for both vertex and fragment. Only the first
1139                 // numStages entries are consumed (stageCount below), so the fragment
1139                 // entry is ignored for vertex-stage-only pipelines.
1139                 const VkPipelineShaderStageCreateInfo   shaderCreateInfo[2] = {
1140                         {
1141                                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1142                                 DE_NULL,
1143                                 (VkPipelineShaderStageCreateFlags)0,
1144                                 VK_SHADER_STAGE_VERTEX_BIT,                                                                     // stage
1145                                 *vs,                                                                                                            // shader
1146                                 "main",
1147                                 &specInfo,                                                                                                      // pSpecializationInfo
1148                         },
1149                         {
1150                                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1151                                 DE_NULL,
1152                                 (VkPipelineShaderStageCreateFlags)0,
1153                                 VK_SHADER_STAGE_FRAGMENT_BIT,                                                           // stage
1154                                 *fs,                                                                                                            // shader
1155                                 "main",
1156                                 &specInfo,                                                                                                      // pSpecializationInfo
1157                         }
1158                 };
1159
1160                 // Assemble the graphics pipeline. Depth/stencil, color-blend and dynamic
1160                 // state are all left NULL — NOTE(review): presumably valid because the
1160                 // render pass uses no attachments and/or rasterization is discarded for
1160                 // the vertex-stage case; confirm against the render-pass setup above.
1160                 const VkGraphicsPipelineCreateInfo                              graphicsPipelineCreateInfo              =
1161                 {
1162                         VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,        // VkStructureType                                                                      sType;
1163                         DE_NULL,                                                                                        // const void*                                                                          pNext;
1164                         (VkPipelineCreateFlags)0,                                                       // VkPipelineCreateFlags                                                        flags;
1165                         numStages,                                                                                      // deUint32                                                                                     stageCount;
1166                         &shaderCreateInfo[0],                                                           // const VkPipelineShaderStageCreateInfo*                       pStages;
1167                         &vertexInputStateCreateInfo,                                            // const VkPipelineVertexInputStateCreateInfo*          pVertexInputState;
1168                         &inputAssemblyStateCreateInfo,                                          // const VkPipelineInputAssemblyStateCreateInfo*        pInputAssemblyState;
1169                         DE_NULL,                                                                                        // const VkPipelineTessellationStateCreateInfo*         pTessellationState;
1170                         &viewportStateCreateInfo,                                                       // const VkPipelineViewportStateCreateInfo*                     pViewportState;
1171                         &rasterizationStateCreateInfo,                                          // const VkPipelineRasterizationStateCreateInfo*        pRasterizationState;
1172                         &multisampleStateCreateInfo,                                            // const VkPipelineMultisampleStateCreateInfo*          pMultisampleState;
1173                         DE_NULL,                                                                                        // const VkPipelineDepthStencilStateCreateInfo*         pDepthStencilState;
1174                         DE_NULL,                                                                                        // const VkPipelineColorBlendStateCreateInfo*           pColorBlendState;
1175                         DE_NULL,                                                                                        // const VkPipelineDynamicStateCreateInfo*                      pDynamicState;
1176                         pipelineLayout.get(),                                                           // VkPipelineLayout                                                                     layout;
1177                         renderPass.get(),                                                                       // VkRenderPass                                                                         renderPass;
1178                         0u,                                                                                                     // deUint32                                                                                     subpass;
1179                         DE_NULL,                                                                                        // VkPipeline                                                                           basePipelineHandle;
1180                         0                                                                                                       // int                                                                                          basePipelineIndex;
1181                 };
1182
1183                 pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
1184         }
1185
1186         const VkQueue                           queue                           = m_context.getUniversalQueue();
1187         Move<VkCommandPool>                             cmdPool                                 = createCommandPool(vk, device, 0, m_context.getUniversalQueueFamilyIndex());
1188         Move<VkCommandBuffer>                   cmdBuffer                               = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1189
1190         beginCommandBuffer(vk, *cmdBuffer, 0u);
1191
1192         // Zero the result buffer (buffers[2]); the shader records failures as
1192         // nonzero words, which are checked on the host after submission.
1192         vk.cmdFillBuffer(*cmdBuffer, **buffers[2], 0, bufferSizes[2], 0);
1193
1194         // Transition any payload/guard images from UNDEFINED to GENERAL so the
1194         // per-iteration transfer clears (and shader storage access) are legal.
1194         for (deUint32 i = 0; i < 2; ++i)
1195         {
1196                 if (!images[i])
1197                         continue;
1198
1199                 const VkImageMemoryBarrier imageBarrier =
1200                 {
1201                         VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,                         // VkStructureType              sType
1202                         DE_NULL,                                                                                        // const void*                  pNext
1203                         0u,                                                                                                     // VkAccessFlags                srcAccessMask
1204                         VK_ACCESS_TRANSFER_WRITE_BIT,                                           // VkAccessFlags                dstAccessMask
1205                         VK_IMAGE_LAYOUT_UNDEFINED,                                                      // VkImageLayout                oldLayout
1206                         VK_IMAGE_LAYOUT_GENERAL,                                                        // VkImageLayout                newLayout
1207                         VK_QUEUE_FAMILY_IGNORED,                                                        // uint32_t                             srcQueueFamilyIndex
1208                         VK_QUEUE_FAMILY_IGNORED,                                                        // uint32_t                             dstQueueFamilyIndex
1209                         **images[i],                                                                            // VkImage                              image
1210                         {
1211                                 VK_IMAGE_ASPECT_COLOR_BIT,                              // VkImageAspectFlags   aspectMask
1212                                 0u,                                                                             // uint32_t                             baseMipLevel
1213                                 1u,                                                                             // uint32_t                             mipLevels,
1214                                 0u,                                                                             // uint32_t                             baseArray
1215                                 1u,                                                                             // uint32_t                             arraySize
1216                         }
1217                 };
1218
1219                 // NOTE(review): BOTTOM_OF_PIPE as the source stage of an
1219                 // initialization-only transition is overly conservative (there is no
1219                 // prior work to wait for); TOP_OF_PIPE is the conventional choice —
1219                 // harmless as written, but confirm the intent.
1219                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
1220                                                          (VkDependencyFlags)0,
1221                                                           0, (const VkMemoryBarrier*)DE_NULL,
1222                                                           0, (const VkBufferMemoryBarrier*)DE_NULL,
1223                                                           1, &imageBarrier);
1224         }
1225
1226         vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
1227         vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
1228
1229         VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1230         VkClearValue clearColor = makeClearValueColorU32(0,0,0,0);
1231
1232         // Reusable barrier; access masks are rewritten before each use below.
1232         VkMemoryBarrier                                 memBarrier =
1233         {
1234                 VK_STRUCTURE_TYPE_MEMORY_BARRIER,       // sType
1235                 DE_NULL,                                                        // pNext
1236                 0u,                                                                     // srcAccessMask
1237                 0u,                                                                     // dstAccessMask
1238         };
1239
1240         // Run many iterations in one submission to maximize the chance of
1240         // exposing a memory-ordering violation. Each iteration: clear the
1240         // payload/guard resources, run the test shader, then make its results
1240         // visible to the next iteration's transfer clears.
1240         for (deUint32 iters = 0; iters < 200; ++iters)
1241         {
1242                 // Reset payload (index 0) and guard (index 1) to zero, whichever
1242                 // storage class (buffer or image) each one lives in.
1242                 for (deUint32 i = 0; i < 2; ++i)
1243                 {
1244                         if (buffers[i])
1245                                 vk.cmdFillBuffer(*cmdBuffer, **buffers[i], 0, bufferSizes[i], 0);
1246                         if (images[i])
1247                                 vk.cmdClearColorImage(*cmdBuffer, **images[i], VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);
1248                 }
1249
1250                 // Make the transfer writes visible to the shader stage(s) under test.
1250                 memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1251                 memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1252                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages,
1253                         0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1254
1255                 if (m_data.stage == STAGE_COMPUTE)
1256                 {
1257                         vk.cmdDispatch(*cmdBuffer, NUM_WORKGROUP_EACH_DIM, NUM_WORKGROUP_EACH_DIM, 1);
1258                 }
1259                 else
1260                 {
1261                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
1262                                                         makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM),
1263                                                         0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE);
1264                         // Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing
1265                         if (m_data.stage == STAGE_VERTEX)
1266                         {
1267                                 vk.cmdDraw(*cmdBuffer, DIM*DIM*NUM_WORKGROUP_EACH_DIM*NUM_WORKGROUP_EACH_DIM, 1u, 0u, 0u);
1268                         }
1269                         else
1270                         {
1271                                 vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u);
1272                         }
1273                         endRenderPass(vk, *cmdBuffer);
1274                 }
1275
1276                 // Make the shader's writes visible to the next iteration's clears
1276                 // (and, after the final iteration, to the result-buffer copy).
1276                 memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1277                 memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
1278                 vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT,
1279                         0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1280         }
1281
1282         // Copy the device-local result buffer into the host-visible copyBuffer
1282         // so the host can inspect it after the submission completes.
1282         const VkBufferCopy      copyParams =
1283         {
1284                 (VkDeviceSize)0u,                                               // srcOffset
1285                 (VkDeviceSize)0u,                                               // dstOffset
1286                 bufferSizes[2]                                                  // size
1287         };
1288
1289         vk.cmdCopyBuffer(*cmdBuffer, **buffers[2], **copyBuffer, 1, &copyParams);
1290
1291         endCommandBuffer(vk, *cmdBuffer);
1292
1293         submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
1294
1295         tcu::TestLog& log = m_context.getTestContext().getLog();
1296
1297         // Invalidate the mapped range before the host reads it, in case the
1297         // allocation is non-coherent.
1297         deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr();
1298         invalidateMappedMemoryRange(vk, device, copyBuffer->getAllocation().getMemory(), copyBuffer->getAllocation().getOffset(), bufferSizes[2]);
1299         qpTestResult res = QP_TEST_RESULT_PASS;
1300
1301         // A nonzero word means the corresponding invocation observed a memory
1301         // ordering violation. Log at most the first 256 failures individually.
1301         deUint32 numErrors = 0;
1302         for (deUint32 i = 0; i < NUM_INVOCATIONS; ++i)
1303         {
1304                 if (ptr[i] != 0)
1305                 {
1306                         if (numErrors < 256)
1307                         {
1308                                 log << tcu::TestLog::Message << "Failed invocation: " << i << tcu::TestLog::EndMessage;
1309                         }
1310                         numErrors++;
1311                         res = QP_TEST_RESULT_FAIL;
1312                 }
1313         }
1314
1315         if (numErrors)
1316         {
1317                 log << tcu::TestLog::Message << "Total Errors: " << numErrors << tcu::TestLog::EndMessage;
1318         }
1319
1320         return tcu::TestStatus(res, qpGetTestResultName(res));
1321 }
1322
1323 }       // anonymous
1324
1325 tcu::TestCaseGroup*     createTests (tcu::TestContext& testCtx)
1326 {
1327         de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
1328                         testCtx, "memory_model", "Memory model tests"));
1329
1330         typedef struct
1331         {
1332                 deUint32                                value;
1333                 const char*                             name;
1334                 const char*                             description;
1335         } TestGroupCase;
1336
1337         TestGroupCase ttCases[] =
1338         {
1339                 { TT_MP,        "message_passing",      "message passing"               },
1340                 { TT_WAR,       "write_after_read",     "write after read"              },
1341         };
1342
1343         TestGroupCase core11Cases[] =
1344         {
1345                 { 1,    "core11",       "Supported by Vulkan1.1"                                                        },
1346                 { 0,    "ext",          "Requires VK_KHR_vulkan_memory_model extension"         },
1347         };
1348
1349         TestGroupCase dtCases[] =
1350         {
1351                 { DATA_TYPE_UINT,       "u32",  "uint32_t atomics"              },
1352                 { DATA_TYPE_UINT64,     "u64",  "uint64_t atomics"              },
1353         };
1354
1355         TestGroupCase cohCases[] =
1356         {
1357                 { 1,    "coherent",             "coherent payload variable"                     },
1358                 { 0,    "noncoherent",  "noncoherent payload variable"          },
1359         };
1360
1361         TestGroupCase stCases[] =
1362         {
1363                 { ST_FENCE_FENCE,                                       "fence_fence",                                  "release fence, acquire fence"                  },
1364                 { ST_FENCE_ATOMIC,                                      "fence_atomic",                                 "release fence, atomic acquire"                 },
1365                 { ST_ATOMIC_FENCE,                                      "atomic_fence",                                 "atomic release, acquire fence"                 },
1366                 { ST_ATOMIC_ATOMIC,                                     "atomic_atomic",                                "atomic release, atomic acquire"                },
1367                 { ST_CONTROL_BARRIER,                           "control_barrier",                              "control barrier"                                               },
1368                 { ST_CONTROL_AND_MEMORY_BARRIER,        "control_and_memory_barrier",   "control barrier with release/acquire"  },
1369         };
1370
1371         TestGroupCase rmwCases[] =
1372         {
1373                 { 0,    "atomicwrite",          "atomic write"          },
1374                 { 1,    "atomicrmw",            "atomic rmw"            },
1375         };
1376
1377         TestGroupCase scopeCases[] =
1378         {
1379                 { SCOPE_DEVICE,                 "device",               "device scope"                  },
1380                 { SCOPE_QUEUEFAMILY,    "queuefamily",  "queuefamily scope"             },
1381                 { SCOPE_WORKGROUP,              "workgroup",    "workgroup scope"               },
1382                 { SCOPE_SUBGROUP,               "subgroup",             "subgroup scope"                },
1383         };
1384
1385         TestGroupCase plCases[] =
1386         {
1387                 { 0,    "payload_nonlocal",             "payload variable in non-local memory"          },
1388                 { 1,    "payload_local",                "payload variable in local memory"                      },
1389         };
1390
1391         TestGroupCase pscCases[] =
1392         {
1393                 { SC_BUFFER,    "buffer",               "payload variable in buffer memory"                     },
1394                 { SC_IMAGE,             "image",                "payload variable in image memory"                      },
1395                 { SC_WORKGROUP, "workgroup",    "payload variable in workgroup memory"          },
1396         };
1397
1398         TestGroupCase glCases[] =
1399         {
1400                 { 0,    "guard_nonlocal",               "guard variable in non-local memory"            },
1401                 { 1,    "guard_local",                  "guard variable in local memory"                        },
1402         };
1403
1404         TestGroupCase gscCases[] =
1405         {
1406                 { SC_BUFFER,    "buffer",               "guard variable in buffer memory"                       },
1407                 { SC_IMAGE,             "image",                "guard variable in image memory"                        },
1408                 { SC_WORKGROUP, "workgroup",    "guard variable in workgroup memory"            },
1409         };
1410
1411         TestGroupCase stageCases[] =
1412         {
1413                 { STAGE_COMPUTE,        "comp",         "compute shader"                        },
1414                 { STAGE_VERTEX,         "vert",         "vertex shader"                         },
1415                 { STAGE_FRAGMENT,       "frag",         "fragment shader"                       },
1416         };
1417
1418
1419         for (int ttNdx = 0; ttNdx < DE_LENGTH_OF_ARRAY(ttCases); ttNdx++)
1420         {
1421                 de::MovePtr<tcu::TestCaseGroup> ttGroup(new tcu::TestCaseGroup(testCtx, ttCases[ttNdx].name, ttCases[ttNdx].description));
1422                 for (int core11Ndx = 0; core11Ndx < DE_LENGTH_OF_ARRAY(core11Cases); core11Ndx++)
1423                 {
1424                         de::MovePtr<tcu::TestCaseGroup> core11Group(new tcu::TestCaseGroup(testCtx, core11Cases[core11Ndx].name, core11Cases[core11Ndx].description));
1425                         for (int dtNdx = 0; dtNdx < DE_LENGTH_OF_ARRAY(dtCases); dtNdx++)
1426                         {
1427                                 de::MovePtr<tcu::TestCaseGroup> dtGroup(new tcu::TestCaseGroup(testCtx, dtCases[dtNdx].name, dtCases[dtNdx].description));
1428                                 for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
1429                                 {
1430                                         de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name, cohCases[cohNdx].description));
1431                                         for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
1432                                         {
1433                                                 de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name, stCases[stNdx].description));
1434                                                 for (int rmwNdx = 0; rmwNdx < DE_LENGTH_OF_ARRAY(rmwCases); rmwNdx++)
1435                                                 {
1436                                                         de::MovePtr<tcu::TestCaseGroup> rmwGroup(new tcu::TestCaseGroup(testCtx, rmwCases[rmwNdx].name, rmwCases[rmwNdx].description));
1437                                                         for (int scopeNdx = 0; scopeNdx < DE_LENGTH_OF_ARRAY(scopeCases); scopeNdx++)
1438                                                         {
1439                                                                 de::MovePtr<tcu::TestCaseGroup> scopeGroup(new tcu::TestCaseGroup(testCtx, scopeCases[scopeNdx].name, scopeCases[scopeNdx].description));
1440                                                                 for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
1441                                                                 {
1442                                                                         de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name, plCases[plNdx].description));
1443                                                                         for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
1444                                                                         {
1445                                                                                 de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name, pscCases[pscNdx].description));
1446                                                                                 for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
1447                                                                                 {
1448                                                                                         de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name, glCases[glNdx].description));
1449                                                                                         for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
1450                                                                                         {
1451                                                                                                 de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name, gscCases[gscNdx].description));
1452                                                                                                 for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases); stageNdx++)
1453                                                                                                 {
1454                                                                                                         CaseDef c =
1455                                                                                                         {
1456                                                                                                                 !!plCases[plNdx].value,                                 // bool payloadMemLocal;
1457                                                                                                                 !!glCases[glNdx].value,                                 // bool guardMemLocal;
1458                                                                                                                 !!cohCases[cohNdx].value,                               // bool coherent;
1459                                                                                                                 !!core11Cases[core11Ndx].value,                 // bool core11;
1460                                                                                                                 !!rmwCases[rmwNdx].value,                               // bool atomicRMW;
1461                                                                                                                 (TestType)ttCases[ttNdx].value,                 // TestType testType;
1462                                                                                                                 (StorageClass)pscCases[pscNdx].value,   // StorageClass payloadSC;
1463                                                                                                                 (StorageClass)gscCases[gscNdx].value,   // StorageClass guardSC;
1464                                                                                                                 (Scope)scopeCases[scopeNdx].value,              // Scope scope;
1465                                                                                                                 (SyncType)stCases[stNdx].value,                 // SyncType syncType;
1466                                                                                                                 (Stage)stageCases[stageNdx].value,              // Stage stage;
1467                                                                                                                 (DataType)dtCases[dtNdx].value,                 // DataType dataType;
1468                                                                                                         };
1469
1470                                                                                                         // Mustpass11 tests should only exercise things we expect to work on
1471                                                                                                         // existing implementations. Exclude noncoherent tests which require
1472                                                                                                         // new extensions, and assume atomic synchronization wouldn't work
1473                                                                                                         // (i.e. atomics may be implemented as relaxed atomics). Exclude
1474                                                                                                         // queuefamily scope which doesn't exist in Vulkan 1.1.
1475                                                                                                         if (c.core11 &&
1476                                                                                                                 (c.coherent == 0 ||
1477                                                                                                                 c.syncType == ST_FENCE_ATOMIC ||
1478                                                                                                                 c.syncType == ST_ATOMIC_FENCE ||
1479                                                                                                                 c.syncType == ST_ATOMIC_ATOMIC ||
1480                                                                                                                 c.dataType == DATA_TYPE_UINT64 ||
1481                                                                                                                 c.scope == SCOPE_QUEUEFAMILY))
1482                                                                                                         {
1483                                                                                                                 continue;
1484                                                                                                         }
1485
1486                                                                                                         if (c.stage != STAGE_COMPUTE &&
1487                                                                                                                 c.scope == SCOPE_WORKGROUP)
1488                                                                                                         {
1489                                                                                                                 continue;
1490                                                                                                         }
1491
1492                                                                                                         // Don't exercise local and non-local for workgroup memory
1493                                                                                                         // Also don't exercise workgroup memory for non-compute stages
1494                                                                                                         if (c.payloadSC == SC_WORKGROUP && (c.payloadMemLocal != 0 || c.stage != STAGE_COMPUTE))
1495                                                                                                         {
1496                                                                                                                 continue;
1497                                                                                                         }
1498                                                                                                         if (c.guardSC == SC_WORKGROUP && (c.guardMemLocal != 0 || c.stage != STAGE_COMPUTE))
1499                                                                                                         {
1500                                                                                                                 continue;
1501                                                                                                         }
1502                                                                                                         // Can't do control barrier with larger than workgroup scope, or non-compute stages
1503                                                                                                         if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
1504                                                                                                                 (c.scope == SCOPE_DEVICE || c.scope == SCOPE_QUEUEFAMILY || c.stage != STAGE_COMPUTE))
1505                                                                                                         {
1506                                                                                                                 continue;
1507                                                                                                         }
1508
1509                                                                                                         // Limit RMW atomics to ST_ATOMIC_ATOMIC, just to reduce # of test cases
1510                                                                                                         if (c.atomicRMW && c.syncType != ST_ATOMIC_ATOMIC)
1511                                                                                                         {
1512                                                                                                                 continue;
1513                                                                                                         }
1514
1515                                                                                                         // uint64 testing is primarily for atomics, so only test it for ST_ATOMIC_ATOMIC
1516                                                                                                         if (c.dataType == DATA_TYPE_UINT64 && c.syncType != ST_ATOMIC_ATOMIC)
1517                                                                                                         {
1518                                                                                                                 continue;
1519                                                                                                         }
1520
1521                                                                                                         // No 64-bit image types, so skip tests with both payload and guard in image memory
1522                                                                                                         if (c.dataType == DATA_TYPE_UINT64 && c.payloadSC == SC_IMAGE && c.guardSC == SC_IMAGE)
1523                                                                                                         {
1524                                                                                                                 continue;
1525                                                                                                         }
1526
1527                                                                                                         // Control barrier tests don't use a guard variable, so only run them with gsc,gl==0
1528                                                                                                         if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
1529                                                                                                                 (c.guardSC != 0 || c.guardMemLocal != 0))
1530                                                                                                         {
1531                                                                                                                 continue;
1532                                                                                                         }
1533
1534                                                                                                         gscGroup->addChild(new MemoryModelTestCase(testCtx, stageCases[stageNdx].name, stageCases[stageNdx].description, c));
1535                                                                                                 }
1536                                                                                                 glGroup->addChild(gscGroup.release());
1537                                                                                         }
1538                                                                                         pscGroup->addChild(glGroup.release());
1539                                                                                 }
1540                                                                                 plGroup->addChild(pscGroup.release());
1541                                                                         }
1542                                                                         scopeGroup->addChild(plGroup.release());
1543                                                                 }
1544                                                                 rmwGroup->addChild(scopeGroup.release());
1545                                                         }
1546                                                         stGroup->addChild(rmwGroup.release());
1547                                                 }
1548                                                 cohGroup->addChild(stGroup.release());
1549                                         }
1550                                         dtGroup->addChild(cohGroup.release());
1551                                 }
1552                                 core11Group->addChild(dtGroup.release());
1553                         }
1554                         ttGroup->addChild(core11Group.release());
1555                 }
1556                 group->addChild(ttGroup.release());
1557         }
1558         return group.release();
1559 }
1560
1561 }       // MemoryModel
1562 }       // vkt