ecef08686630d2f0babd1e6ca6657ee3a9bca331
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / memory_model / vktMemoryModelMessagePassing.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017 The Khronos Group Inc.
6  * Copyright (c) 2018 NVIDIA Corporation
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *        http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Vulkan Memory Model tests
23  *//*--------------------------------------------------------------------*/
24
25 #include "vktMemoryModelTests.hpp"
26
27 #include "vkBufferWithMemory.hpp"
28 #include "vkImageWithMemory.hpp"
29 #include "vkQueryUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vktTestGroupUtil.hpp"
34 #include "vktTestCase.hpp"
35
36 #include "deDefs.h"
37 #include "deMath.h"
38 #include "deSharedPtr.hpp"
39 #include "deString.h"
40
41 #include "tcuTestCase.hpp"
42 #include "tcuTestLog.hpp"
43
44 #include <string>
45 #include <sstream>
46
47 namespace vkt
48 {
49 namespace MemoryModel
50 {
51 namespace
52 {
53 using namespace vk;
54 using namespace std;
55
// Which litmus test the generated shader runs.
enum TestType
{
	TT_MP = 0,	// message passing
	TT_WAR,		// write-after-read hazard
};
61
// How the release/acquire pair is expressed in the shader:
// fences, atomics with semantics, or control barriers.
enum SyncType
{
	ST_FENCE_FENCE = 0,				// release fence + acquire fence
	ST_FENCE_ATOMIC,				// release fence + acquire atomic
	ST_ATOMIC_FENCE,				// release atomic + acquire fence
	ST_ATOMIC_ATOMIC,				// release atomic + acquire atomic
	ST_CONTROL_BARRIER,				// controlBarrier carries both semantics
	ST_CONTROL_AND_MEMORY_BARRIER,	// memoryBarrier pair around a controlBarrier
};
71
// Storage class used for the payload or guard variable.
enum StorageClass
{
	SC_BUFFER = 0,	// SSBO
	SC_IMAGE,		// storage image (r32ui)
	SC_WORKGROUP,	// shared memory
};
78
// Memory/execution scope the synchronization operates at.
enum Scope
{
	SCOPE_DEVICE = 0,
	SCOPE_QUEUEFAMILY,
	SCOPE_WORKGROUP,
	SCOPE_SUBGROUP,
};
86
// Shader stage the test shader is compiled for.
enum Stage
{
	STAGE_COMPUTE = 0,
	STAGE_VERTEX,
	STAGE_FRAGMENT,
};
93
// Element type of the payload/guard data ("uint" or "uint64_t" in GLSL).
enum DataType
{
	DATA_TYPE_UINT = 0,
	DATA_TYPE_UINT64,
};
99
// Combined stage masks covering every stage exercised by these tests
// (compute, vertex, fragment); presumably used for descriptor set layout
// stage flags and pipeline barriers in the (not shown) runtime code.
const VkFlags allShaderStages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
const VkFlags allPipelineStages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
102
// Parameters describing one test variant. Member order matters: callers
// presumably construct this with aggregate initialization — do not reorder.
struct CaseDef
{
	bool payloadMemLocal;	// NOTE(review): not referenced in the visible code; presumably selects device-local memory for the payload in iterate() — confirm
	bool guardMemLocal;		// NOTE(review): as above, for the guard resource — confirm
	bool coherent;			// declare the payload with (device/queuefamily/workgroup/subgroup)coherent; otherwise "nonprivate" + Make{Available,Visible} semantics
	bool core11;			// restrict to core Vulkan 1.1 semantics (no vulkanMemoryModel feature, plain "coherent")
	bool atomicRMW;			// use atomicExchange/imageAtomicExchange for the guard instead of atomicStore/atomicLoad
	TestType testType;		// message passing vs. write-after-read
	StorageClass payloadSC;	// storage class of the payload variable
	StorageClass guardSC;	// storage class of the guard variable (atomically accessed)
	Scope scope;			// scope of the synchronization
	SyncType syncType;		// fence/atomic/barrier combination used for release/acquire
	Stage stage;			// shader stage to test
	DataType dataType;		// uint or uint64_t payload/guard element type
};
118
// Runtime side of a memory model test: iterate() (defined elsewhere in the
// file) executes the generated shaders and checks the "fail" buffer.
class MemoryModelTestInstance : public TestInstance
{
public:
						MemoryModelTestInstance	(Context& context, const CaseDef& data);
						~MemoryModelTestInstance	(void);
	tcu::TestStatus		iterate						(void);
private:
	CaseDef			m_data;	// test variant parameters, copied from the case

	enum
	{
		// NOTE(review): presumably the framebuffer/dispatch dimensions used by
		// iterate() (not visible here) — confirm against the runtime code.
		WIDTH = 256,
		HEIGHT = 256
	};
};
134
// Store the test parameters; all real work happens in iterate().
MemoryModelTestInstance::MemoryModelTestInstance (Context& context, const CaseDef& data)
	: vkt::TestInstance		(context)
	, m_data				(data)
{
}
140
// Nothing to release; resources are owned per-iteration.
MemoryModelTestInstance::~MemoryModelTestInstance (void)
{
}
144
145 class MemoryModelTestCase : public TestCase
146 {
147         public:
148                                                                 MemoryModelTestCase             (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data);
149                                                                 ~MemoryModelTestCase    (void);
150         virtual void                            initPrograms            (SourceCollections& programCollection) const;
151         virtual TestInstance*           createInstance          (Context& context) const;
152         virtual void                            checkSupport            (Context& context) const;
153
154 private:
155         CaseDef                                 m_data;
156 };
157
// Store the test parameters for use by initPrograms/checkSupport/createInstance.
MemoryModelTestCase::MemoryModelTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data)
	: vkt::TestCase	(context, name, desc)
	, m_data		(data)
{
}
163
// Nothing to release.
MemoryModelTestCase::~MemoryModelTestCase	(void)
{
}
167
// Throw NotSupportedError unless the device supports everything this variant
// needs: Vulkan 1.1, the Vulkan memory model features (unless core11),
// subgroup basic/ballot/shuffle ops for subgroup-scope cases, and 64-bit
// integer shader/atomic support for uint64 data.
void MemoryModelTestCase::checkSupport(Context& context) const
{
	if (!context.contextSupports(vk::ApiVersion(1, 1, 0)))
	{
		TCU_THROW(NotSupportedError, "Vulkan 1.1 not supported");
	}

	// core11 variants use only "coherent" and core barriers, so they do not
	// require the vulkanMemoryModel feature.
	if (!m_data.core11)
	{
		if (!context.getVulkanMemoryModelFeatures().vulkanMemoryModel)
		{
			TCU_THROW(NotSupportedError, "vulkanMemoryModel not supported");
		}

		if (m_data.scope == SCOPE_DEVICE && !context.getVulkanMemoryModelFeatures().vulkanMemoryModelDeviceScope)
		{
			TCU_THROW(NotSupportedError, "vulkanMemoryModelDeviceScope not supported");
		}
	}

	if (m_data.scope == SCOPE_SUBGROUP)
	{
		// Check for subgroup support for scope_subgroup tests.
		VkPhysicalDeviceSubgroupProperties subgroupProperties;
		subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
		subgroupProperties.pNext = DE_NULL;
		subgroupProperties.supportedOperations = 0;

		VkPhysicalDeviceProperties2 properties;
		properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
		properties.pNext = &subgroupProperties;

		context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);

		// The generated shaders use subgroupBallot/subgroupBallotBitExtract and
		// subgroupShuffleXor to pair up invocations, so all three feature bits
		// are required.
		if (!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) ||
			!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) ||
			!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
		{
			TCU_THROW(NotSupportedError, "Subgroup features not supported");
		}
	}
	if (m_data.dataType == DATA_TYPE_UINT64)
	{
		if (!context.getDeviceFeatures().shaderInt64)
		{
			TCU_THROW(NotSupportedError, "64-bit integer in shaders not supported");
		}
		// Only the guard is accessed atomically, so atomic support is checked
		// against the guard storage class. NOTE(review): no check for SC_IMAGE —
		// presumably uint64+image-guard variants are never generated (the image
		// guard is r32ui and uses uint atomics); confirm against case creation.
		if (!context.getShaderAtomicInt64Features().shaderBufferInt64Atomics &&
			m_data.guardSC == SC_BUFFER)
		{
			TCU_THROW(NotSupportedError, "64-bit integer buffer atomics not supported");
		}
		if (!context.getShaderAtomicInt64Features().shaderSharedInt64Atomics &&
			m_data.guardSC == SC_WORKGROUP)
		{
			TCU_THROW(NotSupportedError, "64-bit integer shared atomics not supported");
		}
	}
}
227
228
229 void MemoryModelTestCase::initPrograms (SourceCollections& programCollection) const
230 {
231         Scope invocationMapping = m_data.scope;
232         if ((m_data.scope == SCOPE_DEVICE || m_data.scope == SCOPE_QUEUEFAMILY) &&
233                 (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP))
234         {
235                 invocationMapping = SCOPE_WORKGROUP;
236         }
237
238         const char *scopeStr;
239         switch (m_data.scope)
240         {
241         default: DE_ASSERT(0); // fall through
242         case SCOPE_DEVICE:              scopeStr = "gl_ScopeDevice"; break;
243         case SCOPE_QUEUEFAMILY: scopeStr = "gl_ScopeQueueFamily"; break;
244         case SCOPE_WORKGROUP:   scopeStr = "gl_ScopeWorkgroup"; break;
245         case SCOPE_SUBGROUP:    scopeStr = "gl_ScopeSubgroup"; break;
246         }
247
248         const char *typeStr = m_data.dataType == DATA_TYPE_UINT64 ? "uint64_t" : "uint";
249
250         // Construct storageSemantics strings. Both release and acquire
251         // always have the payload storage class. They only include the
252         // guard storage class if they're using FENCE for that side of the
253         // sync.
254         std::stringstream storageSemanticsRelease;
255         switch (m_data.payloadSC)
256         {
257         default: DE_ASSERT(0); // fall through
258         case SC_BUFFER:         storageSemanticsRelease << "gl_StorageSemanticsBuffer"; break;
259         case SC_IMAGE:          storageSemanticsRelease << "gl_StorageSemanticsImage"; break;
260         case SC_WORKGROUP:      storageSemanticsRelease << "gl_StorageSemanticsShared"; break;
261         }
262         std::stringstream storageSemanticsAcquire;
263         storageSemanticsAcquire << storageSemanticsRelease.str();
264         if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
265         {
266                 switch (m_data.guardSC)
267                 {
268                 default: DE_ASSERT(0); // fall through
269                 case SC_BUFFER:         storageSemanticsRelease << " | gl_StorageSemanticsBuffer"; break;
270                 case SC_IMAGE:          storageSemanticsRelease << " | gl_StorageSemanticsImage"; break;
271                 case SC_WORKGROUP:      storageSemanticsRelease << " | gl_StorageSemanticsShared"; break;
272                 }
273         }
274         if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
275         {
276                 switch (m_data.guardSC)
277                 {
278                 default: DE_ASSERT(0); // fall through
279                 case SC_BUFFER:         storageSemanticsAcquire << " | gl_StorageSemanticsBuffer"; break;
280                 case SC_IMAGE:          storageSemanticsAcquire << " | gl_StorageSemanticsImage"; break;
281                 case SC_WORKGROUP:      storageSemanticsAcquire << " | gl_StorageSemanticsShared"; break;
282                 }
283         }
284
285         std::stringstream semanticsRelease, semanticsAcquire, semanticsAcquireRelease;
286
287         semanticsRelease << "gl_SemanticsRelease";
288         semanticsAcquire << "gl_SemanticsAcquire";
289         semanticsAcquireRelease << "gl_SemanticsAcquireRelease";
290         if (!m_data.coherent && m_data.testType != TT_WAR)
291         {
292                 DE_ASSERT(!m_data.core11);
293                 semanticsRelease << " | gl_SemanticsMakeAvailable";
294                 semanticsAcquire << " | gl_SemanticsMakeVisible";
295                 semanticsAcquireRelease << " | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible";
296         }
297
298         std::stringstream css;
299         css << "#version 450 core\n";
300         if (!m_data.core11)
301         {
302                 css << "#pragma use_vulkan_memory_model\n";
303         }
304         css <<
305                 "#extension GL_KHR_shader_subgroup_basic : enable\n"
306                 "#extension GL_KHR_shader_subgroup_shuffle : enable\n"
307                 "#extension GL_KHR_shader_subgroup_ballot : enable\n"
308                 "#extension GL_KHR_memory_scope_semantics : enable\n"
309                 "#extension GL_ARB_gpu_shader_int64 : enable\n"
310                 "// DIM/NUM_WORKGROUP_EACH_DIM overriden by spec constants\n"
311                 "layout(constant_id = 0) const int DIM = 1;\n"
312                 "layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
313                 "struct S { " << typeStr << " x[DIM*DIM]; };\n";
314
315         if (m_data.stage == STAGE_COMPUTE)
316         {
317                 css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
318         }
319
320         const char *memqual = "";
321         if (m_data.coherent)
322         {
323                 if (m_data.core11)
324                 {
325                         // Vulkan 1.1 only has "coherent", use it regardless of scope
326                         memqual = "coherent";
327                 }
328                 else
329                 {
330                         switch (m_data.scope)
331                         {
332                         default: DE_ASSERT(0); // fall through
333                         case SCOPE_DEVICE:              memqual = "devicecoherent"; break;
334                         case SCOPE_QUEUEFAMILY: memqual = "queuefamilycoherent"; break;
335                         case SCOPE_WORKGROUP:   memqual = "workgroupcoherent"; break;
336                         case SCOPE_SUBGROUP:    memqual = "subgroupcoherent"; break;
337                         }
338                 }
339         }
340         else
341         {
342                 DE_ASSERT(!m_data.core11);
343                 memqual = "nonprivate";
344         }
345
346         // Declare payload, guard, and fail resources
347         switch (m_data.payloadSC)
348         {
349         default: DE_ASSERT(0); // fall through
350         case SC_BUFFER:         css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break;
351         case SC_IMAGE:          css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n"; break;
352         case SC_WORKGROUP:      css << "shared S payload;\n"; break;
353         }
354         if (m_data.syncType != ST_CONTROL_AND_MEMORY_BARRIER && m_data.syncType != ST_CONTROL_BARRIER)
355         {
356                 // The guard variable is only accessed with atomics and need not be declared coherent.
357                 switch (m_data.guardSC)
358                 {
359                 default: DE_ASSERT(0); // fall through
360                 case SC_BUFFER:         css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break;
361                 case SC_IMAGE:          css << "layout(set=0, binding=1, r32ui) uniform uimage2D guard;\n"; break;
362                 case SC_WORKGROUP:      css << "shared S guard;\n"; break;
363                 }
364         }
365
366         css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";
367
368         css <<
369                 "void main()\n"
370                 "{\n"
371                 "   bool pass = true;\n"
372                 "   bool skip = false;\n";
373
374         if (m_data.stage == STAGE_FRAGMENT)
375         {
376                 // Kill helper invocations so they don't load outside the bounds of the SSBO.
377                 // Helper pixels are also initially "active" and if a thread gets one as its
378                 // partner in SCOPE_SUBGROUP mode, it can't run the test.
379                 css << "   if (gl_HelperInvocation) { return; }\n";
380         }
381
382         // Compute coordinates based on the storage class and scope.
383         // For workgroup scope, we pair up LocalInvocationID and DIM-1-LocalInvocationID.
384         // For device scope, we pair up GlobalInvocationID and DIM*NUMWORKGROUPS-1-GlobalInvocationID.
385         // For subgroup scope, we pair up LocalInvocationID and LocalInvocationID from subgroupId^(subgroupSize-1)
386         switch (invocationMapping)
387         {
388         default: DE_ASSERT(0); // fall through
389         case SCOPE_SUBGROUP:
390                 // If the partner invocation isn't active, the shuffle below will be undefined. Bail.
391                 css << "   uvec4 ballot = subgroupBallot(true);\n"
392                            "   if (!subgroupBallotBitExtract(ballot, gl_SubgroupInvocationID^(gl_SubgroupSize-1))) { return; }\n";
393
394                 switch (m_data.stage)
395                 {
396                 default: DE_ASSERT(0); // fall through
397                 case STAGE_COMPUTE:
398                         css <<
399                         "   ivec2 localId           = ivec2(gl_LocalInvocationID.xy);\n"
400                         "   ivec2 partnerLocalId    = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
401                         "   uint sharedCoord        = localId.y * DIM + localId.x;\n"
402                         "   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
403                         "   uint bufferCoord        = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
404                         "   uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
405                         "   ivec2 imageCoord        = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
406                         "   ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
407                         break;
408                 case STAGE_VERTEX:
409                         css <<
410                         "   uint bufferCoord        = gl_VertexIndex;\n"
411                         "   uint partnerBufferCoord = subgroupShuffleXor(gl_VertexIndex, gl_SubgroupSize-1);\n"
412                         "   ivec2 imageCoord        = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
413                         "   ivec2 partnerImageCoord = subgroupShuffleXor(imageCoord, gl_SubgroupSize-1);\n"
414                         "   gl_PointSize            = 1.0f;\n"
415                         "   gl_Position             = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
416                         break;
417                 case STAGE_FRAGMENT:
418                         css <<
419                         "   ivec2 localId        = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
420                         "   ivec2 groupId        = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
421                         "   ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
422                         "   ivec2 partnerGroupId = subgroupShuffleXor(groupId, gl_SubgroupSize-1);\n"
423                         "   uint sharedCoord     = localId.y * DIM + localId.x;\n"
424                         "   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
425                         "   uint bufferCoord     = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
426                         "   uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
427                         "   ivec2 imageCoord     = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
428                         "   ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
429                         break;
430                 }
431                 break;
432         case SCOPE_WORKGROUP:
433                 css <<
434                 "   ivec2 localId           = ivec2(gl_LocalInvocationID.xy);\n"
435                 "   ivec2 partnerLocalId    = ivec2(DIM-1)-ivec2(gl_LocalInvocationID.xy);\n"
436                 "   uint sharedCoord        = localId.y * DIM + localId.x;\n"
437                 "   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
438                 "   uint bufferCoord        = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
439                 "   uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
440                 "   ivec2 imageCoord        = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
441                 "   ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
442                 break;
443         case SCOPE_QUEUEFAMILY:
444         case SCOPE_DEVICE:
445                 switch (m_data.stage)
446                 {
447                 default: DE_ASSERT(0); // fall through
448                 case STAGE_COMPUTE:
449                         css <<
450                         "   ivec2 globalId          = ivec2(gl_GlobalInvocationID.xy);\n"
451                         "   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
452                         "   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
453                         "   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
454                         "   ivec2 imageCoord        = globalId;\n"
455                         "   ivec2 partnerImageCoord = partnerGlobalId;\n";
456                         break;
457                 case STAGE_VERTEX:
458                         css <<
459                         "   ivec2 globalId          = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
460                         "   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - globalId;\n"
461                         "   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
462                         "   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
463                         "   ivec2 imageCoord        = globalId;\n"
464                         "   ivec2 partnerImageCoord = partnerGlobalId;\n"
465                         "   gl_PointSize            = 1.0f;\n"
466                         "   gl_Position             = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
467                         break;
468                 case STAGE_FRAGMENT:
469                         css <<
470                         "   ivec2 localId       = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
471                         "   ivec2 groupId       = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
472                         "   ivec2 partnerLocalId = ivec2(DIM-1)-localId;\n"
473                         "   ivec2 partnerGroupId = groupId;\n"
474                         "   uint sharedCoord    = localId.y * DIM + localId.x;\n"
475                         "   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
476                         "   uint bufferCoord    = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
477                         "   uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
478                         "   ivec2 imageCoord    = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
479                         "   ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
480                         break;
481                 }
482                 break;
483         }
484
485         // Initialize shared memory, followed by a barrier
486         if (m_data.payloadSC == SC_WORKGROUP)
487         {
488                 css << "   payload.x[sharedCoord] = 0;\n";
489         }
490         if (m_data.guardSC == SC_WORKGROUP)
491         {
492                 css << "   guard.x[sharedCoord] = 0;\n";
493         }
494         if (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP)
495         {
496                 switch (invocationMapping)
497                 {
498                 default: DE_ASSERT(0); // fall through
499                 case SCOPE_SUBGROUP:    css << "   subgroupBarrier();\n"; break;
500                 case SCOPE_WORKGROUP:   css << "   barrier();\n"; break;
501                 }
502         }
503
504         if (m_data.testType == TT_MP)
505         {
506                 // Store payload
507                 switch (m_data.payloadSC)
508                 {
509                 default: DE_ASSERT(0); // fall through
510                 case SC_BUFFER:         css << "   payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break;
511                 case SC_IMAGE:          css << "   imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break;
512                 case SC_WORKGROUP:      css << "   payload.x[sharedCoord] = bufferCoord + (payload.x[partnerSharedCoord]>>31);\n"; break;
513                 }
514         }
515         else
516         {
517                 DE_ASSERT(m_data.testType == TT_WAR);
518                 // Load payload
519                 switch (m_data.payloadSC)
520                 {
521                 default: DE_ASSERT(0); // fall through
522                 case SC_BUFFER:         css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
523                 case SC_IMAGE:          css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
524                 case SC_WORKGROUP:      css << "   " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
525                 }
526         }
527         if (m_data.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
528         {
529                 // Acquire and release separate from control barrier
530                 css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n"
531                            "   controlBarrier(" << scopeStr << ", gl_ScopeInvocation, 0, 0);\n"
532                            "   memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
533         }
534         else if (m_data.syncType == ST_CONTROL_BARRIER)
535         {
536                 // Control barrier performs both acquire and release
537                 css << "   controlBarrier(" << scopeStr << ", " << scopeStr << ", "
538                                                                         << storageSemanticsRelease.str() << " | " << storageSemanticsAcquire.str() << ", "
539                                                                         << semanticsAcquireRelease.str() << ");\n";
540         }
541         else
542         {
543                 // Release barrier
544                 std::stringstream atomicReleaseSemantics;
545                 if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
546                 {
547                         css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n";
548                         atomicReleaseSemantics << ", 0, 0";
549                 }
550                 else
551                 {
552                         atomicReleaseSemantics << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str();
553                 }
554                 // Atomic store guard
555                 if (m_data.atomicRMW)
556                 {
557                         switch (m_data.guardSC)
558                         {
559                         default: DE_ASSERT(0); // fall through
560                         case SC_BUFFER:         css << "   atomicExchange(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
561                         case SC_IMAGE:          css << "   imageAtomicExchange(guard, imageCoord, (1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
562                         case SC_WORKGROUP:      css << "   atomicExchange(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
563                         }
564                 }
565                 else
566                 {
567                         switch (m_data.guardSC)
568                         {
569                         default: DE_ASSERT(0); // fall through
570                         case SC_BUFFER:         css << "   atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
571                         case SC_IMAGE:          css << "   imageAtomicStore(guard, imageCoord, (1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
572                         case SC_WORKGROUP:      css << "   atomicStore(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
573                         }
574                 }
575
576                 std::stringstream atomicAcquireSemantics;
577                 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
578                 {
579                         atomicAcquireSemantics << ", 0, 0";
580                 }
581                 else
582                 {
583                         atomicAcquireSemantics << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str();
584                 }
585                 // Atomic load guard
586                 if (m_data.atomicRMW)
587                 {
588                         switch (m_data.guardSC)
589                         {
590                         default: DE_ASSERT(0); // fall through
591                         case SC_BUFFER:         css << "   skip = atomicExchange(guard.x[partnerBufferCoord], 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
592                         case SC_IMAGE:          css << "   skip = imageAtomicExchange(guard, partnerImageCoord, 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
593                         case SC_WORKGROUP:      css << "   skip = atomicExchange(guard.x[partnerSharedCoord], 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
594                         }
595                 } else
596                 {
597                         switch (m_data.guardSC)
598                         {
599                         default: DE_ASSERT(0); // fall through
600                         case SC_BUFFER:         css << "   skip = atomicLoad(guard.x[partnerBufferCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
601                         case SC_IMAGE:          css << "   skip = imageAtomicLoad(guard, partnerImageCoord, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
602                         case SC_WORKGROUP:      css << "   skip = atomicLoad(guard.x[partnerSharedCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
603                         }
604                 }
605                 // Acquire barrier
606                 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
607                 {
608                         css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
609                 }
610         }
611         if (m_data.testType == TT_MP)
612         {
613                 // Load payload
614                 switch (m_data.payloadSC)
615                 {
616                 default: DE_ASSERT(0); // fall through
617                 case SC_BUFFER:         css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
618                 case SC_IMAGE:          css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
619                 case SC_WORKGROUP:      css << "   " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
620                 }
621                 css <<
622                         "   if (!skip && r != partnerBufferCoord) { fail.x[bufferCoord] = 1; }\n"
623                         "}\n";
624         }
625         else
626         {
627                 DE_ASSERT(m_data.testType == TT_WAR);
628                 // Store payload, only if the partner invocation has already done its read
629                 css << "   if (!skip) {\n   ";
630                 switch (m_data.payloadSC)
631                 {
632                 default: DE_ASSERT(0); // fall through
633                 case SC_BUFFER:         css << "   payload.x[bufferCoord] = bufferCoord;\n"; break;
634                 case SC_IMAGE:          css << "   imageStore(payload, imageCoord, uvec4(bufferCoord, 0, 0, 0));\n"; break;
635                 case SC_WORKGROUP:      css << "   payload.x[sharedCoord] = bufferCoord;\n"; break;
636                 }
637                 css <<
638                         "   }\n"
639                         "   if (r != 0) { fail.x[bufferCoord] = 1; }\n"
640                         "}\n";
641         }
642
643         // Draw a fullscreen triangle strip based on gl_VertexIndex
644         std::stringstream vss;
645         vss <<
646                 "#version 450 core\n"
647                 "vec2 coords[4] = {ivec2(-1,-1), ivec2(-1, 1), ivec2(1, -1), ivec2(1, 1)};\n"
648                 "void main() { gl_Position = vec4(coords[gl_VertexIndex], 0, 1); }\n";
649
650         const vk::ShaderBuildOptions    buildOptions    (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
651
652         switch (m_data.stage)
653         {
654         default: DE_ASSERT(0); // fall through
655         case STAGE_COMPUTE:
656                 programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
657                 break;
658         case STAGE_VERTEX:
659                 programCollection.glslSources.add("test") << glu::VertexSource(css.str()) << buildOptions;
660                 break;
661         case STAGE_FRAGMENT:
662                 programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());
663                 programCollection.glslSources.add("test") << glu::FragmentSource(css.str()) << buildOptions;
664                 break;
665         }
666 }
667
668 TestInstance* MemoryModelTestCase::createInstance (Context& context) const
669 {
670         return new MemoryModelTestInstance(context, m_data);
671 }
672
673 VkBufferCreateInfo makeBufferCreateInfo (const VkDeviceSize                     bufferSize,
674                                                                                  const VkBufferUsageFlags       usage)
675 {
676         const VkBufferCreateInfo bufferCreateInfo =
677         {
678                 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,   // VkStructureType              sType;
679                 DE_NULL,                                                                // const void*                  pNext;
680                 (VkBufferCreateFlags)0,                                 // VkBufferCreateFlags  flags;
681                 bufferSize,                                                             // VkDeviceSize                 size;
682                 usage,                                                                  // VkBufferUsageFlags   usage;
683                 VK_SHARING_MODE_EXCLUSIVE,                              // VkSharingMode                sharingMode;
684                 0u,                                                                             // deUint32                             queueFamilyIndexCount;
685                 DE_NULL,                                                                // const deUint32*              pQueueFamilyIndices;
686         };
687         return bufferCreateInfo;
688 }
689
690 Move<VkDescriptorSet> makeDescriptorSet (const DeviceInterface&                 vk,
691                                                                                  const VkDevice                                 device,
692                                                                                  const VkDescriptorPool                 descriptorPool,
693                                                                                  const VkDescriptorSetLayout    setLayout)
694 {
695         const VkDescriptorSetAllocateInfo allocateParams =
696         {
697                 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,         // VkStructureType                              sType;
698                 DE_NULL,                                                                                        // const void*                                  pNext;
699                 descriptorPool,                                                                         // VkDescriptorPool                             descriptorPool;
700                 1u,                                                                                                     // deUint32                                             setLayoutCount;
701                 &setLayout,                                                                                     // const VkDescriptorSetLayout* pSetLayouts;
702         };
703         return allocateDescriptorSet(vk, device, &allocateParams);
704 }
705
706 tcu::TestStatus MemoryModelTestInstance::iterate (void)
707 {
708         const DeviceInterface&  vk                                              = m_context.getDeviceInterface();
709         const VkDevice                  device                                  = m_context.getDevice();
710         Allocator&                              allocator                               = m_context.getDefaultAllocator();
711
712         VkPhysicalDeviceProperties2 properties;
713         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
714         properties.pNext = NULL;
715
716         m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);
717
718         deUint32 DIM = 31;
719         deUint32 NUM_WORKGROUP_EACH_DIM = 8;
720         // If necessary, shrink workgroup size to fit HW limits
721         if (DIM*DIM > properties.properties.limits.maxComputeWorkGroupInvocations)
722         {
723                 DIM = (deUint32)deFloatSqrt((float)properties.properties.limits.maxComputeWorkGroupInvocations);
724         }
725         deUint32 NUM_INVOCATIONS = (DIM * DIM * NUM_WORKGROUP_EACH_DIM * NUM_WORKGROUP_EACH_DIM);
726
727         VkDeviceSize bufferSizes[3];
728         de::MovePtr<BufferWithMemory> buffers[3];
729         vk::VkDescriptorBufferInfo bufferDescriptors[3];
730         de::MovePtr<BufferWithMemory> copyBuffer;
731
732         for (deUint32 i = 0; i < 3; ++i)
733         {
734                 size_t elementSize = m_data.dataType == DATA_TYPE_UINT64 ? sizeof(deUint64) : sizeof(deUint32);
735                 // buffer2 is the "fail" buffer, and is always uint
736                 if (i == 2)
737                         elementSize = sizeof(deUint32);
738                 bufferSizes[i] = NUM_INVOCATIONS * elementSize;
739
740                 bool local;
741                 switch (i)
742                 {
743                 default: DE_ASSERT(0); // fall through
744                 case 0:
745                         if (m_data.payloadSC != SC_BUFFER)
746                                 continue;
747                         local = m_data.payloadMemLocal;
748                         break;
749                 case 1:
750                         if (m_data.guardSC != SC_BUFFER)
751                                 continue;
752                         local = m_data.guardMemLocal;
753                         break;
754                 case 2: local = true; break;
755                 }
756
757                 try
758                 {
759                         buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
760                                 vk, device, allocator, makeBufferCreateInfo(bufferSizes[i], VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
761                                 local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
762                 }
763                 catch (const tcu::NotSupportedError&)
764                 {
765                         if (!local)
766                         {
767                                 TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
768                         }
769                         throw;
770                 }
771                 bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, bufferSizes[i]);
772         }
773
774         // Try to use cached host memory for the buffer the CPU will read from, else fallback to host visible.
775         try
776         {
777                 copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
778                         vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible | MemoryRequirement::Cached));
779         }
780         catch (const tcu::NotSupportedError&)
781         {
782                 copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
783                         vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));
784         }
785
786         const VkImageCreateInfo                 imageCreateInfo                 =
787         {
788                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,    // VkStructureType                sType;
789                 DE_NULL,                                                                // const void*                    pNext;
790                 (VkImageCreateFlags)0u,                                 // VkImageCreateFlags      flags;
791                 VK_IMAGE_TYPE_2D,                                               // VkImageType                    imageType;
792                 VK_FORMAT_R32_UINT,                                             // VkFormat                              format;
793                 {
794                         DIM*NUM_WORKGROUP_EACH_DIM,     // deUint32     width;
795                         DIM*NUM_WORKGROUP_EACH_DIM,     // deUint32     height;
796                         1u              // deUint32     depth;
797                 },                                                                              // VkExtent3D                      extent;
798                 1u,                                                                             // deUint32                              mipLevels;
799                 1u,                                                                             // deUint32                              arrayLayers;
800                 VK_SAMPLE_COUNT_1_BIT,                                  // VkSampleCountFlagBits        samples;
801                 VK_IMAGE_TILING_OPTIMAL,                                // VkImageTiling                        tiling;
802                 VK_IMAGE_USAGE_STORAGE_BIT
803                 | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
804                 | VK_IMAGE_USAGE_TRANSFER_DST_BIT,              // VkImageUsageFlags            usage;
805                 VK_SHARING_MODE_EXCLUSIVE,                              // VkSharingMode                        sharingMode;
806                 0u,                                                                             // deUint32                              queueFamilyIndexCount;
807                 DE_NULL,                                                                // const deUint32*                pQueueFamilyIndices;
808                 VK_IMAGE_LAYOUT_UNDEFINED                               // VkImageLayout                        initialLayout;
809         };
810         VkImageViewCreateInfo           imageViewCreateInfo             =
811         {
812                 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,       // VkStructureType                      sType;
813                 DE_NULL,                                                                        // const void*                          pNext;
814                 (VkImageViewCreateFlags)0u,                                     // VkImageViewCreateFlags        flags;
815                 DE_NULL,                                                                        // VkImage                                      image;
816                 VK_IMAGE_VIEW_TYPE_2D,                                          // VkImageViewType                      viewType;
817                 VK_FORMAT_R32_UINT,                                                                             // VkFormat                                format;
818                 {
819                         VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle   r;
820                         VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle   g;
821                         VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle   b;
822                         VK_COMPONENT_SWIZZLE_A  // VkComponentSwizzle   a;
823                 },                                                                                      // VkComponentMapping            components;
824                 {
825                         VK_IMAGE_ASPECT_COLOR_BIT,      // VkImageAspectFlags   aspectMask;
826                         0u,                                                     // deUint32                       baseMipLevel;
827                         1u,                                                     // deUint32                       levelCount;
828                         0u,                                                     // deUint32                       baseArrayLayer;
829                         1u                                                      // deUint32                       layerCount;
830                 }                                                                                       // VkImageSubresourceRange      subresourceRange;
831         };
832
833
834         de::MovePtr<ImageWithMemory> images[2];
835         Move<VkImageView> imageViews[2];
836         vk::VkDescriptorImageInfo imageDescriptors[2];
837
838         for (deUint32 i = 0; i < 2; ++i)
839         {
840
841                 bool local;
842                 switch (i)
843                 {
844                 default: DE_ASSERT(0); // fall through
845                 case 0:
846                         if (m_data.payloadSC != SC_IMAGE)
847                                 continue;
848                         local = m_data.payloadMemLocal;
849                         break;
850                 case 1:
851                         if (m_data.guardSC != SC_IMAGE)
852                                 continue;
853                         local = m_data.guardMemLocal;
854                         break;
855                 }
856
857                 try
858                 {
859                         images[i] = de::MovePtr<ImageWithMemory>(new ImageWithMemory(
860                                 vk, device, allocator, imageCreateInfo, local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
861                 }
862                 catch (const tcu::NotSupportedError&)
863                 {
864                         if (!local)
865                         {
866                                 TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
867                         }
868                         throw;
869                 }
870                 imageViewCreateInfo.image = **images[i];
871                 imageViews[i] = createImageView(vk, device, &imageViewCreateInfo, NULL);
872
873                 imageDescriptors[i] = makeDescriptorImageInfo(DE_NULL, *imageViews[i], VK_IMAGE_LAYOUT_GENERAL);
874         }
875
876         vk::DescriptorSetLayoutBuilder layoutBuilder;
877
878         switch (m_data.payloadSC)
879         {
880         default:
881         case SC_BUFFER: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
882         case SC_IMAGE:  layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
883         }
884         switch (m_data.guardSC)
885         {
886         default:
887         case SC_BUFFER: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
888         case SC_IMAGE:  layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
889         }
890         layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);
891
892         vk::Unique<vk::VkDescriptorSetLayout>   descriptorSetLayout(layoutBuilder.build(vk, device));
893
894         vk::Unique<vk::VkDescriptorPool>                descriptorPool(vk::DescriptorPoolBuilder()
895                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3u)
896                 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3u)
897                 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
898         vk::Unique<vk::VkDescriptorSet>                 descriptorSet           (makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
899
900         vk::DescriptorSetUpdateBuilder setUpdateBuilder;
901         switch (m_data.payloadSC)
902         {
903         default: DE_ASSERT(0); // fall through
904         case SC_WORKGROUP:
905                 break;
906         case SC_BUFFER:
907                 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
908                         VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
909                 break;
910         case SC_IMAGE:
911                 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
912                         VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[0]);
913                 break;
914         }
915         switch (m_data.guardSC)
916         {
917         default: DE_ASSERT(0); // fall through
918         case SC_WORKGROUP:
919                 break;
920         case SC_BUFFER:
921                 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
922                         VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[1]);
923                 break;
924         case SC_IMAGE:
925                 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
926                         VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[1]);
927                 break;
928         }
929         setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(2),
930                 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[2]);
931
932         setUpdateBuilder.update(vk, device);
933
934
935         const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
936         {
937                 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,                          // sType
938                 DE_NULL,                                                                                                        // pNext
939                 (VkPipelineLayoutCreateFlags)0,
940                 1,                                                                                                                      // setLayoutCount
941                 &descriptorSetLayout.get(),                                                                     // pSetLayouts
942                 0u,                                                                                                                     // pushConstantRangeCount
943                 DE_NULL,                                                                                                        // pPushConstantRanges
944         };
945
946         Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
947
948         Move<VkPipeline> pipeline;
949         Move<VkRenderPass> renderPass;
950         Move<VkFramebuffer> framebuffer;
951
952         VkPipelineBindPoint bindPoint = m_data.stage == STAGE_COMPUTE ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
953
954         const deUint32 specData[2] = {DIM, NUM_WORKGROUP_EACH_DIM};
955
956         const vk::VkSpecializationMapEntry entries[3] =
957         {
958                 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
959                 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
960         };
961
962         const vk::VkSpecializationInfo specInfo =
963         {
964                 2,                                              // mapEntryCount
965                 entries,                                // pMapEntries
966                 sizeof(specData),               // dataSize
967                 specData                                // pData
968         };
969
970         if (m_data.stage == STAGE_COMPUTE)
971         {
972                 const Unique<VkShaderModule>    shader                                          (createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));
973
974                 const VkPipelineShaderStageCreateInfo   shaderCreateInfo =
975                 {
976                         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
977                         DE_NULL,
978                         (VkPipelineShaderStageCreateFlags)0,
979                         VK_SHADER_STAGE_COMPUTE_BIT,                                                            // stage
980                         *shader,                                                                                                        // shader
981                         "main",
982                         &specInfo,                                                                                                      // pSpecializationInfo
983                 };
984
985                 const VkComputePipelineCreateInfo               pipelineCreateInfo =
986                 {
987                         VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
988                         DE_NULL,
989                         0u,                                                                                                                     // flags
990                         shaderCreateInfo,                                                                                       // cs
991                         *pipelineLayout,                                                                                        // layout
992                         (vk::VkPipeline)0,                                                                                      // basePipelineHandle
993                         0u,                                                                                                                     // basePipelineIndex
994                 };
995                 pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, NULL);
996         }
997         else
998         {
999
1000                 const vk::VkSubpassDescription          subpassDesc                     =
1001                 {
1002                         (vk::VkSubpassDescriptionFlags)0,
1003                         vk::VK_PIPELINE_BIND_POINT_GRAPHICS,                                    // pipelineBindPoint
1004                         0u,                                                                                                             // inputCount
1005                         DE_NULL,                                                                                                // pInputAttachments
1006                         0u,                                                                                                             // colorCount
1007                         DE_NULL,                                                                                                // pColorAttachments
1008                         DE_NULL,                                                                                                // pResolveAttachments
1009                         DE_NULL,                                                                                                // depthStencilAttachment
1010                         0u,                                                                                                             // preserveCount
1011                         DE_NULL,                                                                                                // pPreserveAttachments
1012
1013                 };
1014                 const vk::VkRenderPassCreateInfo        renderPassParams        =
1015                 {
1016                         vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,                  // sType
1017                         DE_NULL,                                                                                                // pNext
1018                         (vk::VkRenderPassCreateFlags)0,
1019                         0u,                                                                                                             // attachmentCount
1020                         DE_NULL,                                                                                                // pAttachments
1021                         1u,                                                                                                             // subpassCount
1022                         &subpassDesc,                                                                                   // pSubpasses
1023                         0u,                                                                                                             // dependencyCount
1024                         DE_NULL,                                                                                                // pDependencies
1025                 };
1026
1027                 renderPass = createRenderPass(vk, device, &renderPassParams);
1028
1029                 const vk::VkFramebufferCreateInfo       framebufferParams       =
1030                 {
1031                         vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,  // sType
1032                         DE_NULL,                                                                                // pNext
1033                         (vk::VkFramebufferCreateFlags)0,
1034                         *renderPass,                                                                    // renderPass
1035                         0u,                                                                                             // attachmentCount
1036                         DE_NULL,                                                                                // pAttachments
1037                         DIM*NUM_WORKGROUP_EACH_DIM,                                             // width
1038                         DIM*NUM_WORKGROUP_EACH_DIM,                                             // height
1039                         1u,                                                                                             // layers
1040                 };
1041
1042                 framebuffer = createFramebuffer(vk, device, &framebufferParams);
1043
1044                 const VkPipelineVertexInputStateCreateInfo              vertexInputStateCreateInfo              =
1045                 {
1046                         VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,      // VkStructureType                                                      sType;
1047                         DE_NULL,                                                                                                        // const void*                                                          pNext;
1048                         (VkPipelineVertexInputStateCreateFlags)0,                                       // VkPipelineVertexInputStateCreateFlags        flags;
1049                         0u,                                                                                                                     // deUint32                                                                     vertexBindingDescriptionCount;
1050                         DE_NULL,                                                                                                        // const VkVertexInputBindingDescription*       pVertexBindingDescriptions;
1051                         0u,                                                                                                                     // deUint32                                                                     vertexAttributeDescriptionCount;
1052                         DE_NULL                                                                                                         // const VkVertexInputAttributeDescription*     pVertexAttributeDescriptions;
1053                 };
1054
1055                 const VkPipelineInputAssemblyStateCreateInfo    inputAssemblyStateCreateInfo    =
1056                 {
1057                         VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,    // VkStructureType                                                      sType;
1058                         DE_NULL,                                                                                                                // const void*                                                          pNext;
1059                         (VkPipelineInputAssemblyStateCreateFlags)0,                                             // VkPipelineInputAssemblyStateCreateFlags      flags;
1060                         (m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology                                                topology;
1061                         VK_FALSE                                                                                                                // VkBool32                                                                     primitiveRestartEnable;
1062                 };
1063
1064                 const VkPipelineRasterizationStateCreateInfo    rasterizationStateCreateInfo    =
1065                 {
1066                         VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,             // VkStructureType                                                      sType;
1067                         DE_NULL,                                                                                                                // const void*                                                          pNext;
1068                         (VkPipelineRasterizationStateCreateFlags)0,                                             // VkPipelineRasterizationStateCreateFlags      flags;
1069                         VK_FALSE,                                                                                                               // VkBool32                                                                     depthClampEnable;
1070                         (m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE,                    // VkBool32                                                                     rasterizerDiscardEnable;
1071                         VK_POLYGON_MODE_FILL,                                                                                   // VkPolygonMode                                                        polygonMode;
1072                         VK_CULL_MODE_NONE,                                                                                              // VkCullModeFlags                                                      cullMode;
1073                         VK_FRONT_FACE_CLOCKWISE,                                                                                // VkFrontFace                                                          frontFace;
1074                         VK_FALSE,                                                                                                               // VkBool32                                                                     depthBiasEnable;
1075                         0.0f,                                                                                                                   // float                                                                        depthBiasConstantFactor;
1076                         0.0f,                                                                                                                   // float                                                                        depthBiasClamp;
1077                         0.0f,                                                                                                                   // float                                                                        depthBiasSlopeFactor;
1078                         1.0f                                                                                                                    // float                                                                        lineWidth;
1079                 };
1080
1081                 const VkPipelineMultisampleStateCreateInfo              multisampleStateCreateInfo =
1082                 {
1083                         VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,       // VkStructureType                                                sType
1084                         DE_NULL,                                                                                                        // const void*                                                    pNext
1085                         0u,                                                                                                                     // VkPipelineMultisampleStateCreateFlags        flags
1086                         VK_SAMPLE_COUNT_1_BIT,                                                                          // VkSampleCountFlagBits                                        rasterizationSamples
1087                         VK_FALSE,                                                                                                       // VkBool32                                                              sampleShadingEnable
1088                         1.0f,                                                                                                           // float                                                                        minSampleShading
1089                         DE_NULL,                                                                                                        // const VkSampleMask*                                    pSampleMask
1090                         VK_FALSE,                                                                                                       // VkBool32                                                              alphaToCoverageEnable
1091                         VK_FALSE                                                                                                        // VkBool32                                                              alphaToOneEnable
1092                 };
1093
1094                 VkViewport viewport = makeViewport(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
1095                 VkRect2D scissor = makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
1096
1097                 const VkPipelineViewportStateCreateInfo                 viewportStateCreateInfo                         =
1098                 {
1099                         VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,  // VkStructureType                                                       sType
1100                         DE_NULL,                                                                                                // const void*                                                           pNext
1101                         (VkPipelineViewportStateCreateFlags)0,                                  // VkPipelineViewportStateCreateFlags             flags
1102                         1u,                                                                                                             // deUint32                                                                     viewportCount
1103                         &viewport,                                                                                              // const VkViewport*                                               pViewports
1104                         1u,                                                                                                             // deUint32                                                                     scissorCount
1105                         &scissor                                                                                                // const VkRect2D*                                                       pScissors
1106                 };
1107
1108                 Move<VkShaderModule> fs;
1109                 Move<VkShaderModule> vs;
1110
1111                 deUint32 numStages;
1112                 if (m_data.stage == STAGE_VERTEX)
1113                 {
1114                         vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1115                         fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); // bogus
1116                         numStages = 1u;
1117                 }
1118                 else
1119                 {
1120                         vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
1121                         fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1122                         numStages = 2u;
1123                 }
1124
1125                 const VkPipelineShaderStageCreateInfo   shaderCreateInfo[2] = {
1126                         {
1127                                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1128                                 DE_NULL,
1129                                 (VkPipelineShaderStageCreateFlags)0,
1130                                 VK_SHADER_STAGE_VERTEX_BIT,                                                                     // stage
1131                                 *vs,                                                                                                            // shader
1132                                 "main",
1133                                 &specInfo,                                                                                                      // pSpecializationInfo
1134                         },
1135                         {
1136                                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1137                                 DE_NULL,
1138                                 (VkPipelineShaderStageCreateFlags)0,
1139                                 VK_SHADER_STAGE_FRAGMENT_BIT,                                                           // stage
1140                                 *fs,                                                                                                            // shader
1141                                 "main",
1142                                 &specInfo,                                                                                                      // pSpecializationInfo
1143                         }
1144                 };
1145
1146                 const VkGraphicsPipelineCreateInfo                              graphicsPipelineCreateInfo              =
1147                 {
1148                         VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,        // VkStructureType                                                                      sType;
1149                         DE_NULL,                                                                                        // const void*                                                                          pNext;
1150                         (VkPipelineCreateFlags)0,                                                       // VkPipelineCreateFlags                                                        flags;
1151                         numStages,                                                                                      // deUint32                                                                                     stageCount;
1152                         &shaderCreateInfo[0],                                                           // const VkPipelineShaderStageCreateInfo*                       pStages;
1153                         &vertexInputStateCreateInfo,                                            // const VkPipelineVertexInputStateCreateInfo*          pVertexInputState;
1154                         &inputAssemblyStateCreateInfo,                                          // const VkPipelineInputAssemblyStateCreateInfo*        pInputAssemblyState;
1155                         DE_NULL,                                                                                        // const VkPipelineTessellationStateCreateInfo*         pTessellationState;
1156                         &viewportStateCreateInfo,                                                       // const VkPipelineViewportStateCreateInfo*                     pViewportState;
1157                         &rasterizationStateCreateInfo,                                          // const VkPipelineRasterizationStateCreateInfo*        pRasterizationState;
1158                         &multisampleStateCreateInfo,                                            // const VkPipelineMultisampleStateCreateInfo*          pMultisampleState;
1159                         DE_NULL,                                                                                        // const VkPipelineDepthStencilStateCreateInfo*         pDepthStencilState;
1160                         DE_NULL,                                                                                        // const VkPipelineColorBlendStateCreateInfo*           pColorBlendState;
1161                         DE_NULL,                                                                                        // const VkPipelineDynamicStateCreateInfo*                      pDynamicState;
1162                         pipelineLayout.get(),                                                           // VkPipelineLayout                                                                     layout;
1163                         renderPass.get(),                                                                       // VkRenderPass                                                                         renderPass;
1164                         0u,                                                                                                     // deUint32                                                                                     subpass;
1165                         DE_NULL,                                                                                        // VkPipeline                                                                           basePipelineHandle;
1166                         0                                                                                                       // int                                                                                          basePipelineIndex;
1167                 };
1168
1169                 pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
1170         }
1171
1172         const VkQueue                           queue                           = m_context.getUniversalQueue();
1173         Move<VkCommandPool>                             cmdPool                                 = createCommandPool(vk, device, 0, m_context.getUniversalQueueFamilyIndex());
1174         Move<VkCommandBuffer>                   cmdBuffer                               = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1175
1176         beginCommandBuffer(vk, *cmdBuffer, 0u);
1177
1178         vk.cmdFillBuffer(*cmdBuffer, **buffers[2], 0, bufferSizes[2], 0);
1179
1180         for (deUint32 i = 0; i < 2; ++i)
1181         {
1182                 if (!images[i])
1183                         continue;
1184
1185                 const VkImageMemoryBarrier imageBarrier =
1186                 {
1187                         VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,                         // VkStructureType              sType
1188                         DE_NULL,                                                                                        // const void*                  pNext
1189                         0u,                                                                                                     // VkAccessFlags                srcAccessMask
1190                         VK_ACCESS_TRANSFER_WRITE_BIT,                                           // VkAccessFlags                dstAccessMask
1191                         VK_IMAGE_LAYOUT_UNDEFINED,                                                      // VkImageLayout                oldLayout
1192                         VK_IMAGE_LAYOUT_GENERAL,                                                        // VkImageLayout                newLayout
1193                         VK_QUEUE_FAMILY_IGNORED,                                                        // uint32_t                             srcQueueFamilyIndex
1194                         VK_QUEUE_FAMILY_IGNORED,                                                        // uint32_t                             dstQueueFamilyIndex
1195                         **images[i],                                                                            // VkImage                              image
1196                         {
1197                                 VK_IMAGE_ASPECT_COLOR_BIT,                              // VkImageAspectFlags   aspectMask
1198                                 0u,                                                                             // uint32_t                             baseMipLevel
1199                                 1u,                                                                             // uint32_t                             mipLevels,
1200                                 0u,                                                                             // uint32_t                             baseArray
1201                                 1u,                                                                             // uint32_t                             arraySize
1202                         }
1203                 };
1204
1205                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
1206                                                          (VkDependencyFlags)0,
1207                                                           0, (const VkMemoryBarrier*)DE_NULL,
1208                                                           0, (const VkBufferMemoryBarrier*)DE_NULL,
1209                                                           1, &imageBarrier);
1210         }
1211
1212         vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
1213         vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
1214
1215         VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1216         VkClearValue clearColor = makeClearValueColorU32(0,0,0,0);
1217
1218         VkMemoryBarrier                                 memBarrier =
1219         {
1220                 VK_STRUCTURE_TYPE_MEMORY_BARRIER,       // sType
1221                 DE_NULL,                                                        // pNext
1222                 0u,                                                                     // srcAccessMask
1223                 0u,                                                                     // dstAccessMask
1224         };
1225
1226         for (deUint32 iters = 0; iters < 200; ++iters)
1227         {
1228                 for (deUint32 i = 0; i < 2; ++i)
1229                 {
1230                         if (buffers[i])
1231                                 vk.cmdFillBuffer(*cmdBuffer, **buffers[i], 0, bufferSizes[i], 0);
1232                         if (images[i])
1233                                 vk.cmdClearColorImage(*cmdBuffer, **images[i], VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);
1234                 }
1235
1236                 memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1237                 memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1238                 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages,
1239                         0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1240
1241                 if (m_data.stage == STAGE_COMPUTE)
1242                 {
1243                         vk.cmdDispatch(*cmdBuffer, NUM_WORKGROUP_EACH_DIM, NUM_WORKGROUP_EACH_DIM, 1);
1244                 }
1245                 else
1246                 {
1247                         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
1248                                                         makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM),
1249                                                         0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE);
1250                         // Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing
1251                         if (m_data.stage == STAGE_VERTEX)
1252                         {
1253                                 vk.cmdDraw(*cmdBuffer, DIM*DIM*NUM_WORKGROUP_EACH_DIM*NUM_WORKGROUP_EACH_DIM, 1u, 0u, 0u);
1254                         }
1255                         else
1256                         {
1257                                 vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u);
1258                         }
1259                         endRenderPass(vk, *cmdBuffer);
1260                 }
1261
1262                 memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1263                 memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
1264                 vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT,
1265                         0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1266         }
1267
1268         const VkBufferCopy      copyParams =
1269         {
1270                 (VkDeviceSize)0u,                                               // srcOffset
1271                 (VkDeviceSize)0u,                                               // dstOffset
1272                 bufferSizes[2]                                                  // size
1273         };
1274
1275         vk.cmdCopyBuffer(*cmdBuffer, **buffers[2], **copyBuffer, 1, &copyParams);
1276
1277         endCommandBuffer(vk, *cmdBuffer);
1278
1279         submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
1280
1281         tcu::TestLog& log = m_context.getTestContext().getLog();
1282
1283         deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr();
1284         invalidateMappedMemoryRange(vk, device, copyBuffer->getAllocation().getMemory(), copyBuffer->getAllocation().getOffset(), bufferSizes[2]);
1285         qpTestResult res = QP_TEST_RESULT_PASS;
1286
1287         deUint32 numErrors = 0;
1288         for (deUint32 i = 0; i < NUM_INVOCATIONS; ++i)
1289         {
1290                 if (ptr[i] != 0)
1291                 {
1292                         if (numErrors < 256)
1293                         {
1294                                 log << tcu::TestLog::Message << "Failed invocation: " << i << tcu::TestLog::EndMessage;
1295                         }
1296                         numErrors++;
1297                         res = QP_TEST_RESULT_FAIL;
1298                 }
1299         }
1300
1301         if (numErrors)
1302         {
1303                 log << tcu::TestLog::Message << "Total Errors: " << numErrors << tcu::TestLog::EndMessage;
1304         }
1305
1306         return tcu::TestStatus(res, qpGetTestResultName(res));
1307 }
1308
1309 }       // anonymous
1310
1311 tcu::TestCaseGroup*     createTests (tcu::TestContext& testCtx)
1312 {
1313         de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
1314                         testCtx, "memory_model", "Memory model tests"));
1315
1316         typedef struct
1317         {
1318                 deUint32                                value;
1319                 const char*                             name;
1320                 const char*                             description;
1321         } TestGroupCase;
1322
1323         TestGroupCase ttCases[] =
1324         {
1325                 { TT_MP,        "message_passing",      "message passing"               },
1326                 { TT_WAR,       "write_after_read",     "write after read"              },
1327         };
1328
1329         TestGroupCase core11Cases[] =
1330         {
1331                 { 1,    "core11",       "Supported by Vulkan1.1"                                                        },
1332                 { 0,    "ext",          "Requires VK_KHR_vulkan_memory_model extension"         },
1333         };
1334
1335         TestGroupCase dtCases[] =
1336         {
1337                 { DATA_TYPE_UINT,       "u32",  "uint32_t atomics"              },
1338                 { DATA_TYPE_UINT64,     "u64",  "uint64_t atomics"              },
1339         };
1340
1341         TestGroupCase cohCases[] =
1342         {
1343                 { 1,    "coherent",             "coherent payload variable"                     },
1344                 { 0,    "noncoherent",  "noncoherent payload variable"          },
1345         };
1346
1347         TestGroupCase stCases[] =
1348         {
1349                 { ST_FENCE_FENCE,                                       "fence_fence",                                  "release fence, acquire fence"                  },
1350                 { ST_FENCE_ATOMIC,                                      "fence_atomic",                                 "release fence, atomic acquire"                 },
1351                 { ST_ATOMIC_FENCE,                                      "atomic_fence",                                 "atomic release, acquire fence"                 },
1352                 { ST_ATOMIC_ATOMIC,                                     "atomic_atomic",                                "atomic release, atomic acquire"                },
1353                 { ST_CONTROL_BARRIER,                           "control_barrier",                              "control barrier"                                               },
1354                 { ST_CONTROL_AND_MEMORY_BARRIER,        "control_and_memory_barrier",   "control barrier with release/acquire"  },
1355         };
1356
1357         TestGroupCase rmwCases[] =
1358         {
1359                 { 0,    "atomicwrite",          "atomic write"          },
1360                 { 1,    "atomicrmw",            "atomic rmw"            },
1361         };
1362
1363         TestGroupCase scopeCases[] =
1364         {
1365                 { SCOPE_DEVICE,                 "device",               "device scope"                  },
1366                 { SCOPE_QUEUEFAMILY,    "queuefamily",  "queuefamily scope"             },
1367                 { SCOPE_WORKGROUP,              "workgroup",    "workgroup scope"               },
1368                 { SCOPE_SUBGROUP,               "subgroup",             "subgroup scope"                },
1369         };
1370
1371         TestGroupCase plCases[] =
1372         {
1373                 { 0,    "payload_nonlocal",             "payload variable in non-local memory"          },
1374                 { 1,    "payload_local",                "payload variable in local memory"                      },
1375         };
1376
1377         TestGroupCase pscCases[] =
1378         {
1379                 { SC_BUFFER,    "buffer",               "payload variable in buffer memory"                     },
1380                 { SC_IMAGE,             "image",                "payload variable in image memory"                      },
1381                 { SC_WORKGROUP, "workgroup",    "payload variable in workgroup memory"          },
1382         };
1383
1384         TestGroupCase glCases[] =
1385         {
1386                 { 0,    "guard_nonlocal",               "guard variable in non-local memory"            },
1387                 { 1,    "guard_local",                  "guard variable in local memory"                        },
1388         };
1389
1390         TestGroupCase gscCases[] =
1391         {
1392                 { SC_BUFFER,    "buffer",               "guard variable in buffer memory"                       },
1393                 { SC_IMAGE,             "image",                "guard variable in image memory"                        },
1394                 { SC_WORKGROUP, "workgroup",    "guard variable in workgroup memory"            },
1395         };
1396
1397         TestGroupCase stageCases[] =
1398         {
1399                 { STAGE_COMPUTE,        "comp",         "compute shader"                        },
1400                 { STAGE_VERTEX,         "vert",         "vertex shader"                         },
1401                 { STAGE_FRAGMENT,       "frag",         "fragment shader"                       },
1402         };
1403
1404
1405         for (int ttNdx = 0; ttNdx < DE_LENGTH_OF_ARRAY(ttCases); ttNdx++)
1406         {
1407                 de::MovePtr<tcu::TestCaseGroup> ttGroup(new tcu::TestCaseGroup(testCtx, ttCases[ttNdx].name, ttCases[ttNdx].description));
1408                 for (int core11Ndx = 0; core11Ndx < DE_LENGTH_OF_ARRAY(core11Cases); core11Ndx++)
1409                 {
1410                         de::MovePtr<tcu::TestCaseGroup> core11Group(new tcu::TestCaseGroup(testCtx, core11Cases[core11Ndx].name, core11Cases[core11Ndx].description));
1411                         for (int dtNdx = 0; dtNdx < DE_LENGTH_OF_ARRAY(dtCases); dtNdx++)
1412                         {
1413                                 de::MovePtr<tcu::TestCaseGroup> dtGroup(new tcu::TestCaseGroup(testCtx, dtCases[dtNdx].name, dtCases[dtNdx].description));
1414                                 for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
1415                                 {
1416                                         de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name, cohCases[cohNdx].description));
1417                                         for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
1418                                         {
1419                                                 de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name, stCases[stNdx].description));
1420                                                 for (int rmwNdx = 0; rmwNdx < DE_LENGTH_OF_ARRAY(rmwCases); rmwNdx++)
1421                                                 {
1422                                                         de::MovePtr<tcu::TestCaseGroup> rmwGroup(new tcu::TestCaseGroup(testCtx, rmwCases[rmwNdx].name, rmwCases[rmwNdx].description));
1423                                                         for (int scopeNdx = 0; scopeNdx < DE_LENGTH_OF_ARRAY(scopeCases); scopeNdx++)
1424                                                         {
1425                                                                 de::MovePtr<tcu::TestCaseGroup> scopeGroup(new tcu::TestCaseGroup(testCtx, scopeCases[scopeNdx].name, scopeCases[scopeNdx].description));
1426                                                                 for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
1427                                                                 {
1428                                                                         de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name, plCases[plNdx].description));
1429                                                                         for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
1430                                                                         {
1431                                                                                 de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name, pscCases[pscNdx].description));
1432                                                                                 for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
1433                                                                                 {
1434                                                                                         de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name, glCases[glNdx].description));
1435                                                                                         for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
1436                                                                                         {
1437                                                                                                 de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name, gscCases[gscNdx].description));
1438                                                                                                 for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases); stageNdx++)
1439                                                                                                 {
1440                                                                                                         CaseDef c =
1441                                                                                                         {
1442                                                                                                                 !!plCases[plNdx].value,                                 // bool payloadMemLocal;
1443                                                                                                                 !!glCases[glNdx].value,                                 // bool guardMemLocal;
1444                                                                                                                 !!cohCases[cohNdx].value,                               // bool coherent;
1445                                                                                                                 !!core11Cases[core11Ndx].value,                 // bool core11;
1446                                                                                                                 !!rmwCases[rmwNdx].value,                               // bool atomicRMW;
1447                                                                                                                 (TestType)ttCases[ttNdx].value,                 // TestType testType;
1448                                                                                                                 (StorageClass)pscCases[pscNdx].value,   // StorageClass payloadSC;
1449                                                                                                                 (StorageClass)gscCases[gscNdx].value,   // StorageClass guardSC;
1450                                                                                                                 (Scope)scopeCases[scopeNdx].value,              // Scope scope;
1451                                                                                                                 (SyncType)stCases[stNdx].value,                 // SyncType syncType;
1452                                                                                                                 (Stage)stageCases[stageNdx].value,              // Stage stage;
1453                                                                                                                 (DataType)dtCases[dtNdx].value,                 // DataType dataType;
1454                                                                                                         };
1455
1456                                                                                                         // Mustpass11 tests should only exercise things we expect to work on
1457                                                                                                         // existing implementations. Exclude noncoherent tests which require
1458                                                                                                         // new extensions, and assume atomic synchronization wouldn't work
1459                                                                                                         // (i.e. atomics may be implemented as relaxed atomics). Exclude
1460                                                                                                         // queuefamily scope which doesn't exist in Vulkan 1.1.
1461                                                                                                         if (c.core11 &&
1462                                                                                                                 (c.coherent == 0 ||
1463                                                                                                                 c.syncType == ST_FENCE_ATOMIC ||
1464                                                                                                                 c.syncType == ST_ATOMIC_FENCE ||
1465                                                                                                                 c.syncType == ST_ATOMIC_ATOMIC ||
1466                                                                                                                 c.dataType == DATA_TYPE_UINT64 ||
1467                                                                                                                 c.scope == SCOPE_QUEUEFAMILY))
1468                                                                                                         {
1469                                                                                                                 continue;
1470                                                                                                         }
1471
1472                                                                                                         if (c.stage != STAGE_COMPUTE &&
1473                                                                                                                 c.scope == SCOPE_WORKGROUP)
1474                                                                                                         {
1475                                                                                                                 continue;
1476                                                                                                         }
1477
1478                                                                                                         // Don't exercise local and non-local for workgroup memory
1479                                                                                                         // Also don't exercise workgroup memory for non-compute stages
1480                                                                                                         if (c.payloadSC == SC_WORKGROUP && (c.payloadMemLocal != 0 || c.stage != STAGE_COMPUTE))
1481                                                                                                         {
1482                                                                                                                 continue;
1483                                                                                                         }
1484                                                                                                         if (c.guardSC == SC_WORKGROUP && (c.guardMemLocal != 0 || c.stage != STAGE_COMPUTE))
1485                                                                                                         {
1486                                                                                                                 continue;
1487                                                                                                         }
1488                                                                                                         // Can't do control barrier with larger than workgroup scope, or non-compute stages
1489                                                                                                         if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
1490                                                                                                                 (c.scope == SCOPE_DEVICE || c.scope == SCOPE_QUEUEFAMILY || c.stage != STAGE_COMPUTE))
1491                                                                                                         {
1492                                                                                                                 continue;
1493                                                                                                         }
1494
1495                                                                                                         // Limit RMW atomics to ST_ATOMIC_ATOMIC, just to reduce # of test cases
1496                                                                                                         if (c.atomicRMW && c.syncType != ST_ATOMIC_ATOMIC)
1497                                                                                                         {
1498                                                                                                                 continue;
1499                                                                                                         }
1500
1501                                                                                                         // uint64 testing is primarily for atomics, so only test it for ST_ATOMIC_ATOMIC
1502                                                                                                         if (c.dataType == DATA_TYPE_UINT64 && c.syncType != ST_ATOMIC_ATOMIC)
1503                                                                                                         {
1504                                                                                                                 continue;
1505                                                                                                         }
1506
1507                                                                                                         // No 64-bit image types, so skip tests with both payload and guard in image memory
1508                                                                                                         if (c.dataType == DATA_TYPE_UINT64 && c.payloadSC == SC_IMAGE && c.guardSC == SC_IMAGE)
1509                                                                                                         {
1510                                                                                                                 continue;
1511                                                                                                         }
1512
1513                                                                                                         // Control barrier tests don't use a guard variable, so only run them with gsc,gl==0
1514                                                                                                         if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
1515                                                                                                                 (c.guardSC != 0 || c.guardMemLocal != 0))
1516                                                                                                         {
1517                                                                                                                 continue;
1518                                                                                                         }
1519
1520                                                                                                         gscGroup->addChild(new MemoryModelTestCase(testCtx, stageCases[stageNdx].name, stageCases[stageNdx].description, c));
1521                                                                                                 }
1522                                                                                                 glGroup->addChild(gscGroup.release());
1523                                                                                         }
1524                                                                                         pscGroup->addChild(glGroup.release());
1525                                                                                 }
1526                                                                                 plGroup->addChild(pscGroup.release());
1527                                                                         }
1528                                                                         scopeGroup->addChild(plGroup.release());
1529                                                                 }
1530                                                                 rmwGroup->addChild(scopeGroup.release());
1531                                                         }
1532                                                         stGroup->addChild(rmwGroup.release());
1533                                                 }
1534                                                 cohGroup->addChild(stGroup.release());
1535                                         }
1536                                         dtGroup->addChild(cohGroup.release());
1537                                 }
1538                                 core11Group->addChild(dtGroup.release());
1539                         }
1540                         ttGroup->addChild(core11Group.release());
1541                 }
1542                 group->addChild(ttGroup.release());
1543         }
1544         return group.release();
1545 }
1546
1547 }       // MemoryModel
1548 }       // vkt