Fix missing dependency on sparse binds
[platform/upstream/VK-GL-CTS.git] / modules / gles31 / functional / es31fSynchronizationTests.cpp
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.1 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Synchronization Tests
22  *//*--------------------------------------------------------------------*/
23
24 #include "es31fSynchronizationTests.hpp"
25 #include "tcuTestLog.hpp"
26 #include "tcuStringTemplate.hpp"
27 #include "tcuSurface.hpp"
28 #include "tcuRenderTarget.hpp"
29 #include "gluRenderContext.hpp"
30 #include "gluShaderProgram.hpp"
31 #include "gluObjectWrapper.hpp"
32 #include "gluPixelTransfer.hpp"
33 #include "gluContextInfo.hpp"
34 #include "glwFunctions.hpp"
35 #include "glwEnums.hpp"
36 #include "deStringUtil.hpp"
37 #include "deSharedPtr.hpp"
38 #include "deMemory.h"
39 #include "deRandom.hpp"
40
41 #include <map>
42
43 namespace deqp
44 {
45 namespace gles31
46 {
47 namespace Functional
48 {
49 namespace
50 {
51
52 static bool checkSupport(Context& ctx)
53 {
54         auto ctxType = ctx.getRenderContext().getType();
55         return contextSupports(ctxType, glu::ApiType::es(3, 2)) ||
56                    contextSupports(ctxType, glu::ApiType::core(4, 5)) ||
57                    ctx.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic");
58 }
59
//! Check that the increments along a chain of values form the ramp 1, 2, ..., N.
//!
//! valueChain[i+1] - valueChain[i] (with sumValue acting as the value after the last
//! element) gives the addition applied at step i. The additions are sorted and compared
//! against 1..N. On the first mismatch the sorted position, the delta found there and the
//! delta expected there are written to the out parameters and false is returned. The out
//! parameters are left untouched on success.
static bool validateSortedAtomicRampAdditionValueChain (const std::vector<deUint32>& valueChain, deUint32 sumValue, int& invalidOperationNdx, deUint32& errorDelta, deUint32& errorExpected)
{
	const int				numCalls	= (int)valueChain.size();
	std::vector<deUint32>	deltas;

	deltas.reserve(valueChain.size());

	for (int ndx = 0; ndx < numCalls; ++ndx)
	{
		const bool		isLast		= (ndx == numCalls - 1);
		const deUint32	nextValue	= (isLast) ? (sumValue) : (valueChain[ndx + 1]);

		deltas.push_back(nextValue - valueChain[ndx]);
	}

	// deltas now holds the additions actually applied; sorted, they must read 1, 2, 3, ...
	std::sort(deltas.begin(), deltas.end());

	for (int ndx = 0; ndx < numCalls; ++ndx)
	{
		const int expected = ndx + 1;

		if ((int)deltas[ndx] == expected)
			continue;

		invalidOperationNdx	= ndx;
		errorDelta			= deltas[ndx];
		errorExpected		= expected;
		return false;
	}

	return true;
}
85
86 static void readBuffer (const glw::Functions& gl, deUint32 target, int numElements, std::vector<deUint32>& result)
87 {
88         const void* ptr = gl.mapBufferRange(target, 0, (int)(sizeof(deUint32) * numElements), GL_MAP_READ_BIT);
89         GLU_EXPECT_NO_ERROR(gl.getError(), "map");
90
91         if (!ptr)
92                 throw tcu::TestError("mapBufferRange returned NULL");
93
94         result.resize(numElements);
95         memcpy(&result[0], ptr, sizeof(deUint32) * numElements);
96
97         if (gl.unmapBuffer(target) == GL_FALSE)
98                 throw tcu::TestError("unmapBuffer returned false");
99 }
100
101 static deUint32 readBufferUint32 (const glw::Functions& gl, deUint32 target)
102 {
103         std::vector<deUint32> vec;
104
105         readBuffer(gl, target, 1, vec);
106
107         return vec[0];
108 }
109
110 //! Generate a ramp of values from 1 to numElements, and shuffle it
111 void generateShuffledRamp (int numElements, std::vector<int>& ramp)
112 {
113         de::Random rng(0xabcd);
114
115         // some positive (non-zero) unique values
116         ramp.resize(numElements);
117         for (int callNdx = 0; callNdx < numElements; ++callNdx)
118                 ramp[callNdx] = callNdx + 1;
119
120         rng.shuffle(ramp.begin(), ramp.end());
121 }
122
123 static std::string specializeShader(Context& context, const char* code)
124 {
125         auto                                    ctxType                 = context.getRenderContext().getType();
126         const bool                              isES32orGL45    = glu::contextSupports(ctxType, glu::ApiType::es(3, 2)) ||
127                                                                                           glu::contextSupports(ctxType, glu::ApiType::core(4, 5));
128         const glu::GLSLVersion  glslVersion             = glu::getContextTypeGLSLVersion(ctxType);
129
130         std::map<std::string, std::string> specializationMap;
131         specializationMap["GLSL_VERSION_DECL"]                          = glu::getGLSLVersionDeclaration(glslVersion);
132         specializationMap["SHADER_IMAGE_ATOMIC_REQUIRE"]        = isES32orGL45 ? "" : "#extension GL_OES_shader_image_atomic : require";
133
134         return tcu::StringTemplate(code).specialize(specializationMap);
135 }
136
137 class InterInvocationTestCase : public TestCase
138 {
139 public:
140         enum StorageType
141         {
142                 STORAGE_BUFFER = 0,
143                 STORAGE_IMAGE,
144
145                 STORAGE_LAST
146         };
147         enum CaseFlags
148         {
149                 FLAG_ATOMIC                             = 0x1,
150                 FLAG_ALIASING_STORAGES  = 0x2,
151                 FLAG_IN_GROUP                   = 0x4,
152         };
153
154                                                 InterInvocationTestCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags = 0);
155                                                 ~InterInvocationTestCase        (void);
156
157 private:
158         void                            init                                            (void);
159         void                            deinit                                          (void);
160         IterateResult           iterate                                         (void);
161
162         void                            runCompute                                      (void);
163         bool                            verifyResults                           (void);
164         virtual std::string     genShaderSource                         (void) const = 0;
165
166 protected:
167         std::string                     genBarrierSource                        (void) const;
168
169         const StorageType       m_storage;
170         const bool                      m_useAtomic;
171         const bool                      m_aliasingStorages;
172         const bool                      m_syncWithGroup;
173         const int                       m_workWidth;                            // !< total work width
174         const int                       m_workHeight;                           // !<     ...    height
175         const int                       m_localWidth;                           // !< group width
176         const int                       m_localHeight;                          // !< group height
177         const int                       m_elementsPerInvocation;        // !< elements accessed by a single invocation
178
179 private:
180         glw::GLuint                     m_storageBuf;
181         glw::GLuint                     m_storageTex;
182         glw::GLuint                     m_resultBuf;
183         glu::ShaderProgram*     m_program;
184 };
185
186 InterInvocationTestCase::InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
187         : TestCase                                      (context, name, desc)
188         , m_storage                                     (storage)
189         , m_useAtomic                           ((flags & FLAG_ATOMIC) != 0)
190         , m_aliasingStorages            ((flags & FLAG_ALIASING_STORAGES) != 0)
191         , m_syncWithGroup                       ((flags & FLAG_IN_GROUP) != 0)
192         , m_workWidth                           (256)
193         , m_workHeight                          (256)
194         , m_localWidth                          (16)
195         , m_localHeight                         (8)
196         , m_elementsPerInvocation       (8)
197         , m_storageBuf                          (0)
198         , m_storageTex                          (0)
199         , m_resultBuf                           (0)
200         , m_program                                     (DE_NULL)
201 {
202         DE_ASSERT(m_storage < STORAGE_LAST);
203         DE_ASSERT(m_localWidth*m_localHeight <= 128); // minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
204 }
205
206 InterInvocationTestCase::~InterInvocationTestCase (void)
207 {
208         deinit();
209 }
210
211 void InterInvocationTestCase::init (void)
212 {
213         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
214
215         // requirements
216
217         if (m_useAtomic && m_storage == STORAGE_IMAGE && !checkSupport(m_context))
218                 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
219
220         // program
221
222         m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genShaderSource()));
223         m_testCtx.getLog() << *m_program;
224         if (!m_program->isOk())
225                 throw tcu::TestError("could not build program");
226
227         // source
228
229         if (m_storage == STORAGE_BUFFER)
230         {
231                 const int                               bufferElements  = m_workWidth * m_workHeight * m_elementsPerInvocation;
232                 const int                               bufferSize              = bufferElements * (int)sizeof(deUint32);
233                 std::vector<deUint32>   zeroBuffer              (bufferElements, 0);
234
235                 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
236
237                 gl.genBuffers(1, &m_storageBuf);
238                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
239                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
240                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
241         }
242         else if (m_storage == STORAGE_IMAGE)
243         {
244                 const int                               bufferElements  = m_workWidth * m_workHeight * m_elementsPerInvocation;
245                 const int                               bufferSize              = bufferElements * (int)sizeof(deUint32);
246
247                 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x" << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
248
249                 gl.genTextures(1, &m_storageTex);
250                 gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
251                 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
252                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
253                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
254                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");
255
256                 // Zero-fill
257                 m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;
258
259                 {
260                         const std::vector<deInt32> zeroBuffer(m_workWidth * m_workHeight * m_elementsPerInvocation, 0);
261                         gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
262                         GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
263                 }
264         }
265         else
266                 DE_ASSERT(DE_FALSE);
267
268         // destination
269
270         {
271                 const int                               bufferElements  = m_workWidth * m_workHeight;
272                 const int                               bufferSize              = bufferElements * (int)sizeof(deUint32);
273                 std::vector<deInt32>    negativeBuffer  (bufferElements, -1);
274
275                 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
276
277                 gl.genBuffers(1, &m_resultBuf);
278                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
279                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
280                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
281         }
282 }
283
284 void InterInvocationTestCase::deinit (void)
285 {
286         if (m_storageBuf)
287         {
288                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
289                 m_storageBuf = DE_NULL;
290         }
291
292         if (m_storageTex)
293         {
294                 m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
295                 m_storageTex = DE_NULL;
296         }
297
298         if (m_resultBuf)
299         {
300                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
301                 m_resultBuf = DE_NULL;
302         }
303
304         delete m_program;
305         m_program = DE_NULL;
306 }
307
308 InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate (void)
309 {
310         // Dispatch
311         runCompute();
312
313         // Verify buffer contents
314         if (verifyResults())
315                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
316         else
317                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
318
319         return STOP;
320 }
321
322 void InterInvocationTestCase::runCompute (void)
323 {
324         const glw::Functions&   gl              = m_context.getRenderContext().getFunctions();
325         const int                               groupsX = m_workWidth / m_localWidth;
326         const int                               groupsY = m_workHeight / m_localHeight;
327
328         DE_ASSERT((m_workWidth % m_localWidth) == 0);
329         DE_ASSERT((m_workHeight % m_localHeight) == 0);
330
331         m_testCtx.getLog()
332                 << tcu::TestLog::Message
333                 << "Dispatching compute.\n"
334                 << "    group size: " << m_localWidth << "x" << m_localHeight << "\n"
335                 << "    dispatch size: " << groupsX << "x" << groupsY << "\n"
336                 << "    total work size: " << m_workWidth << "x" << m_workHeight << "\n"
337                 << tcu::TestLog::EndMessage;
338
339         gl.useProgram(m_program->getProgram());
340
341         // source
342         if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
343         {
344                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
345                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
346         }
347         else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
348         {
349                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
350                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
351                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
352
353                 m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages." << tcu::TestLog::EndMessage;
354         }
355         else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
356         {
357                 gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
358                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
359         }
360         else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
361         {
362                 gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
363                 gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
364
365                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
366
367                 m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages." << tcu::TestLog::EndMessage;
368         }
369         else
370                 DE_ASSERT(DE_FALSE);
371
372         // destination
373         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
374         GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
375
376         // dispatch
377         gl.dispatchCompute(groupsX, groupsY, 1);
378         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
379 }
380
381 bool InterInvocationTestCase::verifyResults (void)
382 {
383         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
384         const int                               errorFloodThreshold     = 5;
385         int                                             numErrorsLogged         = 0;
386         const void*                             mapped                          = DE_NULL;
387         std::vector<deInt32>    results                         (m_workWidth * m_workHeight);
388         bool                                    error                           = false;
389
390         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
391         gl.memoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
392         mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(deInt32), GL_MAP_READ_BIT);
393         GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
394
395         // copy to properly aligned array
396         deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(deUint32));
397
398         if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
399                 throw tcu::TestError("memory map store corrupted");
400
401         // check the results
402         for (int ndx = 0; ndx < (int)results.size(); ++ndx)
403         {
404                 if (results[ndx] != 1)
405                 {
406                         error = true;
407
408                         if (numErrorsLogged == 0)
409                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
410                         if (numErrorsLogged++ < errorFloodThreshold)
411                                 m_testCtx.getLog() << tcu::TestLog::Message << "        Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
412                         else
413                         {
414                                 // after N errors, no point continuing verification
415                                 m_testCtx.getLog() << tcu::TestLog::Message << "        -- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
416                                 break;
417                         }
418                 }
419         }
420
421         if (!error)
422                 m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
423         return !error;
424 }
425
426 std::string InterInvocationTestCase::genBarrierSource (void) const
427 {
428         std::ostringstream buf;
429
430         if (m_syncWithGroup)
431         {
432                 // Wait until all invocations in this work group have their texture/buffer read/write operations complete
433                 // \note We could also use memoryBarrierBuffer() or memoryBarrierImage() in place of groupMemoryBarrier() but
434                 //       we only require intra-workgroup synchronization.
435                 buf << "\n"
436                         << "    groupMemoryBarrier();\n"
437                         << "    barrier();\n"
438                         << "\n";
439         }
440         else if (m_storage == STORAGE_BUFFER)
441         {
442                 DE_ASSERT(!m_syncWithGroup);
443
444                 // Waiting only for data written by this invocation. Since all buffer reads and writes are
445                 // processed in order (within a single invocation), we don't have to do anything.
446                 buf << "\n";
447         }
448         else if (m_storage == STORAGE_IMAGE)
449         {
450                 DE_ASSERT(!m_syncWithGroup);
451
452                 // Waiting only for data written by this invocation. But since operations complete in undefined
453                 // order, we have to wait for them to complete.
454                 buf << "\n"
455                         << "    memoryBarrierImage();\n"
456                         << "\n";
457         }
458         else
459                 DE_ASSERT(DE_FALSE);
460
461         return buf.str();
462 }
463
464 class InvocationBasicCase : public InterInvocationTestCase
465 {
466 public:
467                                                         InvocationBasicCase             (Context& context, const char* name, const char* desc, StorageType storage, int flags);
468 private:
469         std::string                             genShaderSource                 (void) const;
470         virtual std::string             genShaderMainBlock              (void) const = 0;
471 };
472
473 InvocationBasicCase::InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
474         : InterInvocationTestCase(context, name, desc, storage, flags)
475 {
476 }
477
478 std::string InvocationBasicCase::genShaderSource (void) const
479 {
480         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
481         std::ostringstream      buf;
482
483         buf << "${GLSL_VERSION_DECL}\n"
484                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
485                 << "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
486                 << "layout(binding=0, std430) buffer Output\n"
487                 << "{\n"
488                 << "    highp int values[];\n"
489                 << "} sb_result;\n";
490
491         if (m_storage == STORAGE_BUFFER)
492                 buf << "layout(binding=1, std430) coherent buffer Storage\n"
493                         << "{\n"
494                         << "    highp int values[];\n"
495                         << "} sb_store;\n"
496                         << "\n"
497                         << "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
498                         << "{\n"
499                         << "    highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
500                         << "    return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
501                         << "}\n";
502         else if (m_storage == STORAGE_IMAGE)
503                 buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
504                         << "\n"
505                         << "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
506                         << "{\n"
507                         << "    return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
508                         << "}\n";
509         else
510                 DE_ASSERT(DE_FALSE);
511
512         buf << "\n"
513                 << "void main (void)\n"
514                 << "{\n"
515                 << "    int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
516                 << "    int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
517                 << "    bool allOk      = true;\n"
518                 << "\n"
519                 << genShaderMainBlock()
520                 << "\n"
521                 << "    sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
522                 << "}\n";
523
524         return specializeShader(m_context, buf.str().c_str());
525 }
526
527 class InvocationWriteReadCase : public InvocationBasicCase
528 {
529 public:
530                                         InvocationWriteReadCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags);
531 private:
532         std::string             genShaderMainBlock                      (void) const;
533 };
534
535 InvocationWriteReadCase::InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
536         : InvocationBasicCase(context, name, desc, storage, flags)
537 {
538 }
539
540 std::string InvocationWriteReadCase::genShaderMainBlock (void) const
541 {
542         std::ostringstream buf;
543
544         // write
545
546         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
547         {
548                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
549                         buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
550                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
551                         buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
552                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
553                         buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
554                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
555                         buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
556                 else
557                         DE_ASSERT(DE_FALSE);
558         }
559
560         // barrier
561
562         buf << genBarrierSource();
563
564         // read
565
566         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
567         {
568                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
569
570                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
571                         buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 0) == groupNdx);\n";
572                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
573                         buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
574                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
575                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 0) == groupNdx);\n";
576                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
577                         buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
578                 else
579                         DE_ASSERT(DE_FALSE);
580         }
581
582         return buf.str();
583 }
584
585 class InvocationReadWriteCase : public InvocationBasicCase
586 {
587 public:
588                                         InvocationReadWriteCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags);
589 private:
590         std::string             genShaderMainBlock                      (void) const;
591 };
592
593 InvocationReadWriteCase::InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
594         : InvocationBasicCase(context, name, desc, storage, flags)
595 {
596 }
597
598 std::string InvocationReadWriteCase::genShaderMainBlock (void) const
599 {
600         std::ostringstream buf;
601
602         // read
603
604         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
605         {
606                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
607
608                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
609                         buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == 0);\n";
610                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
611                         buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
612                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
613                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == 0);\n";
614                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
615                         buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
616                 else
617                         DE_ASSERT(DE_FALSE);
618         }
619
620         // barrier
621
622         buf << genBarrierSource();
623
624         // write
625
626         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
627         {
628                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
629                         buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
630                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
631                         buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
632                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
633                         buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
634                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
635                         buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
636                 else
637                         DE_ASSERT(DE_FALSE);
638         }
639
640         return buf.str();
641 }
642
643 class InvocationOverWriteCase : public InvocationBasicCase
644 {
645 public:
646                                         InvocationOverWriteCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags);
647 private:
648         std::string             genShaderMainBlock                      (void) const;
649 };
650
651 InvocationOverWriteCase::InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
652         : InvocationBasicCase(context, name, desc, storage, flags)
653 {
654 }
655
656 std::string InvocationOverWriteCase::genShaderMainBlock (void) const
657 {
658         std::ostringstream buf;
659
660         // write
661
662         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
663         {
664                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
665                         buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
666                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
667                         buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
668                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
669                         buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
670                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
671                         buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
672                 else
673                         DE_ASSERT(DE_FALSE);
674         }
675
676         // barrier
677
678         buf << genBarrierSource();
679
680         // write over
681
682         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
683         {
684                 // write another invocation's value or our own value depending on test type
685                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+4) + ", " + de::toString(3*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
686
687                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
688                         buf << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
689                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
690                         buf << "\tsb_store.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
691                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
692                         buf << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
693                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
694                         buf << "\timageStore(u_image, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
695                 else
696                         DE_ASSERT(DE_FALSE);
697         }
698
699         // barrier
700
701         buf << genBarrierSource();
702
703         // read
704
705         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
706         {
707                 // check another invocation's value or our own value depending on test type
708                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
709
710                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
711                         buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == groupNdx);\n";
712                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
713                         buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
714                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
715                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == groupNdx);\n";
716                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
717                         buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
718                 else
719                         DE_ASSERT(DE_FALSE);
720         }
721
722         return buf.str();
723 }
724
725 class InvocationAliasWriteCase : public InterInvocationTestCase
726 {
727 public:
728         enum TestType
729         {
730                 TYPE_WRITE = 0,
731                 TYPE_OVERWRITE,
732
733                 TYPE_LAST
734         };
735
736                                         InvocationAliasWriteCase        (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags);
737 private:
738         std::string             genShaderSource                         (void) const;
739
740         const TestType  m_type;
741 };
742
743 InvocationAliasWriteCase::InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags)
744         : InterInvocationTestCase       (context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
745         , m_type                                        (type)
746 {
747         DE_ASSERT(type < TYPE_LAST);
748 }
749
750 std::string InvocationAliasWriteCase::genShaderSource (void) const
751 {
752         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
753         std::ostringstream      buf;
754
755         buf << "${GLSL_VERSION_DECL}\n"
756                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
757                 << "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
758                 << "layout(binding=0, std430) buffer Output\n"
759                 << "{\n"
760                 << "    highp int values[];\n"
761                 << "} sb_result;\n";
762
763         if (m_storage == STORAGE_BUFFER)
764                 buf << "layout(binding=1, std430) coherent buffer Storage0\n"
765                         << "{\n"
766                         << "    highp int values[];\n"
767                         << "} sb_store0;\n"
768                         << "layout(binding=2, std430) coherent buffer Storage1\n"
769                         << "{\n"
770                         << "    highp int values[];\n"
771                         << "} sb_store1;\n"
772                         << "\n"
773                         << "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
774                         << "{\n"
775                         << "    highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
776                         << "    return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
777                         << "}\n";
778         else if (m_storage == STORAGE_IMAGE)
779                 buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
780                         << "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
781                         << "\n"
782                         << "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
783                         << "{\n"
784                         << "    return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
785                         << "}\n";
786         else
787                 DE_ASSERT(DE_FALSE);
788
789         buf << "\n"
790                 << "void main (void)\n"
791                 << "{\n"
792                 << "    int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
793                 << "    int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
794                 << "    bool allOk      = true;\n"
795                 << "\n";
796
797         if (m_type == TYPE_OVERWRITE)
798         {
799                 // write
800
801                 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
802                 {
803                         if (m_storage == STORAGE_BUFFER && m_useAtomic)
804                                 buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
805                         else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
806                                 buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
807                         else if (m_storage == STORAGE_IMAGE && m_useAtomic)
808                                 buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
809                         else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
810                                 buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
811                         else
812                                 DE_ASSERT(DE_FALSE);
813                 }
814
815                 // barrier
816
817                 buf << genBarrierSource();
818         }
819         else
820                 DE_ASSERT(m_type == TYPE_WRITE);
821
822         // write (again)
823
824         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
825         {
826                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+2) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
827
828                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
829                         buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
830                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
831                         buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
832                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
833                         buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
834                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
835                         buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
836                 else
837                         DE_ASSERT(DE_FALSE);
838         }
839
840         // barrier
841
842         buf << genBarrierSource();
843
844         // read
845
846         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
847         {
848                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
849                         buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 123) == groupNdx);\n";
850                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
851                         buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] == groupNdx);\n";
852                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
853                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 123) == groupNdx);\n";
854                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
855                         buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << ")).x == groupNdx);\n";
856                 else
857                         DE_ASSERT(DE_FALSE);
858         }
859
860         // return result
861
862         buf << "\n"
863                 << "    sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
864                 << "}\n";
865
866         return specializeShader(m_context, buf.str().c_str());
867 }
868
869 namespace op
870 {
871
//! Parameters of a write operation. Kept a plain aggregate because it is stored in a union.
struct WriteData
{
	int targetHandle;	//!< handle of the storage written to
	int seed;			//!< seed passed on to the write program generator

	//! Returns a WriteData holding the given handle and seed.
	static WriteData Generate(int targetHandle, int seed)
	{
		WriteData result = { targetHandle, seed };
		return result;
	}
};
887
//! Parameters of a read operation. Kept a plain aggregate because it is stored in a union.
struct ReadData
{
	int targetHandle;	//!< handle of the storage read from
	int seed;			//!< seed passed on to the read program generator

	//! Returns a ReadData holding the given handle and seed.
	static ReadData Generate(int targetHandle, int seed)
	{
		ReadData result = { targetHandle, seed };
		return result;
	}
};
903
//! Data-less tag type; streaming one into InterCallOperations records a TYPE_BARRIER command.
struct Barrier {};
907
//! Parameters of an interleaved write. Kept a plain aggregate because it is stored in a union.
struct WriteDataInterleaved
{
	int		targetHandle;	//!< handle of the storage written to
	int		seed;			//!< seed passed on to the program generator
	bool	evenOdd;		//!< even/odd selector passed on to the program generator

	//! Returns a WriteDataInterleaved holding the given values.
	static WriteDataInterleaved Generate(int targetHandle, int seed, bool evenOdd)
	{
		WriteDataInterleaved result = { targetHandle, seed, evenOdd };
		return result;
	}
};
925
//! Parameters of an interleaved read. Kept a plain aggregate because it is stored in a union.
struct ReadDataInterleaved
{
	int targetHandle;	//!< handle of the storage read from
	int seed0;			//!< first seed passed on to the program generator
	int seed1;			//!< second seed passed on to the program generator

	//! Returns a ReadDataInterleaved holding the given values.
	static ReadDataInterleaved Generate(int targetHandle, int seed0, int seed1)
	{
		ReadDataInterleaved result = { targetHandle, seed0, seed1 };
		return result;
	}
};
943
//! Parameters of a read from two storages. Kept a plain aggregate because it is stored in a union.
struct ReadMultipleData
{
	int targetHandle0;	//!< handle of the first storage
	int seed0;			//!< seed associated with the first storage
	int targetHandle1;	//!< handle of the second storage
	int seed1;			//!< seed associated with the second storage

	//! Returns a ReadMultipleData holding the given values.
	static ReadMultipleData Generate(int targetHandle0, int seed0, int targetHandle1, int seed1)
	{
		ReadMultipleData result = { targetHandle0, seed0, targetHandle1, seed1 };
		return result;
	}
};
963
//! Parameters of a read-zero operation. Kept a plain aggregate because it is stored in a union.
struct ReadZeroData
{
	int targetHandle;	//!< handle of the storage read from

	//! Returns a ReadZeroData holding the given handle.
	static ReadZeroData Generate(int targetHandle)
	{
		ReadZeroData result = { targetHandle };
		return result;
	}
};
977
978 } // namespace op
979
980 class InterCallTestCase;
981
982 class InterCallOperations
983 {
984 public:
985         InterCallOperations& operator<< (const op::WriteData&);
986         InterCallOperations& operator<< (const op::ReadData&);
987         InterCallOperations& operator<< (const op::Barrier&);
988         InterCallOperations& operator<< (const op::ReadMultipleData&);
989         InterCallOperations& operator<< (const op::WriteDataInterleaved&);
990         InterCallOperations& operator<< (const op::ReadDataInterleaved&);
991         InterCallOperations& operator<< (const op::ReadZeroData&);
992
993 private:
994         struct Command
995         {
996                 enum CommandType
997                 {
998                         TYPE_WRITE = 0,
999                         TYPE_READ,
1000                         TYPE_BARRIER,
1001                         TYPE_READ_MULTIPLE,
1002                         TYPE_WRITE_INTERLEAVE,
1003                         TYPE_READ_INTERLEAVE,
1004                         TYPE_READ_ZERO,
1005
1006                         TYPE_LAST
1007                 };
1008
1009                 CommandType type;
1010
1011                 union CommandUnion
1012                 {
1013                         op::WriteData                           write;
1014                         op::ReadData                            read;
1015                         op::Barrier                                     barrier;
1016                         op::ReadMultipleData            readMulti;
1017                         op::WriteDataInterleaved        writeInterleave;
1018                         op::ReadDataInterleaved         readInterleave;
1019                         op::ReadZeroData                        readZero;
1020                 } u_cmd;
1021         };
1022
1023         friend class InterCallTestCase;
1024
1025         std::vector<Command> m_cmds;
1026 };
1027
1028 InterCallOperations& InterCallOperations::operator<< (const op::WriteData& cmd)
1029 {
1030         m_cmds.push_back(Command());
1031         m_cmds.back().type = Command::TYPE_WRITE;
1032         m_cmds.back().u_cmd.write = cmd;
1033
1034         return *this;
1035 }
1036
1037 InterCallOperations& InterCallOperations::operator<< (const op::ReadData& cmd)
1038 {
1039         m_cmds.push_back(Command());
1040         m_cmds.back().type = Command::TYPE_READ;
1041         m_cmds.back().u_cmd.read = cmd;
1042
1043         return *this;
1044 }
1045
1046 InterCallOperations& InterCallOperations::operator<< (const op::Barrier& cmd)
1047 {
1048         m_cmds.push_back(Command());
1049         m_cmds.back().type = Command::TYPE_BARRIER;
1050         m_cmds.back().u_cmd.barrier = cmd;
1051
1052         return *this;
1053 }
1054
1055 InterCallOperations& InterCallOperations::operator<< (const op::ReadMultipleData& cmd)
1056 {
1057         m_cmds.push_back(Command());
1058         m_cmds.back().type = Command::TYPE_READ_MULTIPLE;
1059         m_cmds.back().u_cmd.readMulti = cmd;
1060
1061         return *this;
1062 }
1063
1064 InterCallOperations& InterCallOperations::operator<< (const op::WriteDataInterleaved& cmd)
1065 {
1066         m_cmds.push_back(Command());
1067         m_cmds.back().type = Command::TYPE_WRITE_INTERLEAVE;
1068         m_cmds.back().u_cmd.writeInterleave = cmd;
1069
1070         return *this;
1071 }
1072
1073 InterCallOperations& InterCallOperations::operator<< (const op::ReadDataInterleaved& cmd)
1074 {
1075         m_cmds.push_back(Command());
1076         m_cmds.back().type = Command::TYPE_READ_INTERLEAVE;
1077         m_cmds.back().u_cmd.readInterleave = cmd;
1078
1079         return *this;
1080 }
1081
1082 InterCallOperations& InterCallOperations::operator<< (const op::ReadZeroData& cmd)
1083 {
1084         m_cmds.push_back(Command());
1085         m_cmds.back().type = Command::TYPE_READ_ZERO;
1086         m_cmds.back().u_cmd.readZero = cmd;
1087
1088         return *this;
1089 }
1090
1091 class InterCallTestCase : public TestCase
1092 {
1093 public:
1094         enum StorageType
1095         {
1096                 STORAGE_BUFFER = 0,
1097                 STORAGE_IMAGE,
1098
1099                 STORAGE_LAST
1100         };
1101         enum Flags
1102         {
1103                 FLAG_USE_ATOMIC = 1,
1104                 FLAG_USE_INT    = 2,
1105         };
1106                                                                                                         InterCallTestCase                       (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops);
1107                                                                                                         ~InterCallTestCase                      (void);
1108
1109 private:
1110         void                                                                                    init                                            (void);
1111         void                                                                                    deinit                                          (void);
1112         IterateResult                                                                   iterate                                         (void);
1113         bool                                                                                    verifyResults                           (void);
1114
1115         void                                                                                    runCommand                                      (const op::WriteData& cmd, int stepNdx, int& programFriendlyName);
1116         void                                                                                    runCommand                                      (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1117         void                                                                                    runCommand                                      (const op::Barrier&);
1118         void                                                                                    runCommand                                      (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1119         void                                                                                    runCommand                                      (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName);
1120         void                                                                                    runCommand                                      (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1121         void                                                                                    runCommand                                      (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1122         void                                                                                    runSingleRead                           (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1123
1124         glw::GLuint                                                                             genStorage                                      (int friendlyName);
1125         glw::GLuint                                                                             genResultStorage                        (void);
1126         glu::ShaderProgram*                                                             genWriteProgram                         (int seed);
1127         glu::ShaderProgram*                                                             genReadProgram                          (int seed);
1128         glu::ShaderProgram*                                                             genReadMultipleProgram          (int seed0, int seed1);
1129         glu::ShaderProgram*                                                             genWriteInterleavedProgram      (int seed, bool evenOdd);
1130         glu::ShaderProgram*                                                             genReadInterleavedProgram       (int seed0, int seed1);
1131         glu::ShaderProgram*                                                             genReadZeroProgram                      (void);
1132
1133         const StorageType                                                               m_storage;
1134         const int                                                                               m_invocationGridSize;   // !< width and height of the two dimensional work dispatch
1135         const int                                                                               m_perInvocationSize;    // !< number of elements accessed in single invocation
1136         const std::vector<InterCallOperations::Command> m_cmds;
1137         const bool                                                                              m_useAtomic;
1138         const bool                                                                              m_formatInteger;
1139
1140         std::vector<glu::ShaderProgram*>                                m_operationPrograms;
1141         std::vector<glw::GLuint>                                                m_operationResultStorages;
1142         std::map<int, glw::GLuint>                                              m_storageIDs;
1143 };
1144
1145 InterCallTestCase::InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops)
1146         : TestCase                                      (context, name, desc)
1147         , m_storage                                     (storage)
1148         , m_invocationGridSize          (512)
1149         , m_perInvocationSize           (2)
1150         , m_cmds                                        (ops.m_cmds)
1151         , m_useAtomic                           ((flags & FLAG_USE_ATOMIC) != 0)
1152         , m_formatInteger                       ((flags & FLAG_USE_INT) != 0)
1153 {
1154 }
1155
1156 InterCallTestCase::~InterCallTestCase (void)
1157 {
1158         deinit();
1159 }
1160
1161 void InterCallTestCase::init (void)
1162 {
1163         int                     programFriendlyName = 0;
1164
1165         // requirements
1166
1167         if (m_useAtomic && m_storage == STORAGE_IMAGE && !checkSupport(m_context))
1168                 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
1169
1170         // generate resources and validate command list
1171
1172         m_operationPrograms.resize(m_cmds.size(), DE_NULL);
1173         m_operationResultStorages.resize(m_cmds.size(), 0);
1174
1175         for (int step = 0; step < (int)m_cmds.size(); ++step)
1176         {
1177                 switch (m_cmds[step].type)
1178                 {
1179                         case InterCallOperations::Command::TYPE_WRITE:
1180                         {
1181                                 const op::WriteData& cmd = m_cmds[step].u_cmd.write;
1182
1183                                 // new storage handle?
1184                                 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1185                                         m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1186
1187                                 // program
1188                                 {
1189                                         glu::ShaderProgram* program = genWriteProgram(cmd.seed);
1190
1191                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1192                                         m_testCtx.getLog() << *program;
1193
1194                                         if (!program->isOk())
1195                                                 throw tcu::TestError("could not build program");
1196
1197                                         m_operationPrograms[step] = program;
1198                                 }
1199                                 break;
1200                         }
1201
1202                         case InterCallOperations::Command::TYPE_READ:
1203                         {
1204                                 const op::ReadData& cmd = m_cmds[step].u_cmd.read;
1205                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1206
1207                                 // program and result storage
1208                                 {
1209                                         glu::ShaderProgram* program = genReadProgram(cmd.seed);
1210
1211                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1212                                         m_testCtx.getLog() << *program;
1213
1214                                         if (!program->isOk())
1215                                                 throw tcu::TestError("could not build program");
1216
1217                                         m_operationPrograms[step] = program;
1218                                         m_operationResultStorages[step] = genResultStorage();
1219                                 }
1220                                 break;
1221                         }
1222
1223                         case InterCallOperations::Command::TYPE_BARRIER:
1224                         {
1225                                 break;
1226                         }
1227
1228                         case InterCallOperations::Command::TYPE_READ_MULTIPLE:
1229                         {
1230                                 const op::ReadMultipleData& cmd = m_cmds[step].u_cmd.readMulti;
1231                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle0) != m_storageIDs.end());
1232                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle1) != m_storageIDs.end());
1233
1234                                 // program
1235                                 {
1236                                         glu::ShaderProgram* program = genReadMultipleProgram(cmd.seed0, cmd.seed1);
1237
1238                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1239                                         m_testCtx.getLog() << *program;
1240
1241                                         if (!program->isOk())
1242                                                 throw tcu::TestError("could not build program");
1243
1244                                         m_operationPrograms[step] = program;
1245                                         m_operationResultStorages[step] = genResultStorage();
1246                                 }
1247                                 break;
1248                         }
1249
1250                         case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
1251                         {
1252                                 const op::WriteDataInterleaved& cmd = m_cmds[step].u_cmd.writeInterleave;
1253
1254                                 // new storage handle?
1255                                 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1256                                         m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1257
1258                                 // program
1259                                 {
1260                                         glu::ShaderProgram* program = genWriteInterleavedProgram(cmd.seed, cmd.evenOdd);
1261
1262                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1263                                         m_testCtx.getLog() << *program;
1264
1265                                         if (!program->isOk())
1266                                                 throw tcu::TestError("could not build program");
1267
1268                                         m_operationPrograms[step] = program;
1269                                 }
1270                                 break;
1271                         }
1272
1273                         case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
1274                         {
1275                                 const op::ReadDataInterleaved& cmd = m_cmds[step].u_cmd.readInterleave;
1276                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1277
1278                                 // program
1279                                 {
1280                                         glu::ShaderProgram* program = genReadInterleavedProgram(cmd.seed0, cmd.seed1);
1281
1282                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1283                                         m_testCtx.getLog() << *program;
1284
1285                                         if (!program->isOk())
1286                                                 throw tcu::TestError("could not build program");
1287
1288                                         m_operationPrograms[step] = program;
1289                                         m_operationResultStorages[step] = genResultStorage();
1290                                 }
1291                                 break;
1292                         }
1293
1294                         case InterCallOperations::Command::TYPE_READ_ZERO:
1295                         {
1296                                 const op::ReadZeroData& cmd = m_cmds[step].u_cmd.readZero;
1297
1298                                 // new storage handle?
1299                                 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1300                                         m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1301
1302                                 // program
1303                                 {
1304                                         glu::ShaderProgram* program = genReadZeroProgram();
1305
1306                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1307                                         m_testCtx.getLog() << *program;
1308
1309                                         if (!program->isOk())
1310                                                 throw tcu::TestError("could not build program");
1311
1312                                         m_operationPrograms[step] = program;
1313                                         m_operationResultStorages[step] = genResultStorage();
1314                                 }
1315                                 break;
1316                         }
1317
1318                         default:
1319                                 DE_ASSERT(DE_FALSE);
1320                 }
1321         }
1322 }
1323
1324 void InterCallTestCase::deinit (void)
1325 {
1326         // programs
1327         for (int ndx = 0; ndx < (int)m_operationPrograms.size(); ++ndx)
1328                 delete m_operationPrograms[ndx];
1329         m_operationPrograms.clear();
1330
1331         // result storages
1332         for (int ndx = 0; ndx < (int)m_operationResultStorages.size(); ++ndx)
1333         {
1334                 if (m_operationResultStorages[ndx])
1335                         m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_operationResultStorages[ndx]);
1336         }
1337         m_operationResultStorages.clear();
1338
1339         // storage
1340         for (std::map<int, glw::GLuint>::const_iterator it = m_storageIDs.begin(); it != m_storageIDs.end(); ++it)
1341         {
1342                 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1343
1344                 if (m_storage == STORAGE_BUFFER)
1345                         gl.deleteBuffers(1, &it->second);
1346                 else if (m_storage == STORAGE_IMAGE)
1347                         gl.deleteTextures(1, &it->second);
1348                 else
1349                         DE_ASSERT(DE_FALSE);
1350         }
1351         m_storageIDs.clear();
1352 }
1353
1354 InterCallTestCase::IterateResult InterCallTestCase::iterate (void)
1355 {
1356         int programFriendlyName                 = 0;
1357         int resultStorageFriendlyName   = 0;
1358
1359         m_testCtx.getLog() << tcu::TestLog::Message << "Running operations:" << tcu::TestLog::EndMessage;
1360
1361         // run steps
1362
1363         for (int step = 0; step < (int)m_cmds.size(); ++step)
1364         {
1365                 switch (m_cmds[step].type)
1366                 {
1367                         case InterCallOperations::Command::TYPE_WRITE:                          runCommand(m_cmds[step].u_cmd.write,                    step,   programFriendlyName);                                                           break;
1368                         case InterCallOperations::Command::TYPE_READ:                           runCommand(m_cmds[step].u_cmd.read,                             step,   programFriendlyName, resultStorageFriendlyName);        break;
1369                         case InterCallOperations::Command::TYPE_BARRIER:                        runCommand(m_cmds[step].u_cmd.barrier);                                                                                                                                         break;
1370                         case InterCallOperations::Command::TYPE_READ_MULTIPLE:          runCommand(m_cmds[step].u_cmd.readMulti,                step,   programFriendlyName, resultStorageFriendlyName);        break;
1371                         case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:       runCommand(m_cmds[step].u_cmd.writeInterleave,  step,   programFriendlyName);                                                           break;
1372                         case InterCallOperations::Command::TYPE_READ_INTERLEAVE:        runCommand(m_cmds[step].u_cmd.readInterleave,   step,   programFriendlyName, resultStorageFriendlyName);        break;
1373                         case InterCallOperations::Command::TYPE_READ_ZERO:                      runCommand(m_cmds[step].u_cmd.readZero,                 step,   programFriendlyName, resultStorageFriendlyName);        break;
1374                         default:
1375                                 DE_ASSERT(DE_FALSE);
1376                 }
1377         }
1378
1379         // read results from result buffers
1380         if (verifyResults())
1381                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1382         else
1383                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
1384
1385         return STOP;
1386 }
1387
1388 bool InterCallTestCase::verifyResults (void)
1389 {
1390         int             resultBufferFriendlyName        = 0;
1391         bool    allResultsOk                            = true;
1392         bool    anyResult                                       = false;
1393
1394         m_testCtx.getLog() << tcu::TestLog::Message << "Reading verifier program results" << tcu::TestLog::EndMessage;
1395
1396         for (int step = 0; step < (int)m_cmds.size(); ++step)
1397         {
1398                 const int       errorFloodThreshold     = 5;
1399                 int                     numErrorsLogged         = 0;
1400
1401                 if (m_operationResultStorages[step])
1402                 {
1403                         const glw::Functions&   gl              = m_context.getRenderContext().getFunctions();
1404                         const void*                             mapped  = DE_NULL;
1405                         std::vector<deInt32>    results (m_invocationGridSize * m_invocationGridSize);
1406                         bool                                    error   = false;
1407
1408                         anyResult = true;
1409
1410                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_operationResultStorages[step]);
1411                         mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), GL_MAP_READ_BIT);
1412                         GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
1413
1414                         // copy to properly aligned array
1415                         deMemcpy(&results[0], mapped, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32));
1416
1417                         if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
1418                                 throw tcu::TestError("memory map store corrupted");
1419
1420                         // check the results
1421                         for (int ndx = 0; ndx < (int)results.size(); ++ndx)
1422                         {
1423                                 if (results[ndx] != 1)
1424                                 {
1425                                         error = true;
1426
1427                                         if (numErrorsLogged == 0)
1428                                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
1429                                         if (numErrorsLogged++ < errorFloodThreshold)
1430                                                 m_testCtx.getLog() << tcu::TestLog::Message << "        Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
1431                                         else
1432                                         {
1433                                                 // after N errors, no point continuing verification
1434                                                 m_testCtx.getLog() << tcu::TestLog::Message << "        -- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
1435                                                 break;
1436                                         }
1437                                 }
1438                         }
1439
1440                         if (error)
1441                         {
1442                                 allResultsOk = false;
1443                         }
1444                         else
1445                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " ok." << tcu::TestLog::EndMessage;
1446                 }
1447         }
1448
1449         DE_ASSERT(anyResult);
1450         DE_UNREF(anyResult);
1451
1452         return allResultsOk;
1453 }
1454
1455 void InterCallTestCase::runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName)
1456 {
1457         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1458
1459         m_testCtx.getLog()
1460                 << tcu::TestLog::Message
1461                 << "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1462                 << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1463                 << tcu::TestLog::EndMessage;
1464
1465         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1466
1467         // set destination
1468         if (m_storage == STORAGE_BUFFER)
1469         {
1470                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1471
1472                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1473                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1474         }
1475         else if (m_storage == STORAGE_IMAGE)
1476         {
1477                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1478
1479                 gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1480                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1481         }
1482         else
1483                 DE_ASSERT(DE_FALSE);
1484
1485         // calc
1486         gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1487         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1488 }
1489
1490 void InterCallTestCase::runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1491 {
1492         runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1493 }
1494
1495 void InterCallTestCase::runCommand (const op::Barrier& cmd)
1496 {
1497         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1498
1499         DE_UNREF(cmd);
1500
1501         if (m_storage == STORAGE_BUFFER)
1502         {
1503                 m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_STORAGE_BARRIER_BIT" << tcu::TestLog::EndMessage;
1504                 gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
1505         }
1506         else if (m_storage == STORAGE_IMAGE)
1507         {
1508                 m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_IMAGE_ACCESS_BARRIER_BIT" << tcu::TestLog::EndMessage;
1509                 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1510         }
1511         else
1512                 DE_ASSERT(DE_FALSE);
1513 }
1514
1515 void InterCallTestCase::runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1516 {
1517         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1518
1519         m_testCtx.getLog()
1520                 << tcu::TestLog::Message
1521                 << "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffers") : ("images")) << " #" << cmd.targetHandle0 << " and #" << cmd.targetHandle1 << ".\n"
1522                 << "    Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1523                 << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1524                 << tcu::TestLog::EndMessage;
1525
1526         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1527
1528         // set sources
1529         if (m_storage == STORAGE_BUFFER)
1530         {
1531                 DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1532                 DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1533
1534                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[cmd.targetHandle0]);
1535                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageIDs[cmd.targetHandle1]);
1536                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffers");
1537         }
1538         else if (m_storage == STORAGE_IMAGE)
1539         {
1540                 DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1541                 DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1542
1543                 gl.bindImageTexture(1, m_storageIDs[cmd.targetHandle0], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1544                 gl.bindImageTexture(2, m_storageIDs[cmd.targetHandle1], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1545                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
1546         }
1547         else
1548                 DE_ASSERT(DE_FALSE);
1549
1550         // set destination
1551         DE_ASSERT(m_operationResultStorages[stepNdx]);
1552         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1553         GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1554
1555         // calc
1556         gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1557         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read multi");
1558 }
1559
1560 void InterCallTestCase::runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName)
1561 {
1562         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1563
1564         m_testCtx.getLog()
1565                 << tcu::TestLog::Message
1566                 << "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1567                 << "    Writing to every " << ((cmd.evenOdd) ? ("even") : ("odd")) << " " << ((m_storage == STORAGE_BUFFER) ? ("element") : ("column")) << ".\n"
1568                 << "    Dispatch size: " << m_invocationGridSize / 2 << "x" << m_invocationGridSize << "."
1569                 << tcu::TestLog::EndMessage;
1570
1571         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1572
1573         // set destination
1574         if (m_storage == STORAGE_BUFFER)
1575         {
1576                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1577
1578                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1579                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1580         }
1581         else if (m_storage == STORAGE_IMAGE)
1582         {
1583                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1584
1585                 gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1586                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1587         }
1588         else
1589                 DE_ASSERT(DE_FALSE);
1590
1591         // calc
1592         gl.dispatchCompute(m_invocationGridSize / 2, m_invocationGridSize, 1);
1593         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1594 }
1595
1596 void InterCallTestCase::runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1597 {
1598         runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1599 }
1600
1601 void InterCallTestCase::runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1602 {
1603         runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1604 }
1605
1606 void InterCallTestCase::runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1607 {
1608         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1609
1610         m_testCtx.getLog()
1611                 << tcu::TestLog::Message
1612                 << "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << targetHandle << ".\n"
1613                 << "    Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1614                 << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1615                 << tcu::TestLog::EndMessage;
1616
1617         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1618
1619         // set source
1620         if (m_storage == STORAGE_BUFFER)
1621         {
1622                 DE_ASSERT(m_storageIDs[targetHandle]);
1623
1624                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[targetHandle]);
1625                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffer");
1626         }
1627         else if (m_storage == STORAGE_IMAGE)
1628         {
1629                 DE_ASSERT(m_storageIDs[targetHandle]);
1630
1631                 gl.bindImageTexture(1, m_storageIDs[targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1632                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
1633         }
1634         else
1635                 DE_ASSERT(DE_FALSE);
1636
1637         // set destination
1638         DE_ASSERT(m_operationResultStorages[stepNdx]);
1639         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1640         GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1641
1642         // calc
1643         gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1644         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read");
1645 }
1646
1647 glw::GLuint InterCallTestCase::genStorage (int friendlyName)
1648 {
1649         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1650
1651         if (m_storage == STORAGE_BUFFER)
1652         {
1653                 const int               numElements             = m_invocationGridSize * m_invocationGridSize * m_perInvocationSize;
1654                 const int               bufferSize              = numElements * (int)((m_formatInteger) ? (sizeof(deInt32)) : (sizeof(glw::GLfloat)));
1655                 glw::GLuint             retVal                  = 0;
1656
1657                 m_testCtx.getLog() << tcu::TestLog::Message << "Creating buffer #" << friendlyName << ", size " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
1658
1659                 gl.genBuffers(1, &retVal);
1660                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1661
1662                 if (m_formatInteger)
1663                 {
1664                         const std::vector<deUint32> zeroBuffer(numElements, 0);
1665                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1666                 }
1667                 else
1668                 {
1669                         const std::vector<float> zeroBuffer(numElements, 0.0f);
1670                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1671                 }
1672                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1673
1674                 return retVal;
1675         }
1676         else if (m_storage == STORAGE_IMAGE)
1677         {
1678                 const int       imageWidth      = m_invocationGridSize;
1679                 const int       imageHeight     = m_invocationGridSize * m_perInvocationSize;
1680                 glw::GLuint     retVal          = 0;
1681
1682                 m_testCtx.getLog()
1683                         << tcu::TestLog::Message
1684                         << "Creating image #" << friendlyName << ", size " << imageWidth << "x" << imageHeight
1685                         << ", internalformat = " << ((m_formatInteger) ? ("r32i") : ("r32f"))
1686                         << ", size = " << (imageWidth*imageHeight*sizeof(deUint32)) << " bytes."
1687                         << tcu::TestLog::EndMessage;
1688
1689                 gl.genTextures(1, &retVal);
1690                 gl.bindTexture(GL_TEXTURE_2D, retVal);
1691
1692                 if (m_formatInteger)
1693                         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, imageWidth, imageHeight);
1694                 else
1695                         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32F, imageWidth, imageHeight);
1696
1697                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1698                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1699                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen image");
1700
1701                 m_testCtx.getLog()
1702                         << tcu::TestLog::Message
1703                         << "Filling image with 0"
1704                         << tcu::TestLog::EndMessage;
1705
1706                 if (m_formatInteger)
1707                 {
1708                         const std::vector<deInt32> zeroBuffer(imageWidth * imageHeight, 0);
1709                         gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
1710                 }
1711                 else
1712                 {
1713                         const std::vector<float> zeroBuffer(imageWidth * imageHeight, 0.0f);
1714                         gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED, GL_FLOAT, &zeroBuffer[0]);
1715                 }
1716
1717                 GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
1718
1719                 return retVal;
1720         }
1721         else
1722         {
1723                 DE_ASSERT(DE_FALSE);
1724                 return 0;
1725         }
1726 }
1727
1728 glw::GLuint InterCallTestCase::genResultStorage (void)
1729 {
1730         const glw::Functions&   gl              = m_context.getRenderContext().getFunctions();
1731         glw::GLuint                             retVal  = 0;
1732
1733         gl.genBuffers(1, &retVal);
1734         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1735         gl.bufferData(GL_SHADER_STORAGE_BUFFER, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), DE_NULL, GL_STATIC_DRAW);
1736         GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1737
1738         return retVal;
1739 }
1740
1741 glu::ShaderProgram* InterCallTestCase::genWriteProgram (int seed)
1742 {
1743         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1744         std::ostringstream      buf;
1745
1746         buf << "${GLSL_VERSION_DECL}\n"
1747                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1748                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1749
1750         if (m_storage == STORAGE_BUFFER)
1751                 buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1752                         << "{\n"
1753                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1754                         << "} sb_out;\n";
1755         else if (m_storage == STORAGE_IMAGE)
1756                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1757         else
1758                 DE_ASSERT(DE_FALSE);
1759
1760         buf << "\n"
1761                 << "void main (void)\n"
1762                 << "{\n"
1763                 << "    uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1764                 << "    int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1765                 << "\n";
1766
1767         // Write to buffer/image m_perInvocationSize elements
1768         if (m_storage == STORAGE_BUFFER)
1769         {
1770                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1771                 {
1772                         if (m_useAtomic)
1773                                 buf << "        atomicExchange(";
1774                         else
1775                                 buf << "        ";
1776
1777                         buf << "sb_out.values[(groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]";
1778
1779                         if (m_useAtomic)
1780                                 buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1781                         else
1782                                 buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1783                 }
1784         }
1785         else if (m_storage == STORAGE_IMAGE)
1786         {
1787                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1788                 {
1789                         if (m_useAtomic)
1790                                 buf << "        imageAtomicExchange";
1791                         else
1792                                 buf << "        imageStore";
1793
1794                         buf << "(u_imageOut, ivec2((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1795
1796                         if (m_useAtomic)
1797                                 buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1798                         else
1799                                 buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
1800                 }
1801         }
1802         else
1803                 DE_ASSERT(DE_FALSE);
1804
1805         buf << "}\n";
1806
1807         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1808 }
1809
1810 glu::ShaderProgram* InterCallTestCase::genReadProgram (int seed)
1811 {
1812         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1813         std::ostringstream      buf;
1814
1815         buf << "${GLSL_VERSION_DECL}\n"
1816                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1817                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1818
1819         if (m_storage == STORAGE_BUFFER)
1820                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1821                         << "{\n"
1822                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1823                         << "} sb_in;\n";
1824         else if (m_storage == STORAGE_IMAGE)
1825                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
1826         else
1827                 DE_ASSERT(DE_FALSE);
1828
1829         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1830                 << "{\n"
1831                 << "    highp int resultOk[];\n"
1832                 << "} sb_result;\n"
1833                 << "\n"
1834                 << "void main (void)\n"
1835                 << "{\n"
1836                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1837                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1838                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1839                 << "    bool allOk = true;\n"
1840                 << "\n";
1841
1842         // Verify data
1843
1844         if (m_storage == STORAGE_BUFFER)
1845         {
1846                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1847                 {
1848                         if (!m_useAtomic)
1849                                 buf << "        allOk = allOk && (sb_in.values[(groupNdx + "
1850                                         << seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "] == "
1851                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1852                         else
1853                                 buf << "        allOk = allOk && (atomicExchange(sb_in.values[(groupNdx + "
1854                                         << seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "], zero) == "
1855                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1856                 }
1857         }
1858         else if (m_storage == STORAGE_IMAGE)
1859         {
1860                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1861                 {
1862                         if (!m_useAtomic)
1863                                 buf     << "    allOk = allOk && (imageLoad(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1864                                         << (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)).x == "
1865                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1866                         else
1867                                 buf << "        allOk = allOk && (imageAtomicExchange(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1868                                         << (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u), zero) == "
1869                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1870                 }
1871         }
1872         else
1873                 DE_ASSERT(DE_FALSE);
1874
1875         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1876                 << "}\n";
1877
1878         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1879 }
1880
1881 glu::ShaderProgram* InterCallTestCase::genReadMultipleProgram (int seed0, int seed1)
1882 {
1883         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1884         std::ostringstream      buf;
1885
1886         buf << "${GLSL_VERSION_DECL}\n"
1887                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1888                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1889
1890         if (m_storage == STORAGE_BUFFER)
1891                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer0\n"
1892                         << "{\n"
1893                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1894                         << "} sb_in0;\n"
1895                         << "layout(binding=2, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer1\n"
1896                         << "{\n"
1897                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1898                         << "} sb_in1;\n";
1899         else if (m_storage == STORAGE_IMAGE)
1900                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn0;\n"
1901                         << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=2) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn1;\n";
1902         else
1903                 DE_ASSERT(DE_FALSE);
1904
1905         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1906                 << "{\n"
1907                 << "    highp int resultOk[];\n"
1908                 << "} sb_result;\n"
1909                 << "\n"
1910                 << "void main (void)\n"
1911                 << "{\n"
1912                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1913                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1914                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1915                 << "    bool allOk = true;\n"
1916                 << "\n";
1917
1918         // Verify data
1919
1920         if (m_storage == STORAGE_BUFFER)
1921         {
1922                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1923                         buf << "        allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in0.values[(groupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1924                                 << "    allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in1.values[(groupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1925         }
1926         else if (m_storage == STORAGE_IMAGE)
1927         {
1928                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1929                         buf << "        allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn0, ivec2((gl_GlobalInvocationID.x + " << (seed0 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1930                                 << "    allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn1, ivec2((gl_GlobalInvocationID.x + " << (seed1 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1931         }
1932         else
1933                 DE_ASSERT(DE_FALSE);
1934
1935         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1936                 << "}\n";
1937
1938         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1939 }
1940
1941 glu::ShaderProgram* InterCallTestCase::genWriteInterleavedProgram (int seed, bool evenOdd)
1942 {
1943         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1944         std::ostringstream      buf;
1945
1946         buf << "${GLSL_VERSION_DECL}\n"
1947                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1948                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1949
1950         if (m_storage == STORAGE_BUFFER)
1951                 buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1952                         << "{\n"
1953                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1954                         << "} sb_out;\n";
1955         else if (m_storage == STORAGE_IMAGE)
1956                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1957         else
1958                 DE_ASSERT(DE_FALSE);
1959
1960         buf << "\n"
1961                 << "void main (void)\n"
1962                 << "{\n"
1963                 << "    uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1964                 << "    int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1965                 << "\n";
1966
1967         // Write to buffer/image m_perInvocationSize elements
1968         if (m_storage == STORAGE_BUFFER)
1969         {
1970                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1971                 {
1972                         if (m_useAtomic)
1973                                 buf << "        atomicExchange(";
1974                         else
1975                                 buf << "        ";
1976
1977                         buf << "sb_out.values[((groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize / 2 * m_perInvocationSize  << ") * 2 + " << ((evenOdd) ? (0) : (1)) << "]";
1978
1979                         if (m_useAtomic)
1980                                 buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1981                         else
1982                                 buf << "= " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1983                 }
1984         }
1985         else if (m_storage == STORAGE_IMAGE)
1986         {
1987                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1988                 {
1989                         if (m_useAtomic)
1990                                 buf << "        imageAtomicExchange";
1991                         else
1992                                 buf << "        imageStore";
1993
1994                         buf << "(u_imageOut, ivec2(((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + " << ((evenOdd) ? (0) : (1)) << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1995
1996                         if (m_useAtomic)
1997                                 buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1998                         else
1999                                 buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
2000                 }
2001         }
2002         else
2003                 DE_ASSERT(DE_FALSE);
2004
2005         buf << "}\n";
2006
2007         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2008 }
2009
2010 glu::ShaderProgram* InterCallTestCase::genReadInterleavedProgram (int seed0, int seed1)
2011 {
2012         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2013         std::ostringstream      buf;
2014
2015         buf << "${GLSL_VERSION_DECL}\n"
2016                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
2017                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
2018
2019         if (m_storage == STORAGE_BUFFER)
2020                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2021                         << "{\n"
2022                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2023                         << "} sb_in;\n";
2024         else if (m_storage == STORAGE_IMAGE)
2025                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2026         else
2027                 DE_ASSERT(DE_FALSE);
2028
2029         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2030                 << "{\n"
2031                 << "    highp int resultOk[];\n"
2032                 << "} sb_result;\n"
2033                 << "\n"
2034                 << "void main (void)\n"
2035                 << "{\n"
2036                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2037                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2038                 << "    int interleavedGroupNdx = int((size.x >> 1U) * size.y * gl_GlobalInvocationID.z + (size.x >> 1U) * gl_GlobalInvocationID.y + (gl_GlobalInvocationID.x >> 1U));\n"
2039                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
2040                 << "    bool allOk = true;\n"
2041                 << "\n";
2042
2043         // Verify data
2044
2045         if (m_storage == STORAGE_BUFFER)
2046         {
2047                 buf << "        if (groupNdx % 2 == 0)\n"
2048                         << "    {\n";
2049                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2050                         buf << "                allOk = allOk && ("
2051                                 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 0]"
2052                                 << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2053                 buf << "        }\n"
2054                         << "    else\n"
2055                         << "    {\n";
2056                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2057                         buf << "                allOk = allOk && ("
2058                                 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 1]"
2059                                 << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2060                 buf << "        }\n";
2061         }
2062         else if (m_storage == STORAGE_IMAGE)
2063         {
2064                 buf << "        if (groupNdx % 2 == 0)\n"
2065                         << "    {\n";
2066                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2067                         buf << "                allOk = allOk && ("
2068                                 << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2069                                 << "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed0 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 0, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2070                                 << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2071                 buf << "        }\n"
2072                         << "    else\n"
2073                         << "    {\n";
2074                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2075                         buf << "                allOk = allOk && ("
2076                                 << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2077                                 << "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed1 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 1, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2078                                 << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2079                 buf << "        }\n";
2080         }
2081         else
2082                 DE_ASSERT(DE_FALSE);
2083
2084         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2085                 << "}\n";
2086
2087         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2088 }
2089
2090 glu::ShaderProgram*     InterCallTestCase::genReadZeroProgram (void)
2091 {
2092         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2093         std::ostringstream      buf;
2094
2095         buf << "${GLSL_VERSION_DECL}\n"
2096                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
2097                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
2098
2099         if (m_storage == STORAGE_BUFFER)
2100                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2101                         << "{\n"
2102                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2103                         << "} sb_in;\n";
2104         else if (m_storage == STORAGE_IMAGE)
2105                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2106         else
2107                 DE_ASSERT(DE_FALSE);
2108
2109         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2110                 << "{\n"
2111                 << "    highp int resultOk[];\n"
2112                 << "} sb_result;\n"
2113                 << "\n"
2114                 << "void main (void)\n"
2115                 << "{\n"
2116                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2117                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2118                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " anything = " << ((m_formatInteger) ? ("5") : ("5.0")) << ";\n"
2119                 << "    bool allOk = true;\n"
2120                 << "\n";
2121
2122         // Verify data
2123
2124         if (m_storage == STORAGE_BUFFER)
2125         {
2126                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2127                         buf << "        allOk = allOk && ("
2128                                 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[groupNdx * " << m_perInvocationSize << " + " << readNdx << "]"
2129                                 << ((m_useAtomic) ? (", anything)") : ("")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2130         }
2131         else if (m_storage == STORAGE_IMAGE)
2132         {
2133                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2134                         buf << "        allOk = allOk && ("
2135                         << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y + " << (readNdx*m_invocationGridSize) << "u)"
2136                         << ((m_useAtomic) ? (", anything)") : (").x")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2137         }
2138         else
2139                 DE_ASSERT(DE_FALSE);
2140
2141         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2142                 << "}\n";
2143
2144         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2145 }
2146
2147 class SSBOConcurrentAtomicCase : public TestCase
2148 {
2149 public:
2150
2151                                                         SSBOConcurrentAtomicCase        (Context& context, const char* name, const char* description, int numCalls, int workSize);
2152                                                         ~SSBOConcurrentAtomicCase       (void);
2153
2154         void                                    init                                            (void);
2155         void                                    deinit                                          (void);
2156         IterateResult                   iterate                                         (void);
2157
2158 private:
2159         std::string                             genComputeSource                        (void) const;
2160
2161         const int                               m_numCalls;
2162         const int                               m_workSize;
2163         glu::ShaderProgram*             m_program;
2164         deUint32                                m_bufferID;
2165         std::vector<deUint32>   m_intermediateResultBuffers;
2166 };
2167
2168 SSBOConcurrentAtomicCase::SSBOConcurrentAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2169         : TestCase              (context, name, description)
2170         , m_numCalls    (numCalls)
2171         , m_workSize    (workSize)
2172         , m_program             (DE_NULL)
2173         , m_bufferID    (DE_NULL)
2174 {
2175 }
2176
2177 SSBOConcurrentAtomicCase::~SSBOConcurrentAtomicCase (void)
2178 {
2179         deinit();
2180 }
2181
2182 void SSBOConcurrentAtomicCase::init (void)
2183 {
2184         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
2185         std::vector<deUint32>   zeroData                        (m_workSize, 0);
2186
2187         // gen buffers
2188
2189         gl.genBuffers(1, &m_bufferID);
2190         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2191         gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2192
2193         for (int ndx = 0; ndx < m_numCalls; ++ndx)
2194         {
2195                 deUint32 buffer = 0;
2196
2197                 gl.genBuffers(1, &buffer);
2198                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2199                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2200
2201                 m_intermediateResultBuffers.push_back(buffer);
2202                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2203         }
2204
2205         // gen program
2206
2207         m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2208         m_testCtx.getLog() << *m_program;
2209         if (!m_program->isOk())
2210                 throw tcu::TestError("could not build program");
2211 }
2212
2213 void SSBOConcurrentAtomicCase::deinit (void)
2214 {
2215         if (m_bufferID)
2216         {
2217                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2218                 m_bufferID = 0;
2219         }
2220
2221         for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2222                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2223         m_intermediateResultBuffers.clear();
2224
2225         delete m_program;
2226         m_program = DE_NULL;
2227 }
2228
2229 TestCase::IterateResult SSBOConcurrentAtomicCase::iterate (void)
2230 {
2231         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
2232         const deUint32                  sumValue                = (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2233         std::vector<int>                deltas;
2234
2235         // generate unique deltas
2236         generateShuffledRamp(m_numCalls, deltas);
2237
2238         // invoke program N times, each with a different delta
2239         {
2240                 const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2241
2242                 m_testCtx.getLog()
2243                         << tcu::TestLog::Message
2244                         << "Running shader " << m_numCalls << " times.\n"
2245                         << "Num groups = (" << m_workSize << ", 1, 1)\n"
2246                         << "Setting u_atomicDelta to a unique value for each call.\n"
2247                         << tcu::TestLog::EndMessage;
2248
2249                 if (deltaLocation == -1)
2250                         throw tcu::TestError("u_atomicDelta location was -1");
2251
2252                 gl.useProgram(m_program->getProgram());
2253                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_bufferID);
2254
2255                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2256                 {
2257                         m_testCtx.getLog()
2258                                 << tcu::TestLog::Message
2259                                 << "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2260                                 << tcu::TestLog::EndMessage;
2261
2262                         gl.uniform1ui(deltaLocation, deltas[callNdx]);
2263                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2264                         gl.dispatchCompute(m_workSize, 1, 1);
2265                 }
2266
2267                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2268         }
2269
2270         // Verify result
2271         {
2272                 std::vector<deUint32> result;
2273
2274                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2275
2276                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2277                 readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, result);
2278
2279                 for (int ndx = 0; ndx < m_workSize; ++ndx)
2280                 {
2281                         if (result[ndx] != sumValue)
2282                         {
2283                                 m_testCtx.getLog()
2284                                         << tcu::TestLog::Message
2285                                         << "Work buffer error, at index " << ndx << " expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2286                                         << "Work buffer contains invalid values."
2287                                         << tcu::TestLog::EndMessage;
2288
2289                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2290                                 return STOP;
2291                         }
2292                 }
2293
2294                 m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer contents are valid." << tcu::TestLog::EndMessage;
2295         }
2296
2297         // verify steps
2298         {
2299                 std::vector<std::vector<deUint32> >     intermediateResults     (m_numCalls);
2300                 std::vector<deUint32>                           valueChain                      (m_numCalls);
2301
2302                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2303
2304                 // collect results
2305
2306                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2307                 {
2308                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2309                         readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, intermediateResults[callNdx]);
2310                 }
2311
2312                 // verify values
2313
2314                 for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2315                 {
2316                         int                     invalidOperationNdx;
2317                         deUint32        errorDelta;
2318                         deUint32        errorExpected;
2319
2320                         // collect result chain for each element
2321                         for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2322                                 valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2323
2324                         // check there exists a path from 0 to sumValue using each addition once
2325                         // decompose cumulative results to addition operations (all additions positive => this works)
2326
2327                         std::sort(valueChain.begin(), valueChain.end());
2328
2329                         // validate chain
2330                         if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2331                         {
2332                                 m_testCtx.getLog()
2333                                         << tcu::TestLog::Message
2334                                         << "Intermediate buffer error, at value index " << valueNdx << ", applied operation index " << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2335                                         << "Intermediate buffer contains invalid values. Values at index " << valueNdx << "\n"
2336                                         << tcu::TestLog::EndMessage;
2337
2338                                 for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2339                                         m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2340                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2341
2342                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2343                                 return STOP;
2344                         }
2345                 }
2346
2347                 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2348         }
2349
2350         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2351         return STOP;
2352 }
2353
2354 std::string SSBOConcurrentAtomicCase::genComputeSource (void) const
2355 {
2356         std::ostringstream buf;
2357
2358         buf     << "${GLSL_VERSION_DECL}\n"
2359                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2360                 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2361                 << "{\n"
2362                 << "    highp uint values[" << m_workSize << "];\n"
2363                 << "} sb_ires;\n"
2364                 << "\n"
2365                 << "layout (binding = 2, std430) volatile buffer WorkBuffer\n"
2366                 << "{\n"
2367                 << "    highp uint values[" << m_workSize << "];\n"
2368                 << "} sb_work;\n"
2369                 << "uniform highp uint u_atomicDelta;\n"
2370                 << "\n"
2371                 << "void main ()\n"
2372                 << "{\n"
2373                 << "    highp uint invocationIndex = gl_GlobalInvocationID.x;\n"
2374                 << "    sb_ires.values[invocationIndex] = atomicAdd(sb_work.values[invocationIndex], u_atomicDelta);\n"
2375                 << "}";
2376
2377         return specializeShader(m_context, buf.str().c_str());
2378 }
2379
2380 class ConcurrentAtomicCounterCase : public TestCase
2381 {
2382 public:
2383
2384                                                         ConcurrentAtomicCounterCase             (Context& context, const char* name, const char* description, int numCalls, int workSize);
2385                                                         ~ConcurrentAtomicCounterCase    (void);
2386
2387         void                                    init                                                    (void);
2388         void                                    deinit                                                  (void);
2389         IterateResult                   iterate                                                 (void);
2390
2391 private:
2392         std::string                             genComputeSource                                (bool evenOdd) const;
2393
2394         const int                               m_numCalls;
2395         const int                               m_workSize;
2396         glu::ShaderProgram*             m_evenProgram;
2397         glu::ShaderProgram*             m_oddProgram;
2398         deUint32                                m_counterBuffer;
2399         deUint32                                m_intermediateResultBuffer;
2400 };
2401
2402 ConcurrentAtomicCounterCase::ConcurrentAtomicCounterCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2403         : TestCase                                      (context, name, description)
2404         , m_numCalls                            (numCalls)
2405         , m_workSize                            (workSize)
2406         , m_evenProgram                         (DE_NULL)
2407         , m_oddProgram                          (DE_NULL)
2408         , m_counterBuffer                       (DE_NULL)
2409         , m_intermediateResultBuffer(DE_NULL)
2410 {
2411 }
2412
2413 ConcurrentAtomicCounterCase::~ConcurrentAtomicCounterCase (void)
2414 {
2415         deinit();
2416 }
2417
2418 void ConcurrentAtomicCounterCase::init (void)
2419 {
2420         const glw::Functions&           gl                      = m_context.getRenderContext().getFunctions();
2421         const std::vector<deUint32>     zeroData        (m_numCalls * m_workSize, 0);
2422
2423         // gen buffer
2424
2425         gl.genBuffers(1, &m_counterBuffer);
2426         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_counterBuffer);
2427         gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32), &zeroData[0], GL_DYNAMIC_COPY);
2428
2429         gl.genBuffers(1, &m_intermediateResultBuffer);
2430         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2431         gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_numCalls * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2432
2433         GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2434
2435         // gen programs
2436
2437         {
2438                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "EvenProgram", "Even program");
2439
2440                 m_evenProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(true)));
2441                 m_testCtx.getLog() << *m_evenProgram;
2442                 if (!m_evenProgram->isOk())
2443                         throw tcu::TestError("could not build program");
2444         }
2445         {
2446                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "OddProgram", "Odd program");
2447
2448                 m_oddProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(false)));
2449                 m_testCtx.getLog() << *m_oddProgram;
2450                 if (!m_oddProgram->isOk())
2451                         throw tcu::TestError("could not build program");
2452         }
2453 }
2454
2455 void ConcurrentAtomicCounterCase::deinit (void)
2456 {
2457         if (m_counterBuffer)
2458         {
2459                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_counterBuffer);
2460                 m_counterBuffer = 0;
2461         }
2462         if (m_intermediateResultBuffer)
2463         {
2464                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffer);
2465                 m_intermediateResultBuffer = 0;
2466         }
2467
2468         delete m_evenProgram;
2469         m_evenProgram = DE_NULL;
2470
2471         delete m_oddProgram;
2472         m_oddProgram = DE_NULL;
2473 }
2474
2475 TestCase::IterateResult ConcurrentAtomicCounterCase::iterate (void)
2476 {
2477         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2478
2479         // invoke program N times, each with a different delta
2480         {
2481                 const int evenCallNdxLocation   = gl.getUniformLocation(m_evenProgram->getProgram(), "u_callNdx");
2482                 const int oddCallNdxLocation    = gl.getUniformLocation(m_oddProgram->getProgram(), "u_callNdx");
2483
2484                 m_testCtx.getLog()
2485                         << tcu::TestLog::Message
2486                         << "Running shader pair (even & odd) " << m_numCalls << " times.\n"
2487                         << "Num groups = (" << m_workSize << ", 1, 1)\n"
2488                         << tcu::TestLog::EndMessage;
2489
2490                 if (evenCallNdxLocation == -1)
2491                         throw tcu::TestError("u_callNdx location was -1");
2492                 if (oddCallNdxLocation == -1)
2493                         throw tcu::TestError("u_callNdx location was -1");
2494
2495                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffer);
2496                 gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_counterBuffer);
2497
2498                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2499                 {
2500                         gl.useProgram(m_evenProgram->getProgram());
2501                         gl.uniform1ui(evenCallNdxLocation, (deUint32)callNdx);
2502                         gl.dispatchCompute(m_workSize, 1, 1);
2503
2504                         gl.useProgram(m_oddProgram->getProgram());
2505                         gl.uniform1ui(oddCallNdxLocation, (deUint32)callNdx);
2506                         gl.dispatchCompute(m_workSize, 1, 1);
2507                 }
2508
2509                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2510         }
2511
2512         // Verify result
2513         {
2514                 deUint32 result;
2515
2516                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
2517
2518                 gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counterBuffer);
2519                 result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
2520
2521                 if ((int)result != m_numCalls*m_workSize)
2522                 {
2523                         m_testCtx.getLog()
2524                                 << tcu::TestLog::Message
2525                                 << "Counter buffer error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
2526                                 << tcu::TestLog::EndMessage;
2527
2528                         m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2529                         return STOP;
2530                 }
2531
2532                 m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer is valid." << tcu::TestLog::EndMessage;
2533         }
2534
2535         // verify steps
2536         {
2537                 std::vector<deUint32> intermediateResults;
2538
2539                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2540
2541                 // collect results
2542
2543                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2544                 readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_numCalls * m_workSize, intermediateResults);
2545
2546                 // verify values
2547
2548                 std::sort(intermediateResults.begin(), intermediateResults.end());
2549
2550                 for (int valueNdx = 0; valueNdx < m_workSize * m_numCalls; ++valueNdx)
2551                 {
2552                         if ((int)intermediateResults[valueNdx] != valueNdx)
2553                         {
2554                                 m_testCtx.getLog()
2555                                         << tcu::TestLog::Message
2556                                         << "Intermediate buffer error, at value index " << valueNdx << ", expected " << valueNdx << ", got " << intermediateResults[valueNdx] << ".\n"
2557                                         << "Intermediate buffer contains invalid values. Intermediate results:\n"
2558                                         << tcu::TestLog::EndMessage;
2559
2560                                 for (int logCallNdx = 0; logCallNdx < m_workSize * m_numCalls; ++logCallNdx)
2561                                         m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx] << tcu::TestLog::EndMessage;
2562
2563                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2564                                 return STOP;
2565                         }
2566                 }
2567
2568                 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2569         }
2570
2571         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2572         return STOP;
2573 }
2574
2575 std::string ConcurrentAtomicCounterCase::genComputeSource (bool evenOdd) const
2576 {
2577         std::ostringstream buf;
2578
2579         buf     << "${GLSL_VERSION_DECL}\n"
2580                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2581                 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2582                 << "{\n"
2583                 << "    highp uint values[" << m_workSize * m_numCalls << "];\n"
2584                 << "} sb_ires;\n"
2585                 << "\n"
2586                 << "layout (binding = 0, offset = 0) uniform atomic_uint u_counter;\n"
2587                 << "uniform highp uint u_callNdx;\n"
2588                 << "\n"
2589                 << "void main ()\n"
2590                 << "{\n"
2591                 << "    highp uint dataNdx = u_callNdx * " << m_workSize << "u + gl_GlobalInvocationID.x;\n"
2592                 << "    if ((dataNdx % 2u) == " << ((evenOdd) ? (0) : (1)) << "u)\n"
2593                 << "            sb_ires.values[dataNdx] = atomicCounterIncrement(u_counter);\n"
2594                 << "}";
2595
2596         return specializeShader(m_context, buf.str().c_str());
2597 }
2598
2599 class ConcurrentImageAtomicCase : public TestCase
2600 {
2601 public:
2602
2603                                                         ConcurrentImageAtomicCase       (Context& context, const char* name, const char* description, int numCalls, int workSize);
2604                                                         ~ConcurrentImageAtomicCase      (void);
2605
2606         void                                    init                                            (void);
2607         void                                    deinit                                          (void);
2608         IterateResult                   iterate                                         (void);
2609
2610 private:
2611         void                                    readWorkImage                           (std::vector<deUint32>& result);
2612
2613         std::string                             genComputeSource                        (void) const;
2614         std::string                             genImageReadSource                      (void) const;
2615         std::string                             genImageClearSource                     (void) const;
2616
2617         const int                               m_numCalls;
2618         const int                               m_workSize;
2619         glu::ShaderProgram*             m_program;
2620         glu::ShaderProgram*             m_imageReadProgram;
2621         glu::ShaderProgram*             m_imageClearProgram;
2622         deUint32                                m_imageID;
2623         std::vector<deUint32>   m_intermediateResultBuffers;
2624 };
2625
2626 ConcurrentImageAtomicCase::ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2627         : TestCase                              (context, name, description)
2628         , m_numCalls                    (numCalls)
2629         , m_workSize                    (workSize)
2630         , m_program                             (DE_NULL)
2631         , m_imageReadProgram    (DE_NULL)
2632         , m_imageClearProgram   (DE_NULL)
2633         , m_imageID                             (DE_NULL)
2634 {
2635 }
2636
2637 ConcurrentImageAtomicCase::~ConcurrentImageAtomicCase (void)
2638 {
2639         deinit();
2640 }
2641
2642 void ConcurrentImageAtomicCase::init (void)
2643 {
2644         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
2645         std::vector<deUint32>   zeroData                        (m_workSize * m_workSize, 0);
2646
2647         if (!checkSupport(m_context))
2648                 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic");
2649
2650         // gen image
2651
2652         gl.genTextures(1, &m_imageID);
2653         gl.bindTexture(GL_TEXTURE_2D, m_imageID);
2654         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize, m_workSize);
2655         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2656         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2657         GLU_EXPECT_NO_ERROR(gl.getError(), "gen tex");
2658
2659         // gen buffers
2660
2661         for (int ndx = 0; ndx < m_numCalls; ++ndx)
2662         {
2663                 deUint32 buffer = 0;
2664
2665                 gl.genBuffers(1, &buffer);
2666                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2667                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2668
2669                 m_intermediateResultBuffers.push_back(buffer);
2670                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2671         }
2672
2673         // gen programs
2674
2675         m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2676         m_testCtx.getLog() << *m_program;
2677         if (!m_program->isOk())
2678                 throw tcu::TestError("could not build program");
2679
2680         m_imageReadProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageReadSource()));
2681         if (!m_imageReadProgram->isOk())
2682         {
2683                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageReadProgram", "Image read program");
2684
2685                 m_testCtx.getLog() << *m_imageReadProgram;
2686                 throw tcu::TestError("could not build program");
2687         }
2688
2689         m_imageClearProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageClearSource()));
2690         if (!m_imageClearProgram->isOk())
2691         {
2692                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageClearProgram", "Image read program");
2693
2694                 m_testCtx.getLog() << *m_imageClearProgram;
2695                 throw tcu::TestError("could not build program");
2696         }
2697 }
2698
2699 void ConcurrentImageAtomicCase::deinit (void)
2700 {
2701         if (m_imageID)
2702         {
2703                 m_context.getRenderContext().getFunctions().deleteTextures(1, &m_imageID);
2704                 m_imageID = 0;
2705         }
2706
2707         for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2708                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2709         m_intermediateResultBuffers.clear();
2710
2711         delete m_program;
2712         m_program = DE_NULL;
2713
2714         delete m_imageReadProgram;
2715         m_imageReadProgram = DE_NULL;
2716
2717         delete m_imageClearProgram;
2718         m_imageClearProgram = DE_NULL;
2719 }
2720
2721 TestCase::IterateResult ConcurrentImageAtomicCase::iterate (void)
2722 {
2723         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
2724         const deUint32                  sumValue                = (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2725         std::vector<int>                deltas;
2726
2727         // generate unique deltas
2728         generateShuffledRamp(m_numCalls, deltas);
2729
2730         // clear image
2731         {
2732                 m_testCtx.getLog() << tcu::TestLog::Message << "Clearing image contents" << tcu::TestLog::EndMessage;
2733
2734                 gl.useProgram(m_imageClearProgram->getProgram());
2735                 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
2736                 gl.dispatchCompute(m_workSize, m_workSize, 1);
2737                 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2738
2739                 GLU_EXPECT_NO_ERROR(gl.getError(), "clear");
2740         }
2741
2742         // invoke program N times, each with a different delta
2743         {
2744                 const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2745
2746                 m_testCtx.getLog()
2747                         << tcu::TestLog::Message
2748                         << "Running shader " << m_numCalls << " times.\n"
2749                         << "Num groups = (" << m_workSize << ", " << m_workSize << ", 1)\n"
2750                         << "Setting u_atomicDelta to a unique value for each call.\n"
2751                         << tcu::TestLog::EndMessage;
2752
2753                 if (deltaLocation == -1)
2754                         throw tcu::TestError("u_atomicDelta location was -1");
2755
2756                 gl.useProgram(m_program->getProgram());
2757                 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
2758
2759                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2760                 {
2761                         m_testCtx.getLog()
2762                                 << tcu::TestLog::Message
2763                                 << "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2764                                 << tcu::TestLog::EndMessage;
2765
2766                         gl.uniform1ui(deltaLocation, deltas[callNdx]);
2767                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2768                         gl.dispatchCompute(m_workSize, m_workSize, 1);
2769                 }
2770
2771                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2772         }
2773
2774         // Verify result
2775         {
2776                 std::vector<deUint32> result;
2777
2778                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work image, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2779
2780                 readWorkImage(result);
2781
2782                 for (int ndx = 0; ndx < m_workSize * m_workSize; ++ndx)
2783                 {
2784                         if (result[ndx] != sumValue)
2785                         {
2786                                 m_testCtx.getLog()
2787                                         << tcu::TestLog::Message
2788                                         << "Work image error, at index (" << ndx % m_workSize << ", " << ndx / m_workSize << ") expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2789                                         << "Work image contains invalid values."
2790                                         << tcu::TestLog::EndMessage;
2791
2792                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Image contents invalid");
2793                                 return STOP;
2794                         }
2795                 }
2796
2797                 m_testCtx.getLog() << tcu::TestLog::Message << "Work image contents are valid." << tcu::TestLog::EndMessage;
2798         }
2799
2800         // verify steps
2801         {
2802                 std::vector<std::vector<deUint32> >     intermediateResults     (m_numCalls);
2803                 std::vector<deUint32>                           valueChain                      (m_numCalls);
2804                 std::vector<deUint32>                           chainDelta                      (m_numCalls);
2805
2806                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2807
2808                 // collect results
2809
2810                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2811                 {
2812                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2813                         readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize * m_workSize, intermediateResults[callNdx]);
2814                 }
2815
2816                 // verify values
2817
2818                 for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2819                 {
2820                         int                     invalidOperationNdx;
2821                         deUint32        errorDelta;
2822                         deUint32        errorExpected;
2823
2824                         // collect result chain for each element
2825                         for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2826                                 valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2827
2828                         // check there exists a path from 0 to sumValue using each addition once
2829                         // decompose cumulative results to addition operations (all additions positive => this works)
2830
2831                         std::sort(valueChain.begin(), valueChain.end());
2832
2833                         for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2834                                 chainDelta[callNdx] = ((callNdx + 1 == m_numCalls) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
2835
2836                         // chainDelta contains now the actual additions applied to the value
2837                         std::sort(chainDelta.begin(), chainDelta.end());
2838
2839                         // validate chain
2840                         if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2841                         {
2842                                 m_testCtx.getLog()
2843                                         << tcu::TestLog::Message
2844                                         << "Intermediate buffer error, at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << "), applied operation index "
2845                                         << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2846                                         << "Intermediate buffer contains invalid values. Values at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << ")\n"
2847                                         << tcu::TestLog::EndMessage;
2848
2849                                 for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2850                                         m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2851                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2852
2853                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2854                                 return STOP;
2855                         }
2856                 }
2857
2858                 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2859         }
2860
2861         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2862         return STOP;
2863 }
2864
2865 void ConcurrentImageAtomicCase::readWorkImage (std::vector<deUint32>& result)
2866 {
2867         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
2868         glu::Buffer                             resultBuffer    (m_context.getRenderContext());
2869
2870         // Read image to an ssbo
2871
2872         {
2873                 const std::vector<deUint32> zeroData(m_workSize*m_workSize, 0);
2874
2875                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *resultBuffer);
2876                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * m_workSize * m_workSize), &zeroData[0], GL_DYNAMIC_COPY);
2877
2878                 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2879                 gl.useProgram(m_imageReadProgram->getProgram());
2880
2881                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *resultBuffer);
2882                 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
2883                 gl.dispatchCompute(m_workSize, m_workSize, 1);
2884
2885                 GLU_EXPECT_NO_ERROR(gl.getError(), "read");
2886         }
2887
2888         // Read ssbo
2889         {
2890                 const void* ptr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, (int)(sizeof(deUint32) * m_workSize * m_workSize), GL_MAP_READ_BIT);
2891                 GLU_EXPECT_NO_ERROR(gl.getError(), "map");
2892
2893                 if (!ptr)
2894                         throw tcu::TestError("mapBufferRange returned NULL");
2895
2896                 result.resize(m_workSize * m_workSize);
2897                 memcpy(&result[0], ptr, sizeof(deUint32) * m_workSize * m_workSize);
2898
2899                 if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) == GL_FALSE)
2900                         throw tcu::TestError("unmapBuffer returned false");
2901         }
2902 }
2903
2904 std::string ConcurrentImageAtomicCase::genComputeSource (void) const
2905 {
2906         std::ostringstream buf;
2907
2908         buf     << "${GLSL_VERSION_DECL}\n"
2909                 << "${SHADER_IMAGE_ATOMIC_REQUIRE}\n"
2910                 << "\n"
2911                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2912                 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2913                 << "{\n"
2914                 << "    highp uint values[" << m_workSize * m_workSize << "];\n"
2915                 << "} sb_ires;\n"
2916                 << "\n"
2917                 << "layout (binding = 2, r32ui) volatile uniform highp uimage2D u_workImage;\n"
2918                 << "uniform highp uint u_atomicDelta;\n"
2919                 << "\n"
2920                 << "void main ()\n"
2921                 << "{\n"
2922                 << "    highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2923                 << "    sb_ires.values[invocationIndex] = imageAtomicAdd(u_workImage, ivec2(gl_GlobalInvocationID.xy), u_atomicDelta);\n"
2924                 << "}";
2925
2926         return specializeShader(m_context, buf.str().c_str());
2927 }
2928
2929 std::string ConcurrentImageAtomicCase::genImageReadSource (void) const
2930 {
2931         std::ostringstream buf;
2932
2933         buf     << "${GLSL_VERSION_DECL}\n"
2934                 << "\n"
2935                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2936                 << "layout (binding = 1, std430) writeonly buffer ImageValues\n"
2937                 << "{\n"
2938                 << "    highp uint values[" << m_workSize * m_workSize << "];\n"
2939                 << "} sb_res;\n"
2940                 << "\n"
2941                 << "layout (binding = 2, r32ui) readonly uniform highp uimage2D u_workImage;\n"
2942                 << "\n"
2943                 << "void main ()\n"
2944                 << "{\n"
2945                 << "    highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2946                 << "    sb_res.values[invocationIndex] = imageLoad(u_workImage, ivec2(gl_GlobalInvocationID.xy)).x;\n"
2947                 << "}";
2948
2949         return specializeShader(m_context, buf.str().c_str());
2950 }
2951
2952 std::string ConcurrentImageAtomicCase::genImageClearSource (void) const
2953 {
2954         std::ostringstream buf;
2955
2956         buf     << "${GLSL_VERSION_DECL}\n"
2957                 << "\n"
2958                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2959                 << "layout (binding = 2, r32ui) writeonly uniform highp uimage2D u_workImage;\n"
2960                 << "\n"
2961                 << "void main ()\n"
2962                 << "{\n"
2963                 << "    imageStore(u_workImage, ivec2(gl_GlobalInvocationID.xy), uvec4(0, 0, 0, 0));\n"
2964                 << "}";
2965
2966         return specializeShader(m_context, buf.str().c_str());
2967 }
2968
2969 class ConcurrentSSBOAtomicCounterMixedCase : public TestCase
2970 {
2971 public:
2972                                                         ConcurrentSSBOAtomicCounterMixedCase    (Context& context, const char* name, const char* description, int numCalls, int workSize);
2973                                                         ~ConcurrentSSBOAtomicCounterMixedCase   (void);
2974
2975         void                                    init                                                                    (void);
2976         void                                    deinit                                                                  (void);
2977         IterateResult                   iterate                                                                 (void);
2978
2979 private:
2980         std::string                             genSSBOComputeSource                                    (void) const;
2981         std::string                             genAtomicCounterComputeSource                   (void) const;
2982
2983         const int                               m_numCalls;
2984         const int                               m_workSize;
2985         deUint32                                m_bufferID;
2986         glu::ShaderProgram*             m_ssboAtomicProgram;
2987         glu::ShaderProgram*             m_atomicCounterProgram;
2988 };
2989
2990 ConcurrentSSBOAtomicCounterMixedCase::ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2991         : TestCase                                      (context, name, description)
2992         , m_numCalls                            (numCalls)
2993         , m_workSize                            (workSize)
2994         , m_bufferID                            (DE_NULL)
2995         , m_ssboAtomicProgram           (DE_NULL)
2996         , m_atomicCounterProgram        (DE_NULL)
2997 {
2998         // SSBO atomic XORs cancel out
2999         DE_ASSERT((workSize * numCalls) % (16 * 2) == 0);
3000 }
3001
3002 ConcurrentSSBOAtomicCounterMixedCase::~ConcurrentSSBOAtomicCounterMixedCase (void)
3003 {
3004         deinit();
3005 }
3006
3007 void ConcurrentSSBOAtomicCounterMixedCase::init (void)
3008 {
3009         const glw::Functions&           gl                      = m_context.getRenderContext().getFunctions();
3010         const deUint32                          zeroBuf[2]      = { 0, 0 };
3011
3012         // gen buffer
3013
3014         gl.genBuffers(1, &m_bufferID);
3015         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
3016         gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * 2), zeroBuf, GL_DYNAMIC_COPY);
3017
3018         GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
3019
3020         // gen programs
3021
3022         {
3023                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "SSBOProgram", "SSBO atomic program");
3024
3025                 m_ssboAtomicProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genSSBOComputeSource()));
3026                 m_testCtx.getLog() << *m_ssboAtomicProgram;
3027                 if (!m_ssboAtomicProgram->isOk())
3028                         throw tcu::TestError("could not build program");
3029         }
3030         {
3031                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "AtomicCounterProgram", "Atomic counter program");
3032
3033                 m_atomicCounterProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genAtomicCounterComputeSource()));
3034                 m_testCtx.getLog() << *m_atomicCounterProgram;
3035                 if (!m_atomicCounterProgram->isOk())
3036                         throw tcu::TestError("could not build program");
3037         }
3038 }
3039
3040 void ConcurrentSSBOAtomicCounterMixedCase::deinit (void)
3041 {
3042         if (m_bufferID)
3043         {
3044                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
3045                 m_bufferID = 0;
3046         }
3047
3048         delete m_ssboAtomicProgram;
3049         m_ssboAtomicProgram = DE_NULL;
3050
3051         delete m_atomicCounterProgram;
3052         m_atomicCounterProgram = DE_NULL;
3053 }
3054
3055 TestCase::IterateResult ConcurrentSSBOAtomicCounterMixedCase::iterate (void)
3056 {
3057         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3058
3059         m_testCtx.getLog() << tcu::TestLog::Message << "Testing atomic counters and SSBO atomic operations with both backed by the same buffer." << tcu::TestLog::EndMessage;
3060
3061         // invoke programs N times
3062         {
3063                 m_testCtx.getLog()
3064                         << tcu::TestLog::Message
3065                         << "Running SSBO atomic program and atomic counter program " << m_numCalls << " times. (interleaved)\n"
3066                         << "Num groups = (" << m_workSize << ", 1, 1)\n"
3067                         << tcu::TestLog::EndMessage;
3068
3069                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_bufferID);
3070                 gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_bufferID);
3071
3072                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
3073                 {
3074                         gl.useProgram(m_atomicCounterProgram->getProgram());
3075                         gl.dispatchCompute(m_workSize, 1, 1);
3076
3077                         gl.useProgram(m_ssboAtomicProgram->getProgram());
3078                         gl.dispatchCompute(m_workSize, 1, 1);
3079                 }
3080
3081                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
3082         }
3083
3084         // Verify result
3085         {
3086                 deUint32 result;
3087
3088                 // XORs cancel out, only addition is left
3089                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
3090
3091                 gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_bufferID);
3092                 result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
3093
3094                 if ((int)result != m_numCalls*m_workSize)
3095                 {
3096                         m_testCtx.getLog()
3097                                 << tcu::TestLog::Message
3098                                 << "Buffer value error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
3099                                 << tcu::TestLog::EndMessage;
3100
3101                         m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
3102                         return STOP;
3103                 }
3104
3105                 m_testCtx.getLog() << tcu::TestLog::Message << "Buffer is valid." << tcu::TestLog::EndMessage;
3106         }
3107
3108         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
3109         return STOP;
3110 }
3111
3112 std::string ConcurrentSSBOAtomicCounterMixedCase::genSSBOComputeSource (void) const
3113 {
3114         std::ostringstream buf;
3115
3116         buf     << "${GLSL_VERSION_DECL}\n"
3117                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3118                 << "layout (binding = 1, std430) volatile buffer WorkBuffer\n"
3119                 << "{\n"
3120                 << "    highp uint targetValue;\n"
3121                 << "    highp uint unused;\n"
3122                 << "} sb_work;\n"
3123                 << "\n"
3124                 << "void main ()\n"
3125                 << "{\n"
3126                 << "    // flip high bits\n"
3127                 << "    highp uint mask = uint(1) << (24u + (gl_GlobalInvocationID.x % 8u));\n"
3128                 << "    sb_work.unused = atomicXor(sb_work.targetValue, mask);\n"
3129                 << "}";
3130
3131         return specializeShader(m_context, buf.str().c_str());
3132 }
3133
3134 std::string ConcurrentSSBOAtomicCounterMixedCase::genAtomicCounterComputeSource (void) const
3135 {
3136         std::ostringstream buf;
3137
3138         buf     << "${GLSL_VERSION_DECL}\n"
3139                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3140                 << "\n"
3141                 << "layout (binding = 0, offset = 0) uniform atomic_uint u_counter;\n"
3142                 << "\n"
3143                 << "void main ()\n"
3144                 << "{\n"
3145                 << "    atomicCounterIncrement(u_counter);\n"
3146                 << "}";
3147
3148         return specializeShader(m_context, buf.str().c_str());
3149 }
3150
3151 } // anonymous
3152
3153 SynchronizationTests::SynchronizationTests (Context& context)
3154         : TestCaseGroup(context, "synchronization", "Synchronization tests")
3155 {
3156 }
3157
3158 SynchronizationTests::~SynchronizationTests (void)
3159 {
3160 }
3161
3162 void SynchronizationTests::init (void)
3163 {
3164         tcu::TestCaseGroup* const inInvocationGroup             = new tcu::TestCaseGroup(m_testCtx, "in_invocation",    "Test intra-invocation synchronization");
3165         tcu::TestCaseGroup* const interInvocationGroup  = new tcu::TestCaseGroup(m_testCtx, "inter_invocation", "Test inter-invocation synchronization");
3166         tcu::TestCaseGroup* const interCallGroup                = new tcu::TestCaseGroup(m_testCtx, "inter_call",       "Test inter-call synchronization");
3167
3168         addChild(inInvocationGroup);
3169         addChild(interInvocationGroup);
3170         addChild(interCallGroup);
3171
3172         // .in_invocation & .inter_invocation
3173         {
3174                 static const struct CaseConfig
3175                 {
3176                         const char*                                                                     namePrefix;
3177                         const InterInvocationTestCase::StorageType      storage;
3178                         const int                                                                       flags;
3179                 } configs[] =
3180                 {
3181                         { "image",                      InterInvocationTestCase::STORAGE_IMAGE,         0                                                                               },
3182                         { "image_atomic",       InterInvocationTestCase::STORAGE_IMAGE,         InterInvocationTestCase::FLAG_ATOMIC    },
3183                         { "ssbo",                       InterInvocationTestCase::STORAGE_BUFFER,        0                                                                               },
3184                         { "ssbo_atomic",        InterInvocationTestCase::STORAGE_BUFFER,        InterInvocationTestCase::FLAG_ATOMIC    },
3185                 };
3186
3187                 for (int groupNdx = 0; groupNdx < 2; ++groupNdx)
3188                 {
3189                         tcu::TestCaseGroup* const       targetGroup     = (groupNdx == 0) ? (inInvocationGroup) : (interInvocationGroup);
3190                         const int                                       extraFlags      = (groupNdx == 0) ? (0) : (InterInvocationTestCase::FLAG_IN_GROUP);
3191
3192                         for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3193                         {
3194                                 const char* const target = (configs[configNdx].storage == InterInvocationTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3195
3196                                 targetGroup->addChild(new InvocationWriteReadCase(m_context,
3197                                                                                                                                   (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3198                                                                                                                                   (std::string("Write to ") + target + " and read it").c_str(),
3199                                                                                                                                   configs[configNdx].storage,
3200                                                                                                                                   configs[configNdx].flags | extraFlags));
3201
3202                                 targetGroup->addChild(new InvocationReadWriteCase(m_context,
3203                                                                                                                                   (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3204                                                                                                                                   (std::string("Read form ") + target + " and then write to it").c_str(),
3205                                                                                                                                   configs[configNdx].storage,
3206                                                                                                                                   configs[configNdx].flags | extraFlags));
3207
3208                                 targetGroup->addChild(new InvocationOverWriteCase(m_context,
3209                                                                                                                                   (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3210                                                                                                                                   (std::string("Write to ") + target + " twice and read it").c_str(),
3211                                                                                                                                   configs[configNdx].storage,
3212                                                                                                                                   configs[configNdx].flags | extraFlags));
3213
3214                                 targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3215                                                                                                                                    (std::string(configs[configNdx].namePrefix) + "_alias_write").c_str(),
3216                                                                                                                                    (std::string("Write to aliasing ") + target + " and read it").c_str(),
3217                                                                                                                                    InvocationAliasWriteCase::TYPE_WRITE,
3218                                                                                                                                    configs[configNdx].storage,
3219                                                                                                                                    configs[configNdx].flags | extraFlags));
3220
3221                                 targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3222                                                                                                                                    (std::string(configs[configNdx].namePrefix) + "_alias_overwrite").c_str(),
3223                                                                                                                                    (std::string("Write to aliasing ") + target + "s and read it").c_str(),
3224                                                                                                                                    InvocationAliasWriteCase::TYPE_OVERWRITE,
3225                                                                                                                                    configs[configNdx].storage,
3226                                                                                                                                    configs[configNdx].flags | extraFlags));
3227                         }
3228                 }
3229         }
3230
3231         // .inter_call
3232         {
3233                 tcu::TestCaseGroup* const withBarrierGroup              = new tcu::TestCaseGroup(m_testCtx, "with_memory_barrier", "Synchronize with memory barrier");
3234                 tcu::TestCaseGroup* const withoutBarrierGroup   = new tcu::TestCaseGroup(m_testCtx, "without_memory_barrier", "Synchronize without memory barrier");
3235
3236                 interCallGroup->addChild(withBarrierGroup);
3237                 interCallGroup->addChild(withoutBarrierGroup);
3238
3239                 // .with_memory_barrier
3240                 {
3241                         static const struct CaseConfig
3242                         {
3243                                 const char*                                                             namePrefix;
3244                                 const InterCallTestCase::StorageType    storage;
3245                                 const int                                                               flags;
3246                         } configs[] =
3247                         {
3248                                 { "image",                      InterCallTestCase::STORAGE_IMAGE,       0                                                                                                                                               },
3249                                 { "image_atomic",       InterCallTestCase::STORAGE_IMAGE,       InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT    },
3250                                 { "ssbo",                       InterCallTestCase::STORAGE_BUFFER,      0                                                                                                                                               },
3251                                 { "ssbo_atomic",        InterCallTestCase::STORAGE_BUFFER,      InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT    },
3252                         };
3253
3254                         const int seed0 = 123;
3255                         const int seed1 = 457;
3256
3257                         for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3258                         {
3259                                 const char* const target = (configs[configNdx].storage == InterCallTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3260
3261                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3262                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3263                                                                                                                                  (std::string("Write to ") + target + " and read it").c_str(),
3264                                                                                                                                  configs[configNdx].storage,
3265                                                                                                                                  configs[configNdx].flags,
3266                                                                                                                                  InterCallOperations()
3267                                                                                                                                         << op::WriteData::Generate(1, seed0)
3268                                                                                                                                         << op::Barrier()
3269                                                                                                                                         << op::ReadData::Generate(1, seed0)));
3270
3271                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3272                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3273                                                                                                                                  (std::string("Read from ") + target + " and then write to it").c_str(),
3274                                                                                                                                  configs[configNdx].storage,
3275                                                                                                                                  configs[configNdx].flags,
3276                                                                                                                                  InterCallOperations()
3277                                                                                                                                         << op::ReadZeroData::Generate(1)
3278                                                                                                                                         << op::Barrier()
3279                                                                                                                                         << op::WriteData::Generate(1, seed0)));
3280
3281                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3282                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3283                                                                                                                                  (std::string("Write to ") + target + " twice and read it").c_str(),
3284                                                                                                                                  configs[configNdx].storage,
3285                                                                                                                                  configs[configNdx].flags,
3286                                                                                                                                  InterCallOperations()
3287                                                                                                                                         << op::WriteData::Generate(1, seed0)
3288                                                                                                                                         << op::Barrier()
3289                                                                                                                                         << op::WriteData::Generate(1, seed1)
3290                                                                                                                                         << op::Barrier()
3291                                                                                                                                         << op::ReadData::Generate(1, seed1)));
3292
3293                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3294                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_write_read").c_str(),
3295                                                                                                                                  (std::string("Write to multiple ") + target + "s and read them").c_str(),
3296                                                                                                                                  configs[configNdx].storage,
3297                                                                                                                                  configs[configNdx].flags,
3298                                                                                                                                  InterCallOperations()
3299                                                                                                                                         << op::WriteData::Generate(1, seed0)
3300                                                                                                                                         << op::WriteData::Generate(2, seed1)
3301                                                                                                                                         << op::Barrier()
3302                                                                                                                                         << op::ReadMultipleData::Generate(1, seed0, 2, seed1)));
3303
3304                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3305                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_interleaved_write_read").c_str(),
3306                                                                                                                                  (std::string("Write to same ") + target + " in multiple calls and read it").c_str(),
3307                                                                                                                                  configs[configNdx].storage,
3308                                                                                                                                  configs[configNdx].flags,
3309                                                                                                                                  InterCallOperations()
3310                                                                                                                                         << op::WriteDataInterleaved::Generate(1, seed0, true)
3311                                                                                                                                         << op::WriteDataInterleaved::Generate(1, seed1, false)
3312                                                                                                                                         << op::Barrier()
3313                                                                                                                                         << op::ReadDataInterleaved::Generate(1, seed0, seed1)));
3314
3315                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3316                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_ordered").c_str(),
3317                                                                                                                                  (std::string("Two unrelated ") + target + " write-reads").c_str(),
3318                                                                                                                                  configs[configNdx].storage,
3319                                                                                                                                  configs[configNdx].flags,
3320                                                                                                                                  InterCallOperations()
3321                                                                                                                                         << op::WriteData::Generate(1, seed0)
3322                                                                                                                                         << op::WriteData::Generate(2, seed1)
3323                                                                                                                                         << op::Barrier()
3324                                                                                                                                         << op::ReadData::Generate(1, seed0)
3325                                                                                                                                         << op::ReadData::Generate(2, seed1)));
3326
3327                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3328                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_non_ordered").c_str(),
3329                                                                                                                                  (std::string("Two unrelated ") + target + " write-reads").c_str(),
3330                                                                                                                                  configs[configNdx].storage,
3331                                                                                                                                  configs[configNdx].flags,
3332                                                                                                                                  InterCallOperations()
3333                                                                                                                                         << op::WriteData::Generate(1, seed0)
3334                                                                                                                                         << op::WriteData::Generate(2, seed1)
3335                                                                                                                                         << op::Barrier()
3336                                                                                                                                         << op::ReadData::Generate(2, seed1)
3337                                                                                                                                         << op::ReadData::Generate(1, seed0)));
3338                         }
3339
3340                         // .without_memory_barrier
3341                         {
3342                                 struct InvocationConfig
3343                                 {
3344                                         const char*     name;
3345                                         int                     count;
3346                                 };
3347
3348                                 static const InvocationConfig ssboInvocations[] =
3349                                 {
3350                                         { "1k",         1024    },
3351                                         { "4k",         4096    },
3352                                         { "32k",        32768   },
3353                                 };
3354                                 static const InvocationConfig imageInvocations[] =
3355                                 {
3356                                         { "8x8",                8       },
3357                                         { "32x32",              32      },
3358                                         { "128x128",    128     },
3359                                 };
3360                                 static const InvocationConfig counterInvocations[] =
3361                                 {
3362                                         { "32",         32              },
3363                                         { "128",        128             },
3364                                         { "1k",         1024    },
3365                                 };
3366                                 static const int callCounts[] = { 2, 5, 100 };
3367
3368                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(ssboInvocations); ++invocationNdx)
3369                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3370                                                 withoutBarrierGroup->addChild(new SSBOConcurrentAtomicCase(m_context, (std::string("ssbo_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + ssboInvocations[invocationNdx].name + "_invocations").c_str(),       "", callCounts[callCountNdx], ssboInvocations[invocationNdx].count));
3371
3372                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(imageInvocations); ++invocationNdx)
3373                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3374                                                 withoutBarrierGroup->addChild(new ConcurrentImageAtomicCase(m_context, (std::string("image_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + imageInvocations[invocationNdx].name + "_invocations").c_str(),    "", callCounts[callCountNdx], imageInvocations[invocationNdx].count));
3375
3376                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3377                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3378                                                 withoutBarrierGroup->addChild(new ConcurrentAtomicCounterCase(m_context, (std::string("atomic_counter_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),      "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3379
3380                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3381                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3382                                                 withoutBarrierGroup->addChild(new ConcurrentSSBOAtomicCounterMixedCase(m_context, (std::string("ssbo_atomic_counter_mixed_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),  "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3383                         }
3384                 }
3385         }
3386 }
3387
3388 } // Functional
3389 } // gles31
3390 } // deqp