Add tests for VK_KHR_incremental_present
[platform/upstream/VK-GL-CTS.git] / modules / gles31 / functional / es31fSynchronizationTests.cpp
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.1 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Synchronization Tests
22  *//*--------------------------------------------------------------------*/
23
24 #include "es31fSynchronizationTests.hpp"
25 #include "tcuTestLog.hpp"
26 #include "tcuSurface.hpp"
27 #include "tcuRenderTarget.hpp"
28 #include "gluRenderContext.hpp"
29 #include "gluShaderProgram.hpp"
30 #include "gluObjectWrapper.hpp"
31 #include "gluPixelTransfer.hpp"
32 #include "gluContextInfo.hpp"
33 #include "glwFunctions.hpp"
34 #include "glwEnums.hpp"
35 #include "deStringUtil.hpp"
36 #include "deSharedPtr.hpp"
37 #include "deMemory.h"
38 #include "deRandom.hpp"
39
40 #include <map>
41
42 namespace deqp
43 {
44 namespace gles31
45 {
46 namespace Functional
47 {
48 namespace
49 {
50
51
52 static bool validateSortedAtomicRampAdditionValueChain (const std::vector<deUint32>& valueChain, deUint32 sumValue, int& invalidOperationNdx, deUint32& errorDelta, deUint32& errorExpected)
53 {
54         std::vector<deUint32> chainDelta(valueChain.size());
55
56         for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
57                 chainDelta[callNdx] = ((callNdx + 1 == (int)valueChain.size()) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
58
59         // chainDelta contains now the actual additions applied to the value
60         // check there exists an addition ramp form 1 to ...
61         std::sort(chainDelta.begin(), chainDelta.end());
62
63         for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
64         {
65                 if ((int)chainDelta[callNdx] != callNdx+1)
66                 {
67                         invalidOperationNdx = callNdx;
68                         errorDelta = chainDelta[callNdx];
69                         errorExpected = callNdx+1;
70
71                         return false;
72                 }
73         }
74
75         return true;
76 }
77
78 static void readBuffer (const glw::Functions& gl, deUint32 target, int numElements, std::vector<deUint32>& result)
79 {
80         const void* ptr = gl.mapBufferRange(target, 0, (int)(sizeof(deUint32) * numElements), GL_MAP_READ_BIT);
81         GLU_EXPECT_NO_ERROR(gl.getError(), "map");
82
83         if (!ptr)
84                 throw tcu::TestError("mapBufferRange returned NULL");
85
86         result.resize(numElements);
87         memcpy(&result[0], ptr, sizeof(deUint32) * numElements);
88
89         if (gl.unmapBuffer(target) == GL_FALSE)
90                 throw tcu::TestError("unmapBuffer returned false");
91 }
92
93 static deUint32 readBufferUint32 (const glw::Functions& gl, deUint32 target)
94 {
95         std::vector<deUint32> vec;
96
97         readBuffer(gl, target, 1, vec);
98
99         return vec[0];
100 }
101
102 //! Generate a ramp of values from 1 to numElements, and shuffle it
103 void generateShuffledRamp (int numElements, std::vector<int>& ramp)
104 {
105         de::Random rng(0xabcd);
106
107         // some positive (non-zero) unique values
108         ramp.resize(numElements);
109         for (int callNdx = 0; callNdx < numElements; ++callNdx)
110                 ramp[callNdx] = callNdx + 1;
111
112         rng.shuffle(ramp.begin(), ramp.end());
113 }
114
115 class InterInvocationTestCase : public TestCase
116 {
117 public:
118         enum StorageType
119         {
120                 STORAGE_BUFFER = 0,
121                 STORAGE_IMAGE,
122
123                 STORAGE_LAST
124         };
125         enum CaseFlags
126         {
127                 FLAG_ATOMIC                             = 0x1,
128                 FLAG_ALIASING_STORAGES  = 0x2,
129                 FLAG_IN_GROUP                   = 0x4,
130         };
131
132                                                 InterInvocationTestCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags = 0);
133                                                 ~InterInvocationTestCase        (void);
134
135 private:
136         void                            init                                            (void);
137         void                            deinit                                          (void);
138         IterateResult           iterate                                         (void);
139
140         void                            runCompute                                      (void);
141         bool                            verifyResults                           (void);
142         virtual std::string     genShaderSource                         (void) const = 0;
143
144 protected:
145         std::string                     genBarrierSource                        (void) const;
146
147         const StorageType       m_storage;
148         const bool                      m_useAtomic;
149         const bool                      m_aliasingStorages;
150         const bool                      m_syncWithGroup;
151         const int                       m_workWidth;                            // !< total work width
152         const int                       m_workHeight;                           // !<     ...    height
153         const int                       m_localWidth;                           // !< group width
154         const int                       m_localHeight;                          // !< group height
155         const int                       m_elementsPerInvocation;        // !< elements accessed by a single invocation
156
157 private:
158         glw::GLuint                     m_storageBuf;
159         glw::GLuint                     m_storageTex;
160         glw::GLuint                     m_resultBuf;
161         glu::ShaderProgram*     m_program;
162 };
163
164 InterInvocationTestCase::InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
165         : TestCase                                      (context, name, desc)
166         , m_storage                                     (storage)
167         , m_useAtomic                           ((flags & FLAG_ATOMIC) != 0)
168         , m_aliasingStorages            ((flags & FLAG_ALIASING_STORAGES) != 0)
169         , m_syncWithGroup                       ((flags & FLAG_IN_GROUP) != 0)
170         , m_workWidth                           (256)
171         , m_workHeight                          (256)
172         , m_localWidth                          (16)
173         , m_localHeight                         (8)
174         , m_elementsPerInvocation       (8)
175         , m_storageBuf                          (0)
176         , m_storageTex                          (0)
177         , m_resultBuf                           (0)
178         , m_program                                     (DE_NULL)
179 {
180         DE_ASSERT(m_storage < STORAGE_LAST);
181         DE_ASSERT(m_localWidth*m_localHeight <= 128); // minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
182 }
183
184 InterInvocationTestCase::~InterInvocationTestCase (void)
185 {
186         deinit();
187 }
188
189 void InterInvocationTestCase::init (void)
190 {
191         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
192
193         // requirements
194
195         if (m_useAtomic && m_storage == STORAGE_IMAGE && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
196                 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
197
198         // program
199
200         m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genShaderSource()));
201         m_testCtx.getLog() << *m_program;
202         if (!m_program->isOk())
203                 throw tcu::TestError("could not build program");
204
205         // source
206
207         if (m_storage == STORAGE_BUFFER)
208         {
209                 const int                               bufferElements  = m_workWidth * m_workHeight * m_elementsPerInvocation;
210                 const int                               bufferSize              = bufferElements * (int)sizeof(deUint32);
211                 std::vector<deUint32>   zeroBuffer              (bufferElements, 0);
212
213                 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
214
215                 gl.genBuffers(1, &m_storageBuf);
216                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
217                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
218                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
219         }
220         else if (m_storage == STORAGE_IMAGE)
221         {
222                 const int                               bufferElements  = m_workWidth * m_workHeight * m_elementsPerInvocation;
223                 const int                               bufferSize              = bufferElements * (int)sizeof(deUint32);
224
225                 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x" << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
226
227                 gl.genTextures(1, &m_storageTex);
228                 gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
229                 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
230                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
231                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
232                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");
233
234                 // Zero-fill
235                 m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;
236
237                 {
238                         const std::vector<deInt32> zeroBuffer(m_workWidth * m_workHeight * m_elementsPerInvocation, 0);
239                         gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
240                         GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
241                 }
242         }
243         else
244                 DE_ASSERT(DE_FALSE);
245
246         // destination
247
248         {
249                 const int                               bufferElements  = m_workWidth * m_workHeight;
250                 const int                               bufferSize              = bufferElements * (int)sizeof(deUint32);
251                 std::vector<deInt32>    negativeBuffer  (bufferElements, -1);
252
253                 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
254
255                 gl.genBuffers(1, &m_resultBuf);
256                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
257                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
258                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
259         }
260 }
261
262 void InterInvocationTestCase::deinit (void)
263 {
264         if (m_storageBuf)
265         {
266                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
267                 m_storageBuf = DE_NULL;
268         }
269
270         if (m_storageTex)
271         {
272                 m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
273                 m_storageTex = DE_NULL;
274         }
275
276         if (m_resultBuf)
277         {
278                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
279                 m_resultBuf = DE_NULL;
280         }
281
282         delete m_program;
283         m_program = DE_NULL;
284 }
285
286 InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate (void)
287 {
288         // Dispatch
289         runCompute();
290
291         // Verify buffer contents
292         if (verifyResults())
293                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
294         else
295                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
296
297         return STOP;
298 }
299
300 void InterInvocationTestCase::runCompute (void)
301 {
302         const glw::Functions&   gl              = m_context.getRenderContext().getFunctions();
303         const int                               groupsX = m_workWidth / m_localWidth;
304         const int                               groupsY = m_workHeight / m_localHeight;
305
306         DE_ASSERT((m_workWidth % m_localWidth) == 0);
307         DE_ASSERT((m_workHeight % m_localHeight) == 0);
308
309         m_testCtx.getLog()
310                 << tcu::TestLog::Message
311                 << "Dispatching compute.\n"
312                 << "    group size: " << m_localWidth << "x" << m_localHeight << "\n"
313                 << "    dispatch size: " << groupsX << "x" << groupsY << "\n"
314                 << "    total work size: " << m_workWidth << "x" << m_workHeight << "\n"
315                 << tcu::TestLog::EndMessage;
316
317         gl.useProgram(m_program->getProgram());
318
319         // source
320         if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
321         {
322                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
323                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
324         }
325         else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
326         {
327                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
328                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
329                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
330
331                 m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages." << tcu::TestLog::EndMessage;
332         }
333         else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
334         {
335                 gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
336                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
337         }
338         else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
339         {
340                 gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
341                 gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
342
343                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
344
345                 m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages." << tcu::TestLog::EndMessage;
346         }
347         else
348                 DE_ASSERT(DE_FALSE);
349
350         // destination
351         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
352         GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
353
354         // dispatch
355         gl.dispatchCompute(groupsX, groupsY, 1);
356         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
357 }
358
359 bool InterInvocationTestCase::verifyResults (void)
360 {
361         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
362         const int                               errorFloodThreshold     = 5;
363         int                                             numErrorsLogged         = 0;
364         const void*                             mapped                          = DE_NULL;
365         std::vector<deInt32>    results                         (m_workWidth * m_workHeight);
366         bool                                    error                           = false;
367
368         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
369         mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(deInt32), GL_MAP_READ_BIT);
370         GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
371
372         // copy to properly aligned array
373         deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(deUint32));
374
375         if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
376                 throw tcu::TestError("memory map store corrupted");
377
378         // check the results
379         for (int ndx = 0; ndx < (int)results.size(); ++ndx)
380         {
381                 if (results[ndx] != 1)
382                 {
383                         error = true;
384
385                         if (numErrorsLogged == 0)
386                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
387                         if (numErrorsLogged++ < errorFloodThreshold)
388                                 m_testCtx.getLog() << tcu::TestLog::Message << "        Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
389                         else
390                         {
391                                 // after N errors, no point continuing verification
392                                 m_testCtx.getLog() << tcu::TestLog::Message << "        -- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
393                                 break;
394                         }
395                 }
396         }
397
398         if (!error)
399                 m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
400         return !error;
401 }
402
403 std::string InterInvocationTestCase::genBarrierSource (void) const
404 {
405         std::ostringstream buf;
406
407         if (m_syncWithGroup)
408         {
409                 // Wait until all invocations in this work group have their texture/buffer read/write operations complete
410                 // \note We could also use memoryBarrierBuffer() or memoryBarrierImage() in place of groupMemoryBarrier() but
411                 //       we only require intra-workgroup synchronization.
412                 buf << "\n"
413                         << "    groupMemoryBarrier();\n"
414                         << "    barrier();\n"
415                         << "\n";
416         }
417         else if (m_storage == STORAGE_BUFFER)
418         {
419                 DE_ASSERT(!m_syncWithGroup);
420
421                 // Waiting only for data written by this invocation. Since all buffer reads and writes are
422                 // processed in order (within a single invocation), we don't have to do anything.
423                 buf << "\n";
424         }
425         else if (m_storage == STORAGE_IMAGE)
426         {
427                 DE_ASSERT(!m_syncWithGroup);
428
429                 // Waiting only for data written by this invocation. But since operations complete in undefined
430                 // order, we have to wait for them to complete.
431                 buf << "\n"
432                         << "    memoryBarrierImage();\n"
433                         << "\n";
434         }
435         else
436                 DE_ASSERT(DE_FALSE);
437
438         return buf.str();
439 }
440
441 class InvocationBasicCase : public InterInvocationTestCase
442 {
443 public:
444                                                         InvocationBasicCase             (Context& context, const char* name, const char* desc, StorageType storage, int flags);
445 private:
446         std::string                             genShaderSource                 (void) const;
447         virtual std::string             genShaderMainBlock              (void) const = 0;
448 };
449
450 InvocationBasicCase::InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
451         : InterInvocationTestCase(context, name, desc, storage, flags)
452 {
453 }
454
455 std::string InvocationBasicCase::genShaderSource (void) const
456 {
457         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
458         std::ostringstream      buf;
459
460         buf << "#version 310 es\n"
461                 << ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
462                 << "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
463                 << "layout(binding=0, std430) buffer Output\n"
464                 << "{\n"
465                 << "    highp int values[];\n"
466                 << "} sb_result;\n";
467
468         if (m_storage == STORAGE_BUFFER)
469                 buf << "layout(binding=1, std430) coherent buffer Storage\n"
470                         << "{\n"
471                         << "    highp int values[];\n"
472                         << "} sb_store;\n"
473                         << "\n"
474                         << "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
475                         << "{\n"
476                         << "    highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
477                         << "    return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
478                         << "}\n";
479         else if (m_storage == STORAGE_IMAGE)
480                 buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
481                         << "\n"
482                         << "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
483                         << "{\n"
484                         << "    return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
485                         << "}\n";
486         else
487                 DE_ASSERT(DE_FALSE);
488
489         buf << "\n"
490                 << "void main (void)\n"
491                 << "{\n"
492                 << "    int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
493                 << "    int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
494                 << "    bool allOk      = true;\n"
495                 << "\n"
496                 << genShaderMainBlock()
497                 << "\n"
498                 << "    sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
499                 << "}\n";
500
501         return buf.str();
502 }
503
504 class InvocationWriteReadCase : public InvocationBasicCase
505 {
506 public:
507                                         InvocationWriteReadCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags);
508 private:
509         std::string             genShaderMainBlock                      (void) const;
510 };
511
512 InvocationWriteReadCase::InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
513         : InvocationBasicCase(context, name, desc, storage, flags)
514 {
515 }
516
517 std::string InvocationWriteReadCase::genShaderMainBlock (void) const
518 {
519         std::ostringstream buf;
520
521         // write
522
523         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
524         {
525                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
526                         buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
527                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
528                         buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
529                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
530                         buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
531                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
532                         buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
533                 else
534                         DE_ASSERT(DE_FALSE);
535         }
536
537         // barrier
538
539         buf << genBarrierSource();
540
541         // read
542
543         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
544         {
545                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
546
547                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
548                         buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 0) == groupNdx);\n";
549                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
550                         buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
551                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
552                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 0) == groupNdx);\n";
553                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
554                         buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
555                 else
556                         DE_ASSERT(DE_FALSE);
557         }
558
559         return buf.str();
560 }
561
562 class InvocationReadWriteCase : public InvocationBasicCase
563 {
564 public:
565                                         InvocationReadWriteCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags);
566 private:
567         std::string             genShaderMainBlock                      (void) const;
568 };
569
570 InvocationReadWriteCase::InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
571         : InvocationBasicCase(context, name, desc, storage, flags)
572 {
573 }
574
575 std::string InvocationReadWriteCase::genShaderMainBlock (void) const
576 {
577         std::ostringstream buf;
578
579         // read
580
581         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
582         {
583                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
584
585                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
586                         buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == 0);\n";
587                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
588                         buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
589                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
590                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == 0);\n";
591                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
592                         buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
593                 else
594                         DE_ASSERT(DE_FALSE);
595         }
596
597         // barrier
598
599         buf << genBarrierSource();
600
601         // write
602
603         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
604         {
605                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
606                         buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
607                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
608                         buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
609                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
610                         buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
611                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
612                         buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
613                 else
614                         DE_ASSERT(DE_FALSE);
615         }
616
617         return buf.str();
618 }
619
620 class InvocationOverWriteCase : public InvocationBasicCase
621 {
622 public:
623                                         InvocationOverWriteCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags);
624 private:
625         std::string             genShaderMainBlock                      (void) const;
626 };
627
628 InvocationOverWriteCase::InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
629         : InvocationBasicCase(context, name, desc, storage, flags)
630 {
631 }
632
633 std::string InvocationOverWriteCase::genShaderMainBlock (void) const
634 {
635         std::ostringstream buf;
636
637         // write
638
639         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
640         {
641                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
642                         buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
643                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
644                         buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
645                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
646                         buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
647                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
648                         buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
649                 else
650                         DE_ASSERT(DE_FALSE);
651         }
652
653         // barrier
654
655         buf << genBarrierSource();
656
657         // write over
658
659         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
660         {
661                 // write another invocation's value or our own value depending on test type
662                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+4) + ", " + de::toString(3*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
663
664                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
665                         buf << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
666                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
667                         buf << "\tsb_store.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
668                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
669                         buf << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
670                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
671                         buf << "\timageStore(u_image, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
672                 else
673                         DE_ASSERT(DE_FALSE);
674         }
675
676         // barrier
677
678         buf << genBarrierSource();
679
680         // read
681
682         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
683         {
684                 // check another invocation's value or our own value depending on test type
685                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
686
687                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
688                         buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == groupNdx);\n";
689                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
690                         buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
691                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
692                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == groupNdx);\n";
693                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
694                         buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
695                 else
696                         DE_ASSERT(DE_FALSE);
697         }
698
699         return buf.str();
700 }
701
702 class InvocationAliasWriteCase : public InterInvocationTestCase
703 {
704 public:
705         enum TestType
706         {
707                 TYPE_WRITE = 0,
708                 TYPE_OVERWRITE,
709
710                 TYPE_LAST
711         };
712
713                                         InvocationAliasWriteCase        (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags);
714 private:
715         std::string             genShaderSource                         (void) const;
716
717         const TestType  m_type;
718 };
719
720 InvocationAliasWriteCase::InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags)
721         : InterInvocationTestCase       (context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
722         , m_type                                        (type)
723 {
724         DE_ASSERT(type < TYPE_LAST);
725 }
726
727 std::string InvocationAliasWriteCase::genShaderSource (void) const
728 {
729         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
730         std::ostringstream      buf;
731
732         buf << "#version 310 es\n"
733                 << ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
734                 << "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
735                 << "layout(binding=0, std430) buffer Output\n"
736                 << "{\n"
737                 << "    highp int values[];\n"
738                 << "} sb_result;\n";
739
740         if (m_storage == STORAGE_BUFFER)
741                 buf << "layout(binding=1, std430) coherent buffer Storage0\n"
742                         << "{\n"
743                         << "    highp int values[];\n"
744                         << "} sb_store0;\n"
745                         << "layout(binding=2, std430) coherent buffer Storage1\n"
746                         << "{\n"
747                         << "    highp int values[];\n"
748                         << "} sb_store1;\n"
749                         << "\n"
750                         << "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
751                         << "{\n"
752                         << "    highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
753                         << "    return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
754                         << "}\n";
755         else if (m_storage == STORAGE_IMAGE)
756                 buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
757                         << "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
758                         << "\n"
759                         << "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
760                         << "{\n"
761                         << "    return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
762                         << "}\n";
763         else
764                 DE_ASSERT(DE_FALSE);
765
766         buf << "\n"
767                 << "void main (void)\n"
768                 << "{\n"
769                 << "    int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
770                 << "    int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
771                 << "    bool allOk      = true;\n"
772                 << "\n";
773
774         if (m_type == TYPE_OVERWRITE)
775         {
776                 // write
777
778                 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
779                 {
780                         if (m_storage == STORAGE_BUFFER && m_useAtomic)
781                                 buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
782                         else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
783                                 buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
784                         else if (m_storage == STORAGE_IMAGE && m_useAtomic)
785                                 buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
786                         else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
787                                 buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
788                         else
789                                 DE_ASSERT(DE_FALSE);
790                 }
791
792                 // barrier
793
794                 buf << genBarrierSource();
795         }
796         else
797                 DE_ASSERT(m_type == TYPE_WRITE);
798
799         // write (again)
800
801         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
802         {
803                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+2) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
804
805                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
806                         buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
807                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
808                         buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
809                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
810                         buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
811                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
812                         buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
813                 else
814                         DE_ASSERT(DE_FALSE);
815         }
816
817         // barrier
818
819         buf << genBarrierSource();
820
821         // read
822
823         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
824         {
825                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
826                         buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 123) == groupNdx);\n";
827                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
828                         buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] == groupNdx);\n";
829                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
830                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 123) == groupNdx);\n";
831                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
832                         buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << ")).x == groupNdx);\n";
833                 else
834                         DE_ASSERT(DE_FALSE);
835         }
836
837         // return result
838
839         buf << "\n"
840                 << "    sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
841                 << "}\n";
842
843         return buf.str();
844 }
845
846 namespace op
847 {
848
//! Parameters of a write command: the storage handle to write and the seed used for the program.
struct WriteData
{
	int targetHandle;
	int seed;

	//! Returns a WriteData holding the given handle and seed.
	static WriteData Generate(int targetHandle, int seed)
	{
		const WriteData result = { targetHandle, seed };
		return result;
	}
};
864
//! Parameters of a read command: the storage handle to read and the seed used for the program.
struct ReadData
{
	int targetHandle;
	int seed;

	//! Returns a ReadData holding the given handle and seed.
	static ReadData Generate(int targetHandle, int seed)
	{
		const ReadData result = { targetHandle, seed };
		return result;
	}
};
880
//! Barrier command; carries no parameters.
struct Barrier
{
};
884
//! Parameters of an interleaved write command: target handle, seed and the even/odd selector.
struct WriteDataInterleaved
{
	int		targetHandle;
	int		seed;
	bool	evenOdd;

	//! Returns a WriteDataInterleaved holding the given values.
	static WriteDataInterleaved Generate(int targetHandle, int seed, bool evenOdd)
	{
		const WriteDataInterleaved result = { targetHandle, seed, evenOdd };
		return result;
	}
};
902
//! Parameters of an interleaved read command: one target handle and two seeds.
struct ReadDataInterleaved
{
	int targetHandle;
	int seed0;
	int seed1;

	//! Returns a ReadDataInterleaved holding the given handle and seeds.
	static ReadDataInterleaved Generate(int targetHandle, int seed0, int seed1)
	{
		const ReadDataInterleaved result = { targetHandle, seed0, seed1 };
		return result;
	}
};
920
//! Parameters of a command reading two storages: a (handle, seed) pair for each.
struct ReadMultipleData
{
	int targetHandle0;
	int seed0;
	int targetHandle1;
	int seed1;

	//! Returns a ReadMultipleData holding the given handle/seed pairs.
	static ReadMultipleData Generate(int targetHandle0, int seed0, int targetHandle1, int seed1)
	{
		const ReadMultipleData result = { targetHandle0, seed0, targetHandle1, seed1 };
		return result;
	}
};
940
//! Parameters of a read-zero command: only the target storage handle.
struct ReadZeroData
{
	int targetHandle;

	//! Returns a ReadZeroData holding the given handle.
	static ReadZeroData Generate(int targetHandle)
	{
		const ReadZeroData result = { targetHandle };
		return result;
	}
};
954
955 } // namespace op
956
957 class InterCallTestCase;
958
959 class InterCallOperations
960 {
961 public:
962         InterCallOperations& operator<< (const op::WriteData&);
963         InterCallOperations& operator<< (const op::ReadData&);
964         InterCallOperations& operator<< (const op::Barrier&);
965         InterCallOperations& operator<< (const op::ReadMultipleData&);
966         InterCallOperations& operator<< (const op::WriteDataInterleaved&);
967         InterCallOperations& operator<< (const op::ReadDataInterleaved&);
968         InterCallOperations& operator<< (const op::ReadZeroData&);
969
970 private:
971         struct Command
972         {
973                 enum CommandType
974                 {
975                         TYPE_WRITE = 0,
976                         TYPE_READ,
977                         TYPE_BARRIER,
978                         TYPE_READ_MULTIPLE,
979                         TYPE_WRITE_INTERLEAVE,
980                         TYPE_READ_INTERLEAVE,
981                         TYPE_READ_ZERO,
982
983                         TYPE_LAST
984                 };
985
986                 CommandType type;
987
988                 union CommandUnion
989                 {
990                         op::WriteData                           write;
991                         op::ReadData                            read;
992                         op::Barrier                                     barrier;
993                         op::ReadMultipleData            readMulti;
994                         op::WriteDataInterleaved        writeInterleave;
995                         op::ReadDataInterleaved         readInterleave;
996                         op::ReadZeroData                        readZero;
997                 } u_cmd;
998         };
999
1000         friend class InterCallTestCase;
1001
1002         std::vector<Command> m_cmds;
1003 };
1004
1005 InterCallOperations& InterCallOperations::operator<< (const op::WriteData& cmd)
1006 {
1007         m_cmds.push_back(Command());
1008         m_cmds.back().type = Command::TYPE_WRITE;
1009         m_cmds.back().u_cmd.write = cmd;
1010
1011         return *this;
1012 }
1013
1014 InterCallOperations& InterCallOperations::operator<< (const op::ReadData& cmd)
1015 {
1016         m_cmds.push_back(Command());
1017         m_cmds.back().type = Command::TYPE_READ;
1018         m_cmds.back().u_cmd.read = cmd;
1019
1020         return *this;
1021 }
1022
1023 InterCallOperations& InterCallOperations::operator<< (const op::Barrier& cmd)
1024 {
1025         m_cmds.push_back(Command());
1026         m_cmds.back().type = Command::TYPE_BARRIER;
1027         m_cmds.back().u_cmd.barrier = cmd;
1028
1029         return *this;
1030 }
1031
1032 InterCallOperations& InterCallOperations::operator<< (const op::ReadMultipleData& cmd)
1033 {
1034         m_cmds.push_back(Command());
1035         m_cmds.back().type = Command::TYPE_READ_MULTIPLE;
1036         m_cmds.back().u_cmd.readMulti = cmd;
1037
1038         return *this;
1039 }
1040
1041 InterCallOperations& InterCallOperations::operator<< (const op::WriteDataInterleaved& cmd)
1042 {
1043         m_cmds.push_back(Command());
1044         m_cmds.back().type = Command::TYPE_WRITE_INTERLEAVE;
1045         m_cmds.back().u_cmd.writeInterleave = cmd;
1046
1047         return *this;
1048 }
1049
1050 InterCallOperations& InterCallOperations::operator<< (const op::ReadDataInterleaved& cmd)
1051 {
1052         m_cmds.push_back(Command());
1053         m_cmds.back().type = Command::TYPE_READ_INTERLEAVE;
1054         m_cmds.back().u_cmd.readInterleave = cmd;
1055
1056         return *this;
1057 }
1058
1059 InterCallOperations& InterCallOperations::operator<< (const op::ReadZeroData& cmd)
1060 {
1061         m_cmds.push_back(Command());
1062         m_cmds.back().type = Command::TYPE_READ_ZERO;
1063         m_cmds.back().u_cmd.readZero = cmd;
1064
1065         return *this;
1066 }
1067
1068 class InterCallTestCase : public TestCase
1069 {
1070 public:
1071         enum StorageType
1072         {
1073                 STORAGE_BUFFER = 0,
1074                 STORAGE_IMAGE,
1075
1076                 STORAGE_LAST
1077         };
1078         enum Flags
1079         {
1080                 FLAG_USE_ATOMIC = 1,
1081                 FLAG_USE_INT    = 2,
1082         };
1083                                                                                                         InterCallTestCase                       (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops);
1084                                                                                                         ~InterCallTestCase                      (void);
1085
1086 private:
1087         void                                                                                    init                                            (void);
1088         void                                                                                    deinit                                          (void);
1089         IterateResult                                                                   iterate                                         (void);
1090         bool                                                                                    verifyResults                           (void);
1091
1092         void                                                                                    runCommand                                      (const op::WriteData& cmd, int stepNdx, int& programFriendlyName);
1093         void                                                                                    runCommand                                      (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1094         void                                                                                    runCommand                                      (const op::Barrier&);
1095         void                                                                                    runCommand                                      (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1096         void                                                                                    runCommand                                      (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName);
1097         void                                                                                    runCommand                                      (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1098         void                                                                                    runCommand                                      (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1099         void                                                                                    runSingleRead                           (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1100
1101         glw::GLuint                                                                             genStorage                                      (int friendlyName);
1102         glw::GLuint                                                                             genResultStorage                        (void);
1103         glu::ShaderProgram*                                                             genWriteProgram                         (int seed);
1104         glu::ShaderProgram*                                                             genReadProgram                          (int seed);
1105         glu::ShaderProgram*                                                             genReadMultipleProgram          (int seed0, int seed1);
1106         glu::ShaderProgram*                                                             genWriteInterleavedProgram      (int seed, bool evenOdd);
1107         glu::ShaderProgram*                                                             genReadInterleavedProgram       (int seed0, int seed1);
1108         glu::ShaderProgram*                                                             genReadZeroProgram                      (void);
1109
1110         const StorageType                                                               m_storage;
1111         const int                                                                               m_invocationGridSize;   // !< width and height of the two dimensional work dispatch
1112         const int                                                                               m_perInvocationSize;    // !< number of elements accessed in single invocation
1113         const std::vector<InterCallOperations::Command> m_cmds;
1114         const bool                                                                              m_useAtomic;
1115         const bool                                                                              m_formatInteger;
1116
1117         std::vector<glu::ShaderProgram*>                                m_operationPrograms;
1118         std::vector<glw::GLuint>                                                m_operationResultStorages;
1119         std::map<int, glw::GLuint>                                              m_storageIDs;
1120 };
1121
1122 InterCallTestCase::InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops)
1123         : TestCase                                      (context, name, desc)
1124         , m_storage                                     (storage)
1125         , m_invocationGridSize          (512)
1126         , m_perInvocationSize           (2)
1127         , m_cmds                                        (ops.m_cmds)
1128         , m_useAtomic                           ((flags & FLAG_USE_ATOMIC) != 0)
1129         , m_formatInteger                       ((flags & FLAG_USE_INT) != 0)
1130 {
1131 }
1132
1133 InterCallTestCase::~InterCallTestCase (void)
1134 {
1135         deinit();
1136 }
1137
1138 void InterCallTestCase::init (void)
1139 {
1140         int programFriendlyName = 0;
1141
1142         // requirements
1143
1144         if (m_useAtomic && m_storage == STORAGE_IMAGE && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1145                 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
1146
1147         // generate resources and validate command list
1148
1149         m_operationPrograms.resize(m_cmds.size(), DE_NULL);
1150         m_operationResultStorages.resize(m_cmds.size(), 0);
1151
1152         for (int step = 0; step < (int)m_cmds.size(); ++step)
1153         {
1154                 switch (m_cmds[step].type)
1155                 {
1156                         case InterCallOperations::Command::TYPE_WRITE:
1157                         {
1158                                 const op::WriteData& cmd = m_cmds[step].u_cmd.write;
1159
1160                                 // new storage handle?
1161                                 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1162                                         m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1163
1164                                 // program
1165                                 {
1166                                         glu::ShaderProgram* program = genWriteProgram(cmd.seed);
1167
1168                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1169                                         m_testCtx.getLog() << *program;
1170
1171                                         if (!program->isOk())
1172                                                 throw tcu::TestError("could not build program");
1173
1174                                         m_operationPrograms[step] = program;
1175                                 }
1176                                 break;
1177                         }
1178
1179                         case InterCallOperations::Command::TYPE_READ:
1180                         {
1181                                 const op::ReadData& cmd = m_cmds[step].u_cmd.read;
1182                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1183
1184                                 // program and result storage
1185                                 {
1186                                         glu::ShaderProgram* program = genReadProgram(cmd.seed);
1187
1188                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1189                                         m_testCtx.getLog() << *program;
1190
1191                                         if (!program->isOk())
1192                                                 throw tcu::TestError("could not build program");
1193
1194                                         m_operationPrograms[step] = program;
1195                                         m_operationResultStorages[step] = genResultStorage();
1196                                 }
1197                                 break;
1198                         }
1199
1200                         case InterCallOperations::Command::TYPE_BARRIER:
1201                         {
1202                                 break;
1203                         }
1204
1205                         case InterCallOperations::Command::TYPE_READ_MULTIPLE:
1206                         {
1207                                 const op::ReadMultipleData& cmd = m_cmds[step].u_cmd.readMulti;
1208                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle0) != m_storageIDs.end());
1209                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle1) != m_storageIDs.end());
1210
1211                                 // program
1212                                 {
1213                                         glu::ShaderProgram* program = genReadMultipleProgram(cmd.seed0, cmd.seed1);
1214
1215                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1216                                         m_testCtx.getLog() << *program;
1217
1218                                         if (!program->isOk())
1219                                                 throw tcu::TestError("could not build program");
1220
1221                                         m_operationPrograms[step] = program;
1222                                         m_operationResultStorages[step] = genResultStorage();
1223                                 }
1224                                 break;
1225                         }
1226
1227                         case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
1228                         {
1229                                 const op::WriteDataInterleaved& cmd = m_cmds[step].u_cmd.writeInterleave;
1230
1231                                 // new storage handle?
1232                                 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1233                                         m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1234
1235                                 // program
1236                                 {
1237                                         glu::ShaderProgram* program = genWriteInterleavedProgram(cmd.seed, cmd.evenOdd);
1238
1239                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1240                                         m_testCtx.getLog() << *program;
1241
1242                                         if (!program->isOk())
1243                                                 throw tcu::TestError("could not build program");
1244
1245                                         m_operationPrograms[step] = program;
1246                                 }
1247                                 break;
1248                         }
1249
1250                         case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
1251                         {
1252                                 const op::ReadDataInterleaved& cmd = m_cmds[step].u_cmd.readInterleave;
1253                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1254
1255                                 // program
1256                                 {
1257                                         glu::ShaderProgram* program = genReadInterleavedProgram(cmd.seed0, cmd.seed1);
1258
1259                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1260                                         m_testCtx.getLog() << *program;
1261
1262                                         if (!program->isOk())
1263                                                 throw tcu::TestError("could not build program");
1264
1265                                         m_operationPrograms[step] = program;
1266                                         m_operationResultStorages[step] = genResultStorage();
1267                                 }
1268                                 break;
1269                         }
1270
1271                         case InterCallOperations::Command::TYPE_READ_ZERO:
1272                         {
1273                                 const op::ReadZeroData& cmd = m_cmds[step].u_cmd.readZero;
1274
1275                                 // new storage handle?
1276                                 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1277                                         m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1278
1279                                 // program
1280                                 {
1281                                         glu::ShaderProgram* program = genReadZeroProgram();
1282
1283                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1284                                         m_testCtx.getLog() << *program;
1285
1286                                         if (!program->isOk())
1287                                                 throw tcu::TestError("could not build program");
1288
1289                                         m_operationPrograms[step] = program;
1290                                         m_operationResultStorages[step] = genResultStorage();
1291                                 }
1292                                 break;
1293                         }
1294
1295                         default:
1296                                 DE_ASSERT(DE_FALSE);
1297                 }
1298         }
1299 }
1300
1301 void InterCallTestCase::deinit (void)
1302 {
1303         // programs
1304         for (int ndx = 0; ndx < (int)m_operationPrograms.size(); ++ndx)
1305                 delete m_operationPrograms[ndx];
1306         m_operationPrograms.clear();
1307
1308         // result storages
1309         for (int ndx = 0; ndx < (int)m_operationResultStorages.size(); ++ndx)
1310         {
1311                 if (m_operationResultStorages[ndx])
1312                         m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_operationResultStorages[ndx]);
1313         }
1314         m_operationResultStorages.clear();
1315
1316         // storage
1317         for (std::map<int, glw::GLuint>::const_iterator it = m_storageIDs.begin(); it != m_storageIDs.end(); ++it)
1318         {
1319                 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1320
1321                 if (m_storage == STORAGE_BUFFER)
1322                         gl.deleteBuffers(1, &it->second);
1323                 else if (m_storage == STORAGE_IMAGE)
1324                         gl.deleteTextures(1, &it->second);
1325                 else
1326                         DE_ASSERT(DE_FALSE);
1327         }
1328         m_storageIDs.clear();
1329 }
1330
1331 InterCallTestCase::IterateResult InterCallTestCase::iterate (void)
1332 {
1333         int programFriendlyName                 = 0;
1334         int resultStorageFriendlyName   = 0;
1335
1336         m_testCtx.getLog() << tcu::TestLog::Message << "Running operations:" << tcu::TestLog::EndMessage;
1337
1338         // run steps
1339
1340         for (int step = 0; step < (int)m_cmds.size(); ++step)
1341         {
1342                 switch (m_cmds[step].type)
1343                 {
1344                         case InterCallOperations::Command::TYPE_WRITE:                          runCommand(m_cmds[step].u_cmd.write,                    step,   programFriendlyName);                                                           break;
1345                         case InterCallOperations::Command::TYPE_READ:                           runCommand(m_cmds[step].u_cmd.read,                             step,   programFriendlyName, resultStorageFriendlyName);        break;
1346                         case InterCallOperations::Command::TYPE_BARRIER:                        runCommand(m_cmds[step].u_cmd.barrier);                                                                                                                                         break;
1347                         case InterCallOperations::Command::TYPE_READ_MULTIPLE:          runCommand(m_cmds[step].u_cmd.readMulti,                step,   programFriendlyName, resultStorageFriendlyName);        break;
1348                         case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:       runCommand(m_cmds[step].u_cmd.writeInterleave,  step,   programFriendlyName);                                                           break;
1349                         case InterCallOperations::Command::TYPE_READ_INTERLEAVE:        runCommand(m_cmds[step].u_cmd.readInterleave,   step,   programFriendlyName, resultStorageFriendlyName);        break;
1350                         case InterCallOperations::Command::TYPE_READ_ZERO:                      runCommand(m_cmds[step].u_cmd.readZero,                 step,   programFriendlyName, resultStorageFriendlyName);        break;
1351                         default:
1352                                 DE_ASSERT(DE_FALSE);
1353                 }
1354         }
1355
1356         // read results from result buffers
1357         if (verifyResults())
1358                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1359         else
1360                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
1361
1362         return STOP;
1363 }
1364
1365 bool InterCallTestCase::verifyResults (void)
1366 {
1367         int             resultBufferFriendlyName        = 0;
1368         bool    allResultsOk                            = true;
1369         bool    anyResult                                       = false;
1370
1371         m_testCtx.getLog() << tcu::TestLog::Message << "Reading verifier program results" << tcu::TestLog::EndMessage;
1372
1373         for (int step = 0; step < (int)m_cmds.size(); ++step)
1374         {
1375                 const int       errorFloodThreshold     = 5;
1376                 int                     numErrorsLogged         = 0;
1377
1378                 if (m_operationResultStorages[step])
1379                 {
1380                         const glw::Functions&   gl              = m_context.getRenderContext().getFunctions();
1381                         const void*                             mapped  = DE_NULL;
1382                         std::vector<deInt32>    results (m_invocationGridSize * m_invocationGridSize);
1383                         bool                                    error   = false;
1384
1385                         anyResult = true;
1386
1387                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_operationResultStorages[step]);
1388                         mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), GL_MAP_READ_BIT);
1389                         GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
1390
1391                         // copy to properly aligned array
1392                         deMemcpy(&results[0], mapped, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32));
1393
1394                         if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
1395                                 throw tcu::TestError("memory map store corrupted");
1396
1397                         // check the results
1398                         for (int ndx = 0; ndx < (int)results.size(); ++ndx)
1399                         {
1400                                 if (results[ndx] != 1)
1401                                 {
1402                                         error = true;
1403
1404                                         if (numErrorsLogged == 0)
1405                                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
1406                                         if (numErrorsLogged++ < errorFloodThreshold)
1407                                                 m_testCtx.getLog() << tcu::TestLog::Message << "        Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
1408                                         else
1409                                         {
1410                                                 // after N errors, no point continuing verification
1411                                                 m_testCtx.getLog() << tcu::TestLog::Message << "        -- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
1412                                                 break;
1413                                         }
1414                                 }
1415                         }
1416
1417                         if (error)
1418                         {
1419                                 allResultsOk = false;
1420                         }
1421                         else
1422                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " ok." << tcu::TestLog::EndMessage;
1423                 }
1424         }
1425
1426         DE_ASSERT(anyResult);
1427         DE_UNREF(anyResult);
1428
1429         return allResultsOk;
1430 }
1431
1432 void InterCallTestCase::runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName)
1433 {
1434         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1435
1436         m_testCtx.getLog()
1437                 << tcu::TestLog::Message
1438                 << "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1439                 << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1440                 << tcu::TestLog::EndMessage;
1441
1442         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1443
1444         // set destination
1445         if (m_storage == STORAGE_BUFFER)
1446         {
1447                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1448
1449                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1450                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1451         }
1452         else if (m_storage == STORAGE_IMAGE)
1453         {
1454                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1455
1456                 gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1457                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1458         }
1459         else
1460                 DE_ASSERT(DE_FALSE);
1461
1462         // calc
1463         gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1464         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1465 }
1466
1467 void InterCallTestCase::runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1468 {
1469         runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1470 }
1471
1472 void InterCallTestCase::runCommand (const op::Barrier& cmd)
1473 {
1474         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1475
1476         DE_UNREF(cmd);
1477
1478         if (m_storage == STORAGE_BUFFER)
1479         {
1480                 m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_STORAGE_BARRIER_BIT" << tcu::TestLog::EndMessage;
1481                 gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
1482         }
1483         else if (m_storage == STORAGE_IMAGE)
1484         {
1485                 m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_IMAGE_ACCESS_BARRIER_BIT" << tcu::TestLog::EndMessage;
1486                 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1487         }
1488         else
1489                 DE_ASSERT(DE_FALSE);
1490 }
1491
1492 void InterCallTestCase::runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1493 {
1494         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1495
1496         m_testCtx.getLog()
1497                 << tcu::TestLog::Message
1498                 << "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffers") : ("images")) << " #" << cmd.targetHandle0 << " and #" << cmd.targetHandle1 << ".\n"
1499                 << "    Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1500                 << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1501                 << tcu::TestLog::EndMessage;
1502
1503         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1504
1505         // set sources
1506         if (m_storage == STORAGE_BUFFER)
1507         {
1508                 DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1509                 DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1510
1511                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[cmd.targetHandle0]);
1512                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageIDs[cmd.targetHandle1]);
1513                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffers");
1514         }
1515         else if (m_storage == STORAGE_IMAGE)
1516         {
1517                 DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1518                 DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1519
1520                 gl.bindImageTexture(1, m_storageIDs[cmd.targetHandle0], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1521                 gl.bindImageTexture(2, m_storageIDs[cmd.targetHandle1], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1522                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
1523         }
1524         else
1525                 DE_ASSERT(DE_FALSE);
1526
1527         // set destination
1528         DE_ASSERT(m_operationResultStorages[stepNdx]);
1529         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1530         GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1531
1532         // calc
1533         gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1534         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read multi");
1535 }
1536
1537 void InterCallTestCase::runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName)
1538 {
1539         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1540
1541         m_testCtx.getLog()
1542                 << tcu::TestLog::Message
1543                 << "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1544                 << "    Writing to every " << ((cmd.evenOdd) ? ("even") : ("odd")) << " " << ((m_storage == STORAGE_BUFFER) ? ("element") : ("column")) << ".\n"
1545                 << "    Dispatch size: " << m_invocationGridSize / 2 << "x" << m_invocationGridSize << "."
1546                 << tcu::TestLog::EndMessage;
1547
1548         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1549
1550         // set destination
1551         if (m_storage == STORAGE_BUFFER)
1552         {
1553                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1554
1555                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1556                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1557         }
1558         else if (m_storage == STORAGE_IMAGE)
1559         {
1560                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1561
1562                 gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1563                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1564         }
1565         else
1566                 DE_ASSERT(DE_FALSE);
1567
1568         // calc
1569         gl.dispatchCompute(m_invocationGridSize / 2, m_invocationGridSize, 1);
1570         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1571 }
1572
1573 void InterCallTestCase::runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1574 {
1575         runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1576 }
1577
1578 void InterCallTestCase::runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1579 {
1580         runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1581 }
1582
1583 void InterCallTestCase::runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1584 {
1585         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1586
1587         m_testCtx.getLog()
1588                 << tcu::TestLog::Message
1589                 << "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << targetHandle << ".\n"
1590                 << "    Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1591                 << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1592                 << tcu::TestLog::EndMessage;
1593
1594         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1595
1596         // set source
1597         if (m_storage == STORAGE_BUFFER)
1598         {
1599                 DE_ASSERT(m_storageIDs[targetHandle]);
1600
1601                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[targetHandle]);
1602                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffer");
1603         }
1604         else if (m_storage == STORAGE_IMAGE)
1605         {
1606                 DE_ASSERT(m_storageIDs[targetHandle]);
1607
1608                 gl.bindImageTexture(1, m_storageIDs[targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1609                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
1610         }
1611         else
1612                 DE_ASSERT(DE_FALSE);
1613
1614         // set destination
1615         DE_ASSERT(m_operationResultStorages[stepNdx]);
1616         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1617         GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1618
1619         // calc
1620         gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1621         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read");
1622 }
1623
1624 glw::GLuint InterCallTestCase::genStorage (int friendlyName)
1625 {
1626         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1627
1628         if (m_storage == STORAGE_BUFFER)
1629         {
1630                 const int               numElements             = m_invocationGridSize * m_invocationGridSize * m_perInvocationSize;
1631                 const int               bufferSize              = numElements * (int)((m_formatInteger) ? (sizeof(deInt32)) : (sizeof(glw::GLfloat)));
1632                 glw::GLuint             retVal                  = 0;
1633
1634                 m_testCtx.getLog() << tcu::TestLog::Message << "Creating buffer #" << friendlyName << ", size " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
1635
1636                 gl.genBuffers(1, &retVal);
1637                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1638
1639                 if (m_formatInteger)
1640                 {
1641                         const std::vector<deUint32> zeroBuffer(numElements, 0);
1642                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1643                 }
1644                 else
1645                 {
1646                         const std::vector<float> zeroBuffer(numElements, 0.0f);
1647                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1648                 }
1649                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1650
1651                 return retVal;
1652         }
1653         else if (m_storage == STORAGE_IMAGE)
1654         {
1655                 const int       imageWidth      = m_invocationGridSize;
1656                 const int       imageHeight     = m_invocationGridSize * m_perInvocationSize;
1657                 glw::GLuint     retVal          = 0;
1658
1659                 m_testCtx.getLog()
1660                         << tcu::TestLog::Message
1661                         << "Creating image #" << friendlyName << ", size " << imageWidth << "x" << imageHeight
1662                         << ", internalformat = " << ((m_formatInteger) ? ("r32i") : ("r32f"))
1663                         << ", size = " << (imageWidth*imageHeight*sizeof(deUint32)) << " bytes."
1664                         << tcu::TestLog::EndMessage;
1665
1666                 gl.genTextures(1, &retVal);
1667                 gl.bindTexture(GL_TEXTURE_2D, retVal);
1668
1669                 if (m_formatInteger)
1670                         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, imageWidth, imageHeight);
1671                 else
1672                         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32F, imageWidth, imageHeight);
1673
1674                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1675                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1676                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen image");
1677
1678                 m_testCtx.getLog()
1679                         << tcu::TestLog::Message
1680                         << "Filling image with 0"
1681                         << tcu::TestLog::EndMessage;
1682
1683                 if (m_formatInteger)
1684                 {
1685                         const std::vector<deInt32> zeroBuffer(imageWidth * imageHeight, 0);
1686                         gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
1687                 }
1688                 else
1689                 {
1690                         const std::vector<float> zeroBuffer(imageWidth * imageHeight, 0.0f);
1691                         gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED, GL_FLOAT, &zeroBuffer[0]);
1692                 }
1693
1694                 GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
1695
1696                 return retVal;
1697         }
1698         else
1699         {
1700                 DE_ASSERT(DE_FALSE);
1701                 return 0;
1702         }
1703 }
1704
1705 glw::GLuint InterCallTestCase::genResultStorage (void)
1706 {
1707         const glw::Functions&   gl              = m_context.getRenderContext().getFunctions();
1708         glw::GLuint                             retVal  = 0;
1709
1710         gl.genBuffers(1, &retVal);
1711         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1712         gl.bufferData(GL_SHADER_STORAGE_BUFFER, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), DE_NULL, GL_STATIC_DRAW);
1713         GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1714
1715         return retVal;
1716 }
1717
1718 glu::ShaderProgram* InterCallTestCase::genWriteProgram (int seed)
1719 {
1720         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1721         std::ostringstream      buf;
1722
1723         buf << "#version 310 es\n"
1724                 << ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1725                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1726
1727         if (m_storage == STORAGE_BUFFER)
1728                 buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1729                         << "{\n"
1730                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1731                         << "} sb_out;\n";
1732         else if (m_storage == STORAGE_IMAGE)
1733                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1734         else
1735                 DE_ASSERT(DE_FALSE);
1736
1737         buf << "\n"
1738                 << "void main (void)\n"
1739                 << "{\n"
1740                 << "    uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1741                 << "    int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1742                 << "\n";
1743
1744         // Write to buffer/image m_perInvocationSize elements
1745         if (m_storage == STORAGE_BUFFER)
1746         {
1747                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1748                 {
1749                         if (m_useAtomic)
1750                                 buf << "        atomicExchange(";
1751                         else
1752                                 buf << "        ";
1753
1754                         buf << "sb_out.values[(groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]";
1755
1756                         if (m_useAtomic)
1757                                 buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1758                         else
1759                                 buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1760                 }
1761         }
1762         else if (m_storage == STORAGE_IMAGE)
1763         {
1764                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1765                 {
1766                         if (m_useAtomic)
1767                                 buf << "        imageAtomicExchange";
1768                         else
1769                                 buf << "        imageStore";
1770
1771                         buf << "(u_imageOut, ivec2((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1772
1773                         if (m_useAtomic)
1774                                 buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1775                         else
1776                                 buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
1777                 }
1778         }
1779         else
1780                 DE_ASSERT(DE_FALSE);
1781
1782         buf << "}\n";
1783
1784         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
1785 }
1786
1787 glu::ShaderProgram* InterCallTestCase::genReadProgram (int seed)
1788 {
1789         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1790         std::ostringstream      buf;
1791
1792         buf << "#version 310 es\n"
1793                 << ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1794                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1795
1796         if (m_storage == STORAGE_BUFFER)
1797                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1798                         << "{\n"
1799                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1800                         << "} sb_in;\n";
1801         else if (m_storage == STORAGE_IMAGE)
1802                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
1803         else
1804                 DE_ASSERT(DE_FALSE);
1805
1806         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1807                 << "{\n"
1808                 << "    highp int resultOk[];\n"
1809                 << "} sb_result;\n"
1810                 << "\n"
1811                 << "void main (void)\n"
1812                 << "{\n"
1813                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1814                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1815                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1816                 << "    bool allOk = true;\n"
1817                 << "\n";
1818
1819         // Verify data
1820
1821         if (m_storage == STORAGE_BUFFER)
1822         {
1823                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1824                 {
1825                         if (!m_useAtomic)
1826                                 buf << "        allOk = allOk && (sb_in.values[(groupNdx + "
1827                                         << seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "] == "
1828                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1829                         else
1830                                 buf << "        allOk = allOk && (atomicExchange(sb_in.values[(groupNdx + "
1831                                         << seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "], zero) == "
1832                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1833                 }
1834         }
1835         else if (m_storage == STORAGE_IMAGE)
1836         {
1837                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1838                 {
1839                         if (!m_useAtomic)
1840                                 buf     << "    allOk = allOk && (imageLoad(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1841                                         << (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)).x == "
1842                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1843                         else
1844                                 buf << "        allOk = allOk && (imageAtomicExchange(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1845                                         << (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u), zero) == "
1846                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1847                 }
1848         }
1849         else
1850                 DE_ASSERT(DE_FALSE);
1851
1852         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1853                 << "}\n";
1854
1855         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
1856 }
1857
1858 glu::ShaderProgram* InterCallTestCase::genReadMultipleProgram (int seed0, int seed1)
1859 {
1860         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1861         std::ostringstream      buf;
1862
1863         buf << "#version 310 es\n"
1864                 << ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1865                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1866
1867         if (m_storage == STORAGE_BUFFER)
1868                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer0\n"
1869                         << "{\n"
1870                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1871                         << "} sb_in0;\n"
1872                         << "layout(binding=2, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer1\n"
1873                         << "{\n"
1874                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1875                         << "} sb_in1;\n";
1876         else if (m_storage == STORAGE_IMAGE)
1877                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn0;\n"
1878                         << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=2) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn1;\n";
1879         else
1880                 DE_ASSERT(DE_FALSE);
1881
1882         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1883                 << "{\n"
1884                 << "    highp int resultOk[];\n"
1885                 << "} sb_result;\n"
1886                 << "\n"
1887                 << "void main (void)\n"
1888                 << "{\n"
1889                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1890                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1891                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1892                 << "    bool allOk = true;\n"
1893                 << "\n";
1894
1895         // Verify data
1896
1897         if (m_storage == STORAGE_BUFFER)
1898         {
1899                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1900                         buf << "        allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in0.values[(groupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1901                                 << "    allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in1.values[(groupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1902         }
1903         else if (m_storage == STORAGE_IMAGE)
1904         {
1905                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1906                         buf << "        allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn0, ivec2((gl_GlobalInvocationID.x + " << (seed0 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1907                                 << "    allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn1, ivec2((gl_GlobalInvocationID.x + " << (seed1 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1908         }
1909         else
1910                 DE_ASSERT(DE_FALSE);
1911
1912         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1913                 << "}\n";
1914
1915         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
1916 }
1917
1918 glu::ShaderProgram* InterCallTestCase::genWriteInterleavedProgram (int seed, bool evenOdd)
1919 {
1920         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1921         std::ostringstream      buf;
1922
1923         buf << "#version 310 es\n"
1924                 << ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1925                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1926
1927         if (m_storage == STORAGE_BUFFER)
1928                 buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1929                         << "{\n"
1930                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1931                         << "} sb_out;\n";
1932         else if (m_storage == STORAGE_IMAGE)
1933                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1934         else
1935                 DE_ASSERT(DE_FALSE);
1936
1937         buf << "\n"
1938                 << "void main (void)\n"
1939                 << "{\n"
1940                 << "    uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1941                 << "    int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1942                 << "\n";
1943
1944         // Write to buffer/image m_perInvocationSize elements
1945         if (m_storage == STORAGE_BUFFER)
1946         {
1947                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1948                 {
1949                         if (m_useAtomic)
1950                                 buf << "        atomicExchange(";
1951                         else
1952                                 buf << "        ";
1953
1954                         buf << "sb_out.values[((groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize / 2 * m_perInvocationSize  << ") * 2 + " << ((evenOdd) ? (0) : (1)) << "]";
1955
1956                         if (m_useAtomic)
1957                                 buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1958                         else
1959                                 buf << "= " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1960                 }
1961         }
1962         else if (m_storage == STORAGE_IMAGE)
1963         {
1964                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1965                 {
1966                         if (m_useAtomic)
1967                                 buf << "        imageAtomicExchange";
1968                         else
1969                                 buf << "        imageStore";
1970
1971                         buf << "(u_imageOut, ivec2(((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + " << ((evenOdd) ? (0) : (1)) << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1972
1973                         if (m_useAtomic)
1974                                 buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1975                         else
1976                                 buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
1977                 }
1978         }
1979         else
1980                 DE_ASSERT(DE_FALSE);
1981
1982         buf << "}\n";
1983
1984         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
1985 }
1986
1987 glu::ShaderProgram* InterCallTestCase::genReadInterleavedProgram (int seed0, int seed1)
1988 {
1989         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1990         std::ostringstream      buf;
1991
1992         buf << "#version 310 es\n"
1993                 << ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1994                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1995
1996         if (m_storage == STORAGE_BUFFER)
1997                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1998                         << "{\n"
1999                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2000                         << "} sb_in;\n";
2001         else if (m_storage == STORAGE_IMAGE)
2002                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2003         else
2004                 DE_ASSERT(DE_FALSE);
2005
2006         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2007                 << "{\n"
2008                 << "    highp int resultOk[];\n"
2009                 << "} sb_result;\n"
2010                 << "\n"
2011                 << "void main (void)\n"
2012                 << "{\n"
2013                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2014                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2015                 << "    int interleavedGroupNdx = int((size.x >> 1U) * size.y * gl_GlobalInvocationID.z + (size.x >> 1U) * gl_GlobalInvocationID.y + (gl_GlobalInvocationID.x >> 1U));\n"
2016                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
2017                 << "    bool allOk = true;\n"
2018                 << "\n";
2019
2020         // Verify data
2021
2022         if (m_storage == STORAGE_BUFFER)
2023         {
2024                 buf << "        if (groupNdx % 2 == 0)\n"
2025                         << "    {\n";
2026                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2027                         buf << "                allOk = allOk && ("
2028                                 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 0]"
2029                                 << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2030                 buf << "        }\n"
2031                         << "    else\n"
2032                         << "    {\n";
2033                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2034                         buf << "                allOk = allOk && ("
2035                                 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 1]"
2036                                 << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2037                 buf << "        }\n";
2038         }
2039         else if (m_storage == STORAGE_IMAGE)
2040         {
2041                 buf << "        if (groupNdx % 2 == 0)\n"
2042                         << "    {\n";
2043                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2044                         buf << "                allOk = allOk && ("
2045                                 << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2046                                 << "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed0 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 0, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2047                                 << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2048                 buf << "        }\n"
2049                         << "    else\n"
2050                         << "    {\n";
2051                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2052                         buf << "                allOk = allOk && ("
2053                                 << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2054                                 << "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed1 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 1, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2055                                 << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2056                 buf << "        }\n";
2057         }
2058         else
2059                 DE_ASSERT(DE_FALSE);
2060
2061         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2062                 << "}\n";
2063
2064         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
2065 }
2066
2067 glu::ShaderProgram*     InterCallTestCase::genReadZeroProgram (void)
2068 {
2069         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2070         std::ostringstream      buf;
2071
2072         buf << "#version 310 es\n"
2073                 << ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
2074                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
2075
2076         if (m_storage == STORAGE_BUFFER)
2077                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2078                         << "{\n"
2079                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2080                         << "} sb_in;\n";
2081         else if (m_storage == STORAGE_IMAGE)
2082                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2083         else
2084                 DE_ASSERT(DE_FALSE);
2085
2086         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2087                 << "{\n"
2088                 << "    highp int resultOk[];\n"
2089                 << "} sb_result;\n"
2090                 << "\n"
2091                 << "void main (void)\n"
2092                 << "{\n"
2093                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2094                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2095                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " anything = " << ((m_formatInteger) ? ("5") : ("5.0")) << ";\n"
2096                 << "    bool allOk = true;\n"
2097                 << "\n";
2098
2099         // Verify data
2100
2101         if (m_storage == STORAGE_BUFFER)
2102         {
2103                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2104                         buf << "        allOk = allOk && ("
2105                                 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[groupNdx * " << m_perInvocationSize << " + " << readNdx << "]"
2106                                 << ((m_useAtomic) ? (", anything)") : ("")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2107         }
2108         else if (m_storage == STORAGE_IMAGE)
2109         {
2110                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2111                         buf << "        allOk = allOk && ("
2112                         << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y + " << (readNdx*m_invocationGridSize) << "u)"
2113                         << ((m_useAtomic) ? (", anything)") : (").x")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2114         }
2115         else
2116                 DE_ASSERT(DE_FALSE);
2117
2118         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2119                 << "}\n";
2120
2121         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
2122 }
2123
2124 class SSBOConcurrentAtomicCase : public TestCase
2125 {
2126 public:
2127
2128                                                         SSBOConcurrentAtomicCase        (Context& context, const char* name, const char* description, int numCalls, int workSize);
2129                                                         ~SSBOConcurrentAtomicCase       (void);
2130
2131         void                                    init                                            (void);
2132         void                                    deinit                                          (void);
2133         IterateResult                   iterate                                         (void);
2134
2135 private:
2136         std::string                             genComputeSource                        (void) const;
2137
2138         const int                               m_numCalls;
2139         const int                               m_workSize;
2140         glu::ShaderProgram*             m_program;
2141         deUint32                                m_bufferID;
2142         std::vector<deUint32>   m_intermediateResultBuffers;
2143 };
2144
2145 SSBOConcurrentAtomicCase::SSBOConcurrentAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2146         : TestCase              (context, name, description)
2147         , m_numCalls    (numCalls)
2148         , m_workSize    (workSize)
2149         , m_program             (DE_NULL)
2150         , m_bufferID    (DE_NULL)
2151 {
2152 }
2153
2154 SSBOConcurrentAtomicCase::~SSBOConcurrentAtomicCase (void)
2155 {
2156         deinit();
2157 }
2158
2159 void SSBOConcurrentAtomicCase::init (void)
2160 {
2161         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
2162         std::vector<deUint32>   zeroData                        (m_workSize, 0);
2163
2164         // gen buffers
2165
2166         gl.genBuffers(1, &m_bufferID);
2167         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2168         gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2169
2170         for (int ndx = 0; ndx < m_numCalls; ++ndx)
2171         {
2172                 deUint32 buffer = 0;
2173
2174                 gl.genBuffers(1, &buffer);
2175                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2176                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2177
2178                 m_intermediateResultBuffers.push_back(buffer);
2179                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2180         }
2181
2182         // gen program
2183
2184         m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2185         m_testCtx.getLog() << *m_program;
2186         if (!m_program->isOk())
2187                 throw tcu::TestError("could not build program");
2188 }
2189
2190 void SSBOConcurrentAtomicCase::deinit (void)
2191 {
2192         if (m_bufferID)
2193         {
2194                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2195                 m_bufferID = 0;
2196         }
2197
2198         for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2199                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2200         m_intermediateResultBuffers.clear();
2201
2202         delete m_program;
2203         m_program = DE_NULL;
2204 }
2205
2206 TestCase::IterateResult SSBOConcurrentAtomicCase::iterate (void)
2207 {
2208         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
2209         const deUint32                  sumValue                = (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2210         std::vector<int>                deltas;
2211
2212         // generate unique deltas
2213         generateShuffledRamp(m_numCalls, deltas);
2214
2215         // invoke program N times, each with a different delta
2216         {
2217                 const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2218
2219                 m_testCtx.getLog()
2220                         << tcu::TestLog::Message
2221                         << "Running shader " << m_numCalls << " times.\n"
2222                         << "Num groups = (" << m_workSize << ", 1, 1)\n"
2223                         << "Setting u_atomicDelta to a unique value for each call.\n"
2224                         << tcu::TestLog::EndMessage;
2225
2226                 if (deltaLocation == -1)
2227                         throw tcu::TestError("u_atomicDelta location was -1");
2228
2229                 gl.useProgram(m_program->getProgram());
2230                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_bufferID);
2231
2232                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2233                 {
2234                         m_testCtx.getLog()
2235                                 << tcu::TestLog::Message
2236                                 << "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2237                                 << tcu::TestLog::EndMessage;
2238
2239                         gl.uniform1ui(deltaLocation, deltas[callNdx]);
2240                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2241                         gl.dispatchCompute(m_workSize, 1, 1);
2242                 }
2243
2244                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2245         }
2246
2247         // Verify result
2248         {
2249                 std::vector<deUint32> result;
2250
2251                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2252
2253                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2254                 readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, result);
2255
2256                 for (int ndx = 0; ndx < m_workSize; ++ndx)
2257                 {
2258                         if (result[ndx] != sumValue)
2259                         {
2260                                 m_testCtx.getLog()
2261                                         << tcu::TestLog::Message
2262                                         << "Work buffer error, at index " << ndx << " expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2263                                         << "Work buffer contains invalid values."
2264                                         << tcu::TestLog::EndMessage;
2265
2266                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2267                                 return STOP;
2268                         }
2269                 }
2270
2271                 m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer contents are valid." << tcu::TestLog::EndMessage;
2272         }
2273
2274         // verify steps
2275         {
2276                 std::vector<std::vector<deUint32> >     intermediateResults     (m_numCalls);
2277                 std::vector<deUint32>                           valueChain                      (m_numCalls);
2278
2279                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2280
2281                 // collect results
2282
2283                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2284                 {
2285                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2286                         readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, intermediateResults[callNdx]);
2287                 }
2288
2289                 // verify values
2290
2291                 for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2292                 {
2293                         int                     invalidOperationNdx;
2294                         deUint32        errorDelta;
2295                         deUint32        errorExpected;
2296
2297                         // collect result chain for each element
2298                         for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2299                                 valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2300
2301                         // check there exists a path from 0 to sumValue using each addition once
2302                         // decompose cumulative results to addition operations (all additions positive => this works)
2303
2304                         std::sort(valueChain.begin(), valueChain.end());
2305
2306                         // validate chain
2307                         if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2308                         {
2309                                 m_testCtx.getLog()
2310                                         << tcu::TestLog::Message
2311                                         << "Intermediate buffer error, at value index " << valueNdx << ", applied operation index " << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2312                                         << "Intermediate buffer contains invalid values. Values at index " << valueNdx << "\n"
2313                                         << tcu::TestLog::EndMessage;
2314
2315                                 for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2316                                         m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2317                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2318
2319                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2320                                 return STOP;
2321                         }
2322                 }
2323
2324                 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2325         }
2326
2327         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2328         return STOP;
2329 }
2330
2331 std::string SSBOConcurrentAtomicCase::genComputeSource (void) const
2332 {
2333         std::ostringstream buf;
2334
2335         buf     << "#version 310 es\n"
2336                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2337                 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2338                 << "{\n"
2339                 << "    highp uint values[" << m_workSize << "];\n"
2340                 << "} sb_ires;\n"
2341                 << "\n"
2342                 << "layout (binding = 2, std430) volatile buffer WorkBuffer\n"
2343                 << "{\n"
2344                 << "    highp uint values[" << m_workSize << "];\n"
2345                 << "} sb_work;\n"
2346                 << "uniform highp uint u_atomicDelta;\n"
2347                 << "\n"
2348                 << "void main ()\n"
2349                 << "{\n"
2350                 << "    highp uint invocationIndex = gl_GlobalInvocationID.x;\n"
2351                 << "    sb_ires.values[invocationIndex] = atomicAdd(sb_work.values[invocationIndex], u_atomicDelta);\n"
2352                 << "}";
2353
2354         return buf.str();
2355 }
2356
2357 class ConcurrentAtomicCounterCase : public TestCase
2358 {
2359 public:
2360
2361                                                         ConcurrentAtomicCounterCase             (Context& context, const char* name, const char* description, int numCalls, int workSize);
2362                                                         ~ConcurrentAtomicCounterCase    (void);
2363
2364         void                                    init                                                    (void);
2365         void                                    deinit                                                  (void);
2366         IterateResult                   iterate                                                 (void);
2367
2368 private:
2369         std::string                             genComputeSource                                (bool evenOdd) const;
2370
2371         const int                               m_numCalls;
2372         const int                               m_workSize;
2373         glu::ShaderProgram*             m_evenProgram;
2374         glu::ShaderProgram*             m_oddProgram;
2375         deUint32                                m_counterBuffer;
2376         deUint32                                m_intermediateResultBuffer;
2377 };
2378
2379 ConcurrentAtomicCounterCase::ConcurrentAtomicCounterCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2380         : TestCase                                      (context, name, description)
2381         , m_numCalls                            (numCalls)
2382         , m_workSize                            (workSize)
2383         , m_evenProgram                         (DE_NULL)
2384         , m_oddProgram                          (DE_NULL)
2385         , m_counterBuffer                       (DE_NULL)
2386         , m_intermediateResultBuffer(DE_NULL)
2387 {
2388 }
2389
2390 ConcurrentAtomicCounterCase::~ConcurrentAtomicCounterCase (void)
2391 {
2392         deinit();
2393 }
2394
2395 void ConcurrentAtomicCounterCase::init (void)
2396 {
2397         const glw::Functions&           gl                      = m_context.getRenderContext().getFunctions();
2398         const std::vector<deUint32>     zeroData        (m_numCalls * m_workSize, 0);
2399
2400         // gen buffer
2401
2402         gl.genBuffers(1, &m_counterBuffer);
2403         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_counterBuffer);
2404         gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32), &zeroData[0], GL_DYNAMIC_COPY);
2405
2406         gl.genBuffers(1, &m_intermediateResultBuffer);
2407         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2408         gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_numCalls * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2409
2410         GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2411
2412         // gen programs
2413
2414         {
2415                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "EvenProgram", "Even program");
2416
2417                 m_evenProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(true)));
2418                 m_testCtx.getLog() << *m_evenProgram;
2419                 if (!m_evenProgram->isOk())
2420                         throw tcu::TestError("could not build program");
2421         }
2422         {
2423                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "OddProgram", "Odd program");
2424
2425                 m_oddProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(false)));
2426                 m_testCtx.getLog() << *m_oddProgram;
2427                 if (!m_oddProgram->isOk())
2428                         throw tcu::TestError("could not build program");
2429         }
2430 }
2431
2432 void ConcurrentAtomicCounterCase::deinit (void)
2433 {
2434         if (m_counterBuffer)
2435         {
2436                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_counterBuffer);
2437                 m_counterBuffer = 0;
2438         }
2439         if (m_intermediateResultBuffer)
2440         {
2441                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffer);
2442                 m_intermediateResultBuffer = 0;
2443         }
2444
2445         delete m_evenProgram;
2446         m_evenProgram = DE_NULL;
2447
2448         delete m_oddProgram;
2449         m_oddProgram = DE_NULL;
2450 }
2451
2452 TestCase::IterateResult ConcurrentAtomicCounterCase::iterate (void)
2453 {
2454         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2455
2456         // invoke program N times, each with a different delta
2457         {
2458                 const int evenCallNdxLocation   = gl.getUniformLocation(m_evenProgram->getProgram(), "u_callNdx");
2459                 const int oddCallNdxLocation    = gl.getUniformLocation(m_oddProgram->getProgram(), "u_callNdx");
2460
2461                 m_testCtx.getLog()
2462                         << tcu::TestLog::Message
2463                         << "Running shader pair (even & odd) " << m_numCalls << " times.\n"
2464                         << "Num groups = (" << m_workSize << ", 1, 1)\n"
2465                         << tcu::TestLog::EndMessage;
2466
2467                 if (evenCallNdxLocation == -1)
2468                         throw tcu::TestError("u_callNdx location was -1");
2469                 if (oddCallNdxLocation == -1)
2470                         throw tcu::TestError("u_callNdx location was -1");
2471
2472                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffer);
2473                 gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_counterBuffer);
2474
2475                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2476                 {
2477                         gl.useProgram(m_evenProgram->getProgram());
2478                         gl.uniform1ui(evenCallNdxLocation, (deUint32)callNdx);
2479                         gl.dispatchCompute(m_workSize, 1, 1);
2480
2481                         gl.useProgram(m_oddProgram->getProgram());
2482                         gl.uniform1ui(oddCallNdxLocation, (deUint32)callNdx);
2483                         gl.dispatchCompute(m_workSize, 1, 1);
2484                 }
2485
2486                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2487         }
2488
2489         // Verify result
2490         {
2491                 deUint32 result;
2492
2493                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
2494
2495                 gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counterBuffer);
2496                 result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
2497
2498                 if ((int)result != m_numCalls*m_workSize)
2499                 {
2500                         m_testCtx.getLog()
2501                                 << tcu::TestLog::Message
2502                                 << "Counter buffer error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
2503                                 << tcu::TestLog::EndMessage;
2504
2505                         m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2506                         return STOP;
2507                 }
2508
2509                 m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer is valid." << tcu::TestLog::EndMessage;
2510         }
2511
2512         // verify steps
2513         {
2514                 std::vector<deUint32> intermediateResults;
2515
2516                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2517
2518                 // collect results
2519
2520                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2521                 readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_numCalls * m_workSize, intermediateResults);
2522
2523                 // verify values
2524
2525                 std::sort(intermediateResults.begin(), intermediateResults.end());
2526
2527                 for (int valueNdx = 0; valueNdx < m_workSize * m_numCalls; ++valueNdx)
2528                 {
2529                         if ((int)intermediateResults[valueNdx] != valueNdx)
2530                         {
2531                                 m_testCtx.getLog()
2532                                         << tcu::TestLog::Message
2533                                         << "Intermediate buffer error, at value index " << valueNdx << ", expected " << valueNdx << ", got " << intermediateResults[valueNdx] << ".\n"
2534                                         << "Intermediate buffer contains invalid values. Intermediate results:\n"
2535                                         << tcu::TestLog::EndMessage;
2536
2537                                 for (int logCallNdx = 0; logCallNdx < m_workSize * m_numCalls; ++logCallNdx)
2538                                         m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx] << tcu::TestLog::EndMessage;
2539
2540                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2541                                 return STOP;
2542                         }
2543                 }
2544
2545                 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2546         }
2547
2548         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2549         return STOP;
2550 }
2551
2552 std::string ConcurrentAtomicCounterCase::genComputeSource (bool evenOdd) const
2553 {
2554         std::ostringstream buf;
2555
2556         buf     << "#version 310 es\n"
2557                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2558                 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2559                 << "{\n"
2560                 << "    highp uint values[" << m_workSize * m_numCalls << "];\n"
2561                 << "} sb_ires;\n"
2562                 << "\n"
2563                 << "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
2564                 << "uniform highp uint u_callNdx;\n"
2565                 << "\n"
2566                 << "void main ()\n"
2567                 << "{\n"
2568                 << "    highp uint dataNdx = u_callNdx * " << m_workSize << "u + gl_GlobalInvocationID.x;\n"
2569                 << "    if ((dataNdx % 2u) == " << ((evenOdd) ? (0) : (1)) << "u)\n"
2570                 << "            sb_ires.values[dataNdx] = atomicCounterIncrement(u_counter);\n"
2571                 << "}";
2572
2573         return buf.str();
2574 }
2575
2576 class ConcurrentImageAtomicCase : public TestCase
2577 {
2578 public:
2579
2580                                                         ConcurrentImageAtomicCase       (Context& context, const char* name, const char* description, int numCalls, int workSize);
2581                                                         ~ConcurrentImageAtomicCase      (void);
2582
2583         void                                    init                                            (void);
2584         void                                    deinit                                          (void);
2585         IterateResult                   iterate                                         (void);
2586
2587 private:
2588         void                                    readWorkImage                           (std::vector<deUint32>& result);
2589
2590         std::string                             genComputeSource                        (void) const;
2591         std::string                             genImageReadSource                      (void) const;
2592         std::string                             genImageClearSource                     (void) const;
2593
2594         const int                               m_numCalls;
2595         const int                               m_workSize;
2596         glu::ShaderProgram*             m_program;
2597         glu::ShaderProgram*             m_imageReadProgram;
2598         glu::ShaderProgram*             m_imageClearProgram;
2599         deUint32                                m_imageID;
2600         std::vector<deUint32>   m_intermediateResultBuffers;
2601 };
2602
2603 ConcurrentImageAtomicCase::ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2604         : TestCase                              (context, name, description)
2605         , m_numCalls                    (numCalls)
2606         , m_workSize                    (workSize)
2607         , m_program                             (DE_NULL)
2608         , m_imageReadProgram    (DE_NULL)
2609         , m_imageClearProgram   (DE_NULL)
2610         , m_imageID                             (DE_NULL)
2611 {
2612 }
2613
2614 ConcurrentImageAtomicCase::~ConcurrentImageAtomicCase (void)
2615 {
2616         deinit();
2617 }
2618
2619 void ConcurrentImageAtomicCase::init (void)
2620 {
2621         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
2622         std::vector<deUint32>   zeroData                        (m_workSize * m_workSize, 0);
2623
2624         if (!m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
2625                 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic");
2626
2627         // gen image
2628
2629         gl.genTextures(1, &m_imageID);
2630         gl.bindTexture(GL_TEXTURE_2D, m_imageID);
2631         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize, m_workSize);
2632         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2633         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2634         GLU_EXPECT_NO_ERROR(gl.getError(), "gen tex");
2635
2636         // gen buffers
2637
2638         for (int ndx = 0; ndx < m_numCalls; ++ndx)
2639         {
2640                 deUint32 buffer = 0;
2641
2642                 gl.genBuffers(1, &buffer);
2643                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2644                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2645
2646                 m_intermediateResultBuffers.push_back(buffer);
2647                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2648         }
2649
2650         // gen programs
2651
2652         m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2653         m_testCtx.getLog() << *m_program;
2654         if (!m_program->isOk())
2655                 throw tcu::TestError("could not build program");
2656
2657         m_imageReadProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageReadSource()));
2658         if (!m_imageReadProgram->isOk())
2659         {
2660                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageReadProgram", "Image read program");
2661
2662                 m_testCtx.getLog() << *m_imageReadProgram;
2663                 throw tcu::TestError("could not build program");
2664         }
2665
2666         m_imageClearProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageClearSource()));
2667         if (!m_imageClearProgram->isOk())
2668         {
2669                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageClearProgram", "Image read program");
2670
2671                 m_testCtx.getLog() << *m_imageClearProgram;
2672                 throw tcu::TestError("could not build program");
2673         }
2674 }
2675
2676 void ConcurrentImageAtomicCase::deinit (void)
2677 {
2678         if (m_imageID)
2679         {
2680                 m_context.getRenderContext().getFunctions().deleteTextures(1, &m_imageID);
2681                 m_imageID = 0;
2682         }
2683
2684         for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2685                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2686         m_intermediateResultBuffers.clear();
2687
2688         delete m_program;
2689         m_program = DE_NULL;
2690
2691         delete m_imageReadProgram;
2692         m_imageReadProgram = DE_NULL;
2693
2694         delete m_imageClearProgram;
2695         m_imageClearProgram = DE_NULL;
2696 }
2697
2698 TestCase::IterateResult ConcurrentImageAtomicCase::iterate (void)
2699 {
2700         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
2701         const deUint32                  sumValue                = (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2702         std::vector<int>                deltas;
2703
2704         // generate unique deltas
2705         generateShuffledRamp(m_numCalls, deltas);
2706
2707         // clear image
2708         {
2709                 m_testCtx.getLog() << tcu::TestLog::Message << "Clearing image contents" << tcu::TestLog::EndMessage;
2710
2711                 gl.useProgram(m_imageClearProgram->getProgram());
2712                 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
2713                 gl.dispatchCompute(m_workSize, m_workSize, 1);
2714                 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2715
2716                 GLU_EXPECT_NO_ERROR(gl.getError(), "clear");
2717         }
2718
2719         // invoke program N times, each with a different delta
2720         {
2721                 const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2722
2723                 m_testCtx.getLog()
2724                         << tcu::TestLog::Message
2725                         << "Running shader " << m_numCalls << " times.\n"
2726                         << "Num groups = (" << m_workSize << ", " << m_workSize << ", 1)\n"
2727                         << "Setting u_atomicDelta to a unique value for each call.\n"
2728                         << tcu::TestLog::EndMessage;
2729
2730                 if (deltaLocation == -1)
2731                         throw tcu::TestError("u_atomicDelta location was -1");
2732
2733                 gl.useProgram(m_program->getProgram());
2734                 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
2735
2736                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2737                 {
2738                         m_testCtx.getLog()
2739                                 << tcu::TestLog::Message
2740                                 << "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2741                                 << tcu::TestLog::EndMessage;
2742
2743                         gl.uniform1ui(deltaLocation, deltas[callNdx]);
2744                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2745                         gl.dispatchCompute(m_workSize, m_workSize, 1);
2746                 }
2747
2748                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2749         }
2750
2751         // Verify result
2752         {
2753                 std::vector<deUint32> result;
2754
2755                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work image, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2756
2757                 readWorkImage(result);
2758
2759                 for (int ndx = 0; ndx < m_workSize * m_workSize; ++ndx)
2760                 {
2761                         if (result[ndx] != sumValue)
2762                         {
2763                                 m_testCtx.getLog()
2764                                         << tcu::TestLog::Message
2765                                         << "Work image error, at index (" << ndx % m_workSize << ", " << ndx / m_workSize << ") expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2766                                         << "Work image contains invalid values."
2767                                         << tcu::TestLog::EndMessage;
2768
2769                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Image contents invalid");
2770                                 return STOP;
2771                         }
2772                 }
2773
2774                 m_testCtx.getLog() << tcu::TestLog::Message << "Work image contents are valid." << tcu::TestLog::EndMessage;
2775         }
2776
2777         // verify steps
2778         {
2779                 std::vector<std::vector<deUint32> >     intermediateResults     (m_numCalls);
2780                 std::vector<deUint32>                           valueChain                      (m_numCalls);
2781                 std::vector<deUint32>                           chainDelta                      (m_numCalls);
2782
2783                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2784
2785                 // collect results
2786
2787                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2788                 {
2789                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2790                         readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize * m_workSize, intermediateResults[callNdx]);
2791                 }
2792
2793                 // verify values
2794
2795                 for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2796                 {
2797                         int                     invalidOperationNdx;
2798                         deUint32        errorDelta;
2799                         deUint32        errorExpected;
2800
2801                         // collect result chain for each element
2802                         for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2803                                 valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2804
2805                         // check there exists a path from 0 to sumValue using each addition once
2806                         // decompose cumulative results to addition operations (all additions positive => this works)
2807
2808                         std::sort(valueChain.begin(), valueChain.end());
2809
2810                         for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2811                                 chainDelta[callNdx] = ((callNdx + 1 == m_numCalls) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
2812
2813                         // chainDelta contains now the actual additions applied to the value
2814                         std::sort(chainDelta.begin(), chainDelta.end());
2815
2816                         // validate chain
2817                         if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2818                         {
2819                                 m_testCtx.getLog()
2820                                         << tcu::TestLog::Message
2821                                         << "Intermediate buffer error, at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << "), applied operation index "
2822                                         << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2823                                         << "Intermediate buffer contains invalid values. Values at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << ")\n"
2824                                         << tcu::TestLog::EndMessage;
2825
2826                                 for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2827                                         m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2828                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2829
2830                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2831                                 return STOP;
2832                         }
2833                 }
2834
2835                 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2836         }
2837
2838         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2839         return STOP;
2840 }
2841
2842 void ConcurrentImageAtomicCase::readWorkImage (std::vector<deUint32>& result)
2843 {
2844         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
2845         glu::Buffer                             resultBuffer    (m_context.getRenderContext());
2846
2847         // Read image to an ssbo
2848
2849         {
2850                 const std::vector<deUint32> zeroData(m_workSize*m_workSize, 0);
2851
2852                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *resultBuffer);
2853                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * m_workSize * m_workSize), &zeroData[0], GL_DYNAMIC_COPY);
2854
2855                 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2856                 gl.useProgram(m_imageReadProgram->getProgram());
2857
2858                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *resultBuffer);
2859                 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
2860                 gl.dispatchCompute(m_workSize, m_workSize, 1);
2861
2862                 GLU_EXPECT_NO_ERROR(gl.getError(), "read");
2863         }
2864
2865         // Read ssbo
2866         {
2867                 const void* ptr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, (int)(sizeof(deUint32) * m_workSize * m_workSize), GL_MAP_READ_BIT);
2868                 GLU_EXPECT_NO_ERROR(gl.getError(), "map");
2869
2870                 if (!ptr)
2871                         throw tcu::TestError("mapBufferRange returned NULL");
2872
2873                 result.resize(m_workSize * m_workSize);
2874                 memcpy(&result[0], ptr, sizeof(deUint32) * m_workSize * m_workSize);
2875
2876                 if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) == GL_FALSE)
2877                         throw tcu::TestError("unmapBuffer returned false");
2878         }
2879 }
2880
2881 std::string ConcurrentImageAtomicCase::genComputeSource (void) const
2882 {
2883         std::ostringstream buf;
2884
2885         buf     << "#version 310 es\n"
2886                 << "#extension GL_OES_shader_image_atomic : require\n"
2887                 << "\n"
2888                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2889                 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2890                 << "{\n"
2891                 << "    highp uint values[" << m_workSize * m_workSize << "];\n"
2892                 << "} sb_ires;\n"
2893                 << "\n"
2894                 << "layout (binding = 2, r32ui) volatile uniform highp uimage2D u_workImage;\n"
2895                 << "uniform highp uint u_atomicDelta;\n"
2896                 << "\n"
2897                 << "void main ()\n"
2898                 << "{\n"
2899                 << "    highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2900                 << "    sb_ires.values[invocationIndex] = imageAtomicAdd(u_workImage, ivec2(gl_GlobalInvocationID.xy), u_atomicDelta);\n"
2901                 << "}";
2902
2903         return buf.str();
2904 }
2905
2906 std::string ConcurrentImageAtomicCase::genImageReadSource (void) const
2907 {
2908         std::ostringstream buf;
2909
2910         buf     << "#version 310 es\n"
2911                 << "\n"
2912                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2913                 << "layout (binding = 1, std430) writeonly buffer ImageValues\n"
2914                 << "{\n"
2915                 << "    highp uint values[" << m_workSize * m_workSize << "];\n"
2916                 << "} sb_res;\n"
2917                 << "\n"
2918                 << "layout (binding = 2, r32ui) readonly uniform highp uimage2D u_workImage;\n"
2919                 << "\n"
2920                 << "void main ()\n"
2921                 << "{\n"
2922                 << "    highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2923                 << "    sb_res.values[invocationIndex] = imageLoad(u_workImage, ivec2(gl_GlobalInvocationID.xy)).x;\n"
2924                 << "}";
2925
2926         return buf.str();
2927 }
2928
2929 std::string ConcurrentImageAtomicCase::genImageClearSource (void) const
2930 {
2931         std::ostringstream buf;
2932
2933         buf     << "#version 310 es\n"
2934                 << "\n"
2935                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2936                 << "layout (binding = 2, r32ui) writeonly uniform highp uimage2D u_workImage;\n"
2937                 << "\n"
2938                 << "void main ()\n"
2939                 << "{\n"
2940                 << "    imageStore(u_workImage, ivec2(gl_GlobalInvocationID.xy), uvec4(0, 0, 0, 0));\n"
2941                 << "}";
2942
2943         return buf.str();
2944 }
2945
2946 class ConcurrentSSBOAtomicCounterMixedCase : public TestCase
2947 {
2948 public:
2949                                                         ConcurrentSSBOAtomicCounterMixedCase    (Context& context, const char* name, const char* description, int numCalls, int workSize);
2950                                                         ~ConcurrentSSBOAtomicCounterMixedCase   (void);
2951
2952         void                                    init                                                                    (void);
2953         void                                    deinit                                                                  (void);
2954         IterateResult                   iterate                                                                 (void);
2955
2956 private:
2957         std::string                             genSSBOComputeSource                                    (void) const;
2958         std::string                             genAtomicCounterComputeSource                   (void) const;
2959
2960         const int                               m_numCalls;
2961         const int                               m_workSize;
2962         deUint32                                m_bufferID;
2963         glu::ShaderProgram*             m_ssboAtomicProgram;
2964         glu::ShaderProgram*             m_atomicCounterProgram;
2965 };
2966
2967 ConcurrentSSBOAtomicCounterMixedCase::ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2968         : TestCase                                      (context, name, description)
2969         , m_numCalls                            (numCalls)
2970         , m_workSize                            (workSize)
2971         , m_bufferID                            (DE_NULL)
2972         , m_ssboAtomicProgram           (DE_NULL)
2973         , m_atomicCounterProgram        (DE_NULL)
2974 {
2975         // SSBO atomic XORs cancel out
2976         DE_ASSERT((workSize * numCalls) % (16 * 2) == 0);
2977 }
2978
2979 ConcurrentSSBOAtomicCounterMixedCase::~ConcurrentSSBOAtomicCounterMixedCase (void)
2980 {
2981         deinit();
2982 }
2983
2984 void ConcurrentSSBOAtomicCounterMixedCase::init (void)
2985 {
2986         const glw::Functions&           gl                      = m_context.getRenderContext().getFunctions();
2987         const deUint32                          zeroBuf[2]      = { 0, 0 };
2988
2989         // gen buffer
2990
2991         gl.genBuffers(1, &m_bufferID);
2992         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2993         gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * 2), zeroBuf, GL_DYNAMIC_COPY);
2994
2995         GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2996
2997         // gen programs
2998
2999         {
3000                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "SSBOProgram", "SSBO atomic program");
3001
3002                 m_ssboAtomicProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genSSBOComputeSource()));
3003                 m_testCtx.getLog() << *m_ssboAtomicProgram;
3004                 if (!m_ssboAtomicProgram->isOk())
3005                         throw tcu::TestError("could not build program");
3006         }
3007         {
3008                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "AtomicCounterProgram", "Atomic counter program");
3009
3010                 m_atomicCounterProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genAtomicCounterComputeSource()));
3011                 m_testCtx.getLog() << *m_atomicCounterProgram;
3012                 if (!m_atomicCounterProgram->isOk())
3013                         throw tcu::TestError("could not build program");
3014         }
3015 }
3016
3017 void ConcurrentSSBOAtomicCounterMixedCase::deinit (void)
3018 {
3019         if (m_bufferID)
3020         {
3021                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
3022                 m_bufferID = 0;
3023         }
3024
3025         delete m_ssboAtomicProgram;
3026         m_ssboAtomicProgram = DE_NULL;
3027
3028         delete m_atomicCounterProgram;
3029         m_atomicCounterProgram = DE_NULL;
3030 }
3031
3032 TestCase::IterateResult ConcurrentSSBOAtomicCounterMixedCase::iterate (void)
3033 {
3034         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3035
3036         m_testCtx.getLog() << tcu::TestLog::Message << "Testing atomic counters and SSBO atomic operations with both backed by the same buffer." << tcu::TestLog::EndMessage;
3037
3038         // invoke programs N times
3039         {
3040                 m_testCtx.getLog()
3041                         << tcu::TestLog::Message
3042                         << "Running SSBO atomic program and atomic counter program " << m_numCalls << " times. (interleaved)\n"
3043                         << "Num groups = (" << m_workSize << ", 1, 1)\n"
3044                         << tcu::TestLog::EndMessage;
3045
3046                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_bufferID);
3047                 gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_bufferID);
3048
3049                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
3050                 {
3051                         gl.useProgram(m_atomicCounterProgram->getProgram());
3052                         gl.dispatchCompute(m_workSize, 1, 1);
3053
3054                         gl.useProgram(m_ssboAtomicProgram->getProgram());
3055                         gl.dispatchCompute(m_workSize, 1, 1);
3056                 }
3057
3058                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
3059         }
3060
3061         // Verify result
3062         {
3063                 deUint32 result;
3064
3065                 // XORs cancel out, only addition is left
3066                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
3067
3068                 gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_bufferID);
3069                 result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
3070
3071                 if ((int)result != m_numCalls*m_workSize)
3072                 {
3073                         m_testCtx.getLog()
3074                                 << tcu::TestLog::Message
3075                                 << "Buffer value error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
3076                                 << tcu::TestLog::EndMessage;
3077
3078                         m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
3079                         return STOP;
3080                 }
3081
3082                 m_testCtx.getLog() << tcu::TestLog::Message << "Buffer is valid." << tcu::TestLog::EndMessage;
3083         }
3084
3085         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
3086         return STOP;
3087 }
3088
3089 std::string ConcurrentSSBOAtomicCounterMixedCase::genSSBOComputeSource (void) const
3090 {
3091         std::ostringstream buf;
3092
3093         buf     << "#version 310 es\n"
3094                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3095                 << "layout (binding = 1, std430) volatile buffer WorkBuffer\n"
3096                 << "{\n"
3097                 << "    highp uint targetValue;\n"
3098                 << "    highp uint dummy;\n"
3099                 << "} sb_work;\n"
3100                 << "\n"
3101                 << "void main ()\n"
3102                 << "{\n"
3103                 << "    // flip high bits\n"
3104                 << "    highp uint mask = uint(1) << (16u + (gl_GlobalInvocationID.x % 16u));\n"
3105                 << "    sb_work.dummy = atomicXor(sb_work.targetValue, mask);\n"
3106                 << "}";
3107
3108         return buf.str();
3109 }
3110
3111 std::string ConcurrentSSBOAtomicCounterMixedCase::genAtomicCounterComputeSource (void) const
3112 {
3113         std::ostringstream buf;
3114
3115         buf     << "#version 310 es\n"
3116                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3117                 << "\n"
3118                 << "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
3119                 << "\n"
3120                 << "void main ()\n"
3121                 << "{\n"
3122                 << "    atomicCounterIncrement(u_counter);\n"
3123                 << "}";
3124
3125         return buf.str();
3126 }
3127
3128 } // anonymous
3129
3130 SynchronizationTests::SynchronizationTests (Context& context)
3131         : TestCaseGroup(context, "synchronization", "Synchronization tests")
3132 {
3133 }
3134
3135 SynchronizationTests::~SynchronizationTests (void)
3136 {
3137 }
3138
3139 void SynchronizationTests::init (void)
3140 {
3141         tcu::TestCaseGroup* const inInvocationGroup             = new tcu::TestCaseGroup(m_testCtx, "in_invocation",    "Test intra-invocation synchronization");
3142         tcu::TestCaseGroup* const interInvocationGroup  = new tcu::TestCaseGroup(m_testCtx, "inter_invocation", "Test inter-invocation synchronization");
3143         tcu::TestCaseGroup* const interCallGroup                = new tcu::TestCaseGroup(m_testCtx, "inter_call",       "Test inter-call synchronization");
3144
3145         addChild(inInvocationGroup);
3146         addChild(interInvocationGroup);
3147         addChild(interCallGroup);
3148
3149         // .in_invocation & .inter_invocation
3150         {
3151                 static const struct CaseConfig
3152                 {
3153                         const char*                                                                     namePrefix;
3154                         const InterInvocationTestCase::StorageType      storage;
3155                         const int                                                                       flags;
3156                 } configs[] =
3157                 {
3158                         { "image",                      InterInvocationTestCase::STORAGE_IMAGE,         0                                                                               },
3159                         { "image_atomic",       InterInvocationTestCase::STORAGE_IMAGE,         InterInvocationTestCase::FLAG_ATOMIC    },
3160                         { "ssbo",                       InterInvocationTestCase::STORAGE_BUFFER,        0                                                                               },
3161                         { "ssbo_atomic",        InterInvocationTestCase::STORAGE_BUFFER,        InterInvocationTestCase::FLAG_ATOMIC    },
3162                 };
3163
3164                 for (int groupNdx = 0; groupNdx < 2; ++groupNdx)
3165                 {
3166                         tcu::TestCaseGroup* const       targetGroup     = (groupNdx == 0) ? (inInvocationGroup) : (interInvocationGroup);
3167                         const int                                       extraFlags      = (groupNdx == 0) ? (0) : (InterInvocationTestCase::FLAG_IN_GROUP);
3168
3169                         for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3170                         {
3171                                 const char* const target = (configs[configNdx].storage == InterInvocationTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3172
3173                                 targetGroup->addChild(new InvocationWriteReadCase(m_context,
3174                                                                                                                                   (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3175                                                                                                                                   (std::string("Write to ") + target + " and read it").c_str(),
3176                                                                                                                                   configs[configNdx].storage,
3177                                                                                                                                   configs[configNdx].flags | extraFlags));
3178
3179                                 targetGroup->addChild(new InvocationReadWriteCase(m_context,
3180                                                                                                                                   (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3181                                                                                                                                   (std::string("Read form ") + target + " and then write to it").c_str(),
3182                                                                                                                                   configs[configNdx].storage,
3183                                                                                                                                   configs[configNdx].flags | extraFlags));
3184
3185                                 targetGroup->addChild(new InvocationOverWriteCase(m_context,
3186                                                                                                                                   (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3187                                                                                                                                   (std::string("Write to ") + target + " twice and read it").c_str(),
3188                                                                                                                                   configs[configNdx].storage,
3189                                                                                                                                   configs[configNdx].flags | extraFlags));
3190
3191                                 targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3192                                                                                                                                    (std::string(configs[configNdx].namePrefix) + "_alias_write").c_str(),
3193                                                                                                                                    (std::string("Write to aliasing ") + target + " and read it").c_str(),
3194                                                                                                                                    InvocationAliasWriteCase::TYPE_WRITE,
3195                                                                                                                                    configs[configNdx].storage,
3196                                                                                                                                    configs[configNdx].flags | extraFlags));
3197
3198                                 targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3199                                                                                                                                    (std::string(configs[configNdx].namePrefix) + "_alias_overwrite").c_str(),
3200                                                                                                                                    (std::string("Write to aliasing ") + target + "s and read it").c_str(),
3201                                                                                                                                    InvocationAliasWriteCase::TYPE_OVERWRITE,
3202                                                                                                                                    configs[configNdx].storage,
3203                                                                                                                                    configs[configNdx].flags | extraFlags));
3204                         }
3205                 }
3206         }
3207
3208         // .inter_call
3209         {
3210                 tcu::TestCaseGroup* const withBarrierGroup              = new tcu::TestCaseGroup(m_testCtx, "with_memory_barrier", "Synchronize with memory barrier");
3211                 tcu::TestCaseGroup* const withoutBarrierGroup   = new tcu::TestCaseGroup(m_testCtx, "without_memory_barrier", "Synchronize without memory barrier");
3212
3213                 interCallGroup->addChild(withBarrierGroup);
3214                 interCallGroup->addChild(withoutBarrierGroup);
3215
3216                 // .with_memory_barrier
3217                 {
3218                         static const struct CaseConfig
3219                         {
3220                                 const char*                                                             namePrefix;
3221                                 const InterCallTestCase::StorageType    storage;
3222                                 const int                                                               flags;
3223                         } configs[] =
3224                         {
3225                                 { "image",                      InterCallTestCase::STORAGE_IMAGE,       0                                                                                                                                               },
3226                                 { "image_atomic",       InterCallTestCase::STORAGE_IMAGE,       InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT    },
3227                                 { "ssbo",                       InterCallTestCase::STORAGE_BUFFER,      0                                                                                                                                               },
3228                                 { "ssbo_atomic",        InterCallTestCase::STORAGE_BUFFER,      InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT    },
3229                         };
3230
3231                         const int seed0 = 123;
3232                         const int seed1 = 457;
3233
3234                         for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3235                         {
3236                                 const char* const target = (configs[configNdx].storage == InterCallTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3237
3238                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3239                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3240                                                                                                                                  (std::string("Write to ") + target + " and read it").c_str(),
3241                                                                                                                                  configs[configNdx].storage,
3242                                                                                                                                  configs[configNdx].flags,
3243                                                                                                                                  InterCallOperations()
3244                                                                                                                                         << op::WriteData::Generate(1, seed0)
3245                                                                                                                                         << op::Barrier()
3246                                                                                                                                         << op::ReadData::Generate(1, seed0)));
3247
3248                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3249                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3250                                                                                                                                  (std::string("Read from ") + target + " and then write to it").c_str(),
3251                                                                                                                                  configs[configNdx].storage,
3252                                                                                                                                  configs[configNdx].flags,
3253                                                                                                                                  InterCallOperations()
3254                                                                                                                                         << op::ReadZeroData::Generate(1)
3255                                                                                                                                         << op::Barrier()
3256                                                                                                                                         << op::WriteData::Generate(1, seed0)));
3257
3258                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3259                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3260                                                                                                                                  (std::string("Write to ") + target + " twice and read it").c_str(),
3261                                                                                                                                  configs[configNdx].storage,
3262                                                                                                                                  configs[configNdx].flags,
3263                                                                                                                                  InterCallOperations()
3264                                                                                                                                         << op::WriteData::Generate(1, seed0)
3265                                                                                                                                         << op::Barrier()
3266                                                                                                                                         << op::WriteData::Generate(1, seed1)
3267                                                                                                                                         << op::Barrier()
3268                                                                                                                                         << op::ReadData::Generate(1, seed1)));
3269
3270                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3271                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_write_read").c_str(),
3272                                                                                                                                  (std::string("Write to multiple ") + target + "s and read them").c_str(),
3273                                                                                                                                  configs[configNdx].storage,
3274                                                                                                                                  configs[configNdx].flags,
3275                                                                                                                                  InterCallOperations()
3276                                                                                                                                         << op::WriteData::Generate(1, seed0)
3277                                                                                                                                         << op::WriteData::Generate(2, seed1)
3278                                                                                                                                         << op::Barrier()
3279                                                                                                                                         << op::ReadMultipleData::Generate(1, seed0, 2, seed1)));
3280
3281                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3282                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_interleaved_write_read").c_str(),
3283                                                                                                                                  (std::string("Write to same ") + target + " in multiple calls and read it").c_str(),
3284                                                                                                                                  configs[configNdx].storage,
3285                                                                                                                                  configs[configNdx].flags,
3286                                                                                                                                  InterCallOperations()
3287                                                                                                                                         << op::WriteDataInterleaved::Generate(1, seed0, true)
3288                                                                                                                                         << op::WriteDataInterleaved::Generate(1, seed1, false)
3289                                                                                                                                         << op::Barrier()
3290                                                                                                                                         << op::ReadDataInterleaved::Generate(1, seed0, seed1)));
3291
3292                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3293                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_ordered").c_str(),
3294                                                                                                                                  (std::string("Two unrelated ") + target + " write-reads").c_str(),
3295                                                                                                                                  configs[configNdx].storage,
3296                                                                                                                                  configs[configNdx].flags,
3297                                                                                                                                  InterCallOperations()
3298                                                                                                                                         << op::WriteData::Generate(1, seed0)
3299                                                                                                                                         << op::WriteData::Generate(2, seed1)
3300                                                                                                                                         << op::Barrier()
3301                                                                                                                                         << op::ReadData::Generate(1, seed0)
3302                                                                                                                                         << op::ReadData::Generate(2, seed1)));
3303
3304                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3305                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_non_ordered").c_str(),
3306                                                                                                                                  (std::string("Two unrelated ") + target + " write-reads").c_str(),
3307                                                                                                                                  configs[configNdx].storage,
3308                                                                                                                                  configs[configNdx].flags,
3309                                                                                                                                  InterCallOperations()
3310                                                                                                                                         << op::WriteData::Generate(1, seed0)
3311                                                                                                                                         << op::WriteData::Generate(2, seed1)
3312                                                                                                                                         << op::Barrier()
3313                                                                                                                                         << op::ReadData::Generate(2, seed1)
3314                                                                                                                                         << op::ReadData::Generate(1, seed0)));
3315                         }
3316
3317                         // .without_memory_barrier
3318                         {
3319                                 struct InvocationConfig
3320                                 {
3321                                         const char*     name;
3322                                         int                     count;
3323                                 };
3324
3325                                 static const InvocationConfig ssboInvocations[] =
3326                                 {
3327                                         { "1k",         1024    },
3328                                         { "4k",         4096    },
3329                                         { "32k",        32768   },
3330                                 };
3331                                 static const InvocationConfig imageInvocations[] =
3332                                 {
3333                                         { "8x8",                8       },
3334                                         { "32x32",              32      },
3335                                         { "128x128",    128     },
3336                                 };
3337                                 static const InvocationConfig counterInvocations[] =
3338                                 {
3339                                         { "32",         32              },
3340                                         { "128",        128             },
3341                                         { "1k",         1024    },
3342                                 };
3343                                 static const int callCounts[] = { 2, 5, 100 };
3344
3345                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(ssboInvocations); ++invocationNdx)
3346                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3347                                                 withoutBarrierGroup->addChild(new SSBOConcurrentAtomicCase(m_context, (std::string("ssbo_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + ssboInvocations[invocationNdx].name + "_invocations").c_str(),       "", callCounts[callCountNdx], ssboInvocations[invocationNdx].count));
3348
3349                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(imageInvocations); ++invocationNdx)
3350                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3351                                                 withoutBarrierGroup->addChild(new ConcurrentImageAtomicCase(m_context, (std::string("image_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + imageInvocations[invocationNdx].name + "_invocations").c_str(),    "", callCounts[callCountNdx], imageInvocations[invocationNdx].count));
3352
3353                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3354                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3355                                                 withoutBarrierGroup->addChild(new ConcurrentAtomicCounterCase(m_context, (std::string("atomic_counter_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),      "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3356
3357                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3358                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3359                                                 withoutBarrierGroup->addChild(new ConcurrentSSBOAtomicCounterMixedCase(m_context, (std::string("ssbo_atomic_counter_mixed_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),  "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3360                         }
3361                 }
3362         }
3363 }
3364
3365 } // Functional
3366 } // gles31
3367 } // deqp