Fix PIPELINE_STAGE_TOP_OF_PIPE_BIT usage in api tests
[platform/upstream/VK-GL-CTS.git] / modules / gles31 / functional / es31fBasicComputeShaderTests.cpp
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.1 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Basic Compute Shader Tests.
22  *//*--------------------------------------------------------------------*/
23
24 #include "es31fBasicComputeShaderTests.hpp"
25 #include "gluShaderProgram.hpp"
26 #include "gluObjectWrapper.hpp"
27 #include "gluRenderContext.hpp"
28 #include "gluProgramInterfaceQuery.hpp"
29 #include "gluContextInfo.hpp"
30 #include "glwFunctions.hpp"
31 #include "glwEnums.hpp"
32 #include "tcuTestLog.hpp"
33 #include "deRandom.hpp"
34 #include "deStringUtil.hpp"
35 #include "deMemory.h"
36
37 namespace deqp
38 {
39 namespace gles31
40 {
41 namespace Functional
42 {
43
44 using std::string;
45 using std::vector;
46 using tcu::TestLog;
47 using namespace glu;
48
49 //! Utility for mapping buffers.
50 class BufferMemMap
51 {
52 public:
53         BufferMemMap (const glw::Functions& gl, deUint32 target, int offset, int size, deUint32 access)
54                 : m_gl          (gl)
55                 , m_target      (target)
56                 , m_ptr         (DE_NULL)
57         {
58                 m_ptr = gl.mapBufferRange(target, offset, size, access);
59                 GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
60                 TCU_CHECK(m_ptr);
61         }
62
63         ~BufferMemMap (void)
64         {
65                 m_gl.unmapBuffer(m_target);
66         }
67
68         void*   getPtr          (void) const { return m_ptr; }
69         void*   operator*       (void) const { return m_ptr; }
70
71 private:
72                                                         BufferMemMap                    (const BufferMemMap& other);
73         BufferMemMap&                   operator=                               (const BufferMemMap& other);
74
75         const glw::Functions&   m_gl;
76         const deUint32                  m_target;
77         void*                                   m_ptr;
78 };
79
80 namespace
81 {
82
83 class EmptyComputeShaderCase : public TestCase
84 {
85 public:
86         EmptyComputeShaderCase (Context& context)
87                 : TestCase(context, "empty", "Empty shader")
88         {
89         }
90
91         IterateResult iterate (void)
92         {
93                 const GLSLVersion       glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
94                 std::ostringstream      src;
95
96                 src << getGLSLVersionDeclaration(glslVersion) << "\n"
97                         << "layout (local_size_x = 1) in;\n"
98                            "void main (void) {}\n";
99
100                 const ShaderProgram program(m_context.getRenderContext(),
101                         ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
102
103                 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
104
105                 m_testCtx.getLog() << program;
106                 if (!program.isOk())
107                         TCU_FAIL("Compile failed");
108
109                 gl.useProgram(program.getProgram());
110                 gl.dispatchCompute(1, 1, 1);
111                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
112
113                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
114                 return STOP;
115         }
116 };
117
118 class UBOToSSBOInvertCase : public TestCase
119 {
120 public:
121         UBOToSSBOInvertCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
122                 : TestCase              (context, name, description)
123                 , m_numValues   (numValues)
124                 , m_localSize   (localSize)
125                 , m_workSize    (workSize)
126         {
127                 DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
128         }
129
130         IterateResult iterate (void)
131         {
132                 const GLSLVersion       glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
133                 std::ostringstream      src;
134
135                 src << getGLSLVersionDeclaration(glslVersion) << "\n"
136                         << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
137                         << "uniform Input {\n"
138                         << "    uint values[" << m_numValues << "];\n"
139                         << "} ub_in;\n"
140                         << "layout(binding = 1) buffer Output {\n"
141                         << "    uint values[" << m_numValues << "];\n"
142                         << "} sb_out;\n"
143                         << "void main (void) {\n"
144                         << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
145                         << "    uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
146                         << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
147                         << "    uint offset          = numValuesPerInv*groupNdx;\n"
148                         << "\n"
149                         << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
150                         << "        sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
151                         << "}\n";
152
153                 const glw::Functions&           gl                              = m_context.getRenderContext().getFunctions();
154                 const ShaderProgram                     program                 (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
155                 const Buffer                            inputBuffer             (m_context.getRenderContext());
156                 const Buffer                            outputBuffer    (m_context.getRenderContext());
157                 std::vector<deUint32>           inputValues             (m_numValues);
158
159                 // Compute input values.
160                 {
161                         de::Random rnd(0x111223f);
162                         for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
163                                 inputValues[ndx] = rnd.getUint32();
164                 }
165
166                 m_testCtx.getLog() << program;
167                 if (!program.isOk())
168                         TCU_FAIL("Compile failed");
169
170                 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
171
172                 gl.useProgram(program.getProgram());
173
174                 // Input buffer setup
175                 {
176                         const deUint32                          blockIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM_BLOCK, "Input");
177                         const InterfaceBlockInfo        blockInfo       = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_UNIFORM_BLOCK, blockIndex);
178                         const deUint32                          valueIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "Input.values");
179                         const InterfaceVariableInfo     valueInfo       = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_UNIFORM, valueIndex);
180
181                         gl.bindBuffer(GL_UNIFORM_BUFFER, *inputBuffer);
182                         gl.bufferData(GL_UNIFORM_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
183
184                         {
185                                 const BufferMemMap bufMap(gl, GL_UNIFORM_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
186
187                                 for (deUint32 ndx = 0; ndx < de::min(valueInfo.arraySize, (deUint32)inputValues.size()); ndx++)
188                                         *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
189                         }
190
191                         gl.uniformBlockBinding(program.getProgram(), blockIndex, 0);
192                         gl.bindBufferBase(GL_UNIFORM_BUFFER, 0, *inputBuffer);
193                         GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
194                 }
195
196                 // Output buffer setup
197                 {
198                         const deUint32          blockIndex              = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
199                         const int                       blockSize               = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
200
201                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
202                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
203                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer);
204                         GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
205                 }
206
207                 // Dispatch compute workload
208                 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
209                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
210
211                 // Read back and compare
212                 {
213                         const deUint32                          blockIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
214                         const int                                       blockSize       = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
215                         const deUint32                          valueIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
216                         const InterfaceVariableInfo     valueInfo       = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
217                         const BufferMemMap                      bufMap          (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
218
219                         TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
220                         for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
221                         {
222                                 const deUint32  res             = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
223                                 const deUint32  ref             = ~inputValues[ndx];
224
225                                 if (res != ref)
226                                         throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
227                         }
228                 }
229
230                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
231                 return STOP;
232         }
233
234 private:
235         const int                       m_numValues;
236         const tcu::IVec3        m_localSize;
237         const tcu::IVec3        m_workSize;
238 };
239
240 class CopyInvertSSBOCase : public TestCase
241 {
242 public:
243         CopyInvertSSBOCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
244                 : TestCase              (context, name, description)
245                 , m_numValues   (numValues)
246                 , m_localSize   (localSize)
247                 , m_workSize    (workSize)
248         {
249                 DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
250         }
251
252         IterateResult iterate (void)
253         {
254                 const GLSLVersion       glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
255                 std::ostringstream      src;
256
257                 src << getGLSLVersionDeclaration(glslVersion) << "\n"
258                         << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
259                         << "layout(binding = 0) buffer Input {\n"
260                         << "    uint values[" << m_numValues << "];\n"
261                         << "} sb_in;\n"
262                         << "layout (binding = 1) buffer Output {\n"
263                         << "    uint values[" << m_numValues << "];\n"
264                         << "} sb_out;\n"
265                         << "void main (void) {\n"
266                         << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
267                         << "    uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
268                         << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
269                         << "    uint offset          = numValuesPerInv*groupNdx;\n"
270                         << "\n"
271                         << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
272                         << "        sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
273                         << "}\n";
274
275                 const glw::Functions&           gl                              = m_context.getRenderContext().getFunctions();
276                 const ShaderProgram                     program                 (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
277                 const Buffer                            inputBuffer             (m_context.getRenderContext());
278                 const Buffer                            outputBuffer    (m_context.getRenderContext());
279                 std::vector<deUint32>           inputValues             (m_numValues);
280
281                 // Compute input values.
282                 {
283                         de::Random rnd(0x124fef);
284                         for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
285                                 inputValues[ndx] = rnd.getUint32();
286                 }
287
288                 m_testCtx.getLog() << program;
289                 if (!program.isOk())
290                         TCU_FAIL("Compile failed");
291
292                 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
293
294                 gl.useProgram(program.getProgram());
295
296                 // Input buffer setup
297                 {
298                         const deUint32                          blockIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
299                         const InterfaceBlockInfo        blockInfo       = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
300                         const deUint32                          valueIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
301                         const InterfaceVariableInfo     valueInfo       = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
302
303                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
304                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
305
306                         TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
307
308                         {
309                                 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
310
311                                 for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
312                                         *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
313                         }
314
315                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
316                         GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
317                 }
318
319                 // Output buffer setup
320                 {
321                         const deUint32                          blockIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
322                         const InterfaceBlockInfo        blockInfo       = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
323
324                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
325                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, DE_NULL, GL_STREAM_READ);
326                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *outputBuffer);
327                         GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
328                 }
329
330                 // Dispatch compute workload
331                 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
332                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
333
334                 // Read back and compare
335                 {
336                         const deUint32                          blockIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
337                         const int                                       blockSize       = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
338                         const deUint32                          valueIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
339                         const InterfaceVariableInfo     valueInfo       = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
340                         const BufferMemMap                      bufMap          (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
341
342                         TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
343                         for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
344                         {
345                                 const deUint32  res             = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
346                                 const deUint32  ref             = ~inputValues[ndx];
347
348                                 if (res != ref)
349                                         throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
350                         }
351                 }
352
353                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
354                 return STOP;
355         }
356
357 private:
358         const int                       m_numValues;
359         const tcu::IVec3        m_localSize;
360         const tcu::IVec3        m_workSize;
361 };
362
363 class InvertSSBOInPlaceCase : public TestCase
364 {
365 public:
366         InvertSSBOInPlaceCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
367                 : TestCase              (context, name, description)
368                 , m_numValues   (numValues)
369                 , m_isSized             (isSized)
370                 , m_localSize   (localSize)
371                 , m_workSize    (workSize)
372         {
373                 DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
374         }
375
376         IterateResult iterate (void)
377         {
378                 const GLSLVersion       glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
379                 std::ostringstream      src;
380
381                 src << getGLSLVersionDeclaration(glslVersion) << "\n"
382                         << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
383                         << "layout(binding = 0) buffer InOut {\n"
384                         << "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
385                         << "} sb_inout;\n"
386                         << "void main (void) {\n"
387                         << "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
388                         << "    uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
389                         << "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
390                         << "    uint offset          = numValuesPerInv*groupNdx;\n"
391                         << "\n"
392                         << "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
393                         << "        sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
394                         << "}\n";
395
396                 const glw::Functions&           gl                              = m_context.getRenderContext().getFunctions();
397                 const ShaderProgram                     program                 (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
398
399                 m_testCtx.getLog() << program;
400                 if (!program.isOk())
401                         TCU_FAIL("Compile failed");
402
403                 const Buffer                            outputBuffer    (m_context.getRenderContext());
404                 const deUint32                          valueIndex              = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "InOut.values");
405                 const InterfaceVariableInfo     valueInfo               = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
406                 const deUint32                          blockSize               = valueInfo.arrayStride*(deUint32)m_numValues;
407                 std::vector<deUint32>           inputValues             (m_numValues);
408
409                 // Compute input values.
410                 {
411                         de::Random rnd(0x82ce7f);
412                         for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
413                                 inputValues[ndx] = rnd.getUint32();
414                 }
415
416                 TCU_CHECK(valueInfo.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
417
418                 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
419
420                 gl.useProgram(program.getProgram());
421
422                 // Output buffer setup
423                 {
424                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
425                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_DRAW);
426
427                         {
428                                 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockSize, GL_MAP_WRITE_BIT);
429
430                                 for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
431                                         *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
432                         }
433
434                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
435                         GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
436                 }
437
438                 // Dispatch compute workload
439                 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
440                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
441
442                 // Read back and compare
443                 {
444                         const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
445
446                         for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
447                         {
448                                 const deUint32  res             = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
449                                 const deUint32  ref             = ~inputValues[ndx];
450
451                                 if (res != ref)
452                                         throw tcu::TestError(string("Comparison failed for InOut.values[") + de::toString(ndx) + "]");
453                         }
454                 }
455
456                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
457                 return STOP;
458         }
459
460 private:
461         const int                       m_numValues;
462         const bool                      m_isSized;
463         const tcu::IVec3        m_localSize;
464         const tcu::IVec3        m_workSize;
465 };
466
467 class WriteToMultipleSSBOCase : public TestCase
468 {
469 public:
470         WriteToMultipleSSBOCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
471                 : TestCase              (context, name, description)
472                 , m_numValues   (numValues)
473                 , m_isSized             (isSized)
474                 , m_localSize   (localSize)
475                 , m_workSize    (workSize)
476         {
477                 DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
478         }
479
480         IterateResult iterate (void)
481         {
482                 const GLSLVersion       glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
483                 std::ostringstream      src;
484
485                 src << getGLSLVersionDeclaration(glslVersion) << "\n"
486                         << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
487                         << "layout(binding = 0) buffer Out0 {\n"
488                         << "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
489                         << "} sb_out0;\n"
490                         << "layout(binding = 1) buffer Out1 {\n"
491                         << "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
492                         << "} sb_out1;\n"
493                         << "void main (void) {\n"
494                         << "    uvec3 size      = gl_NumWorkGroups * gl_WorkGroupSize;\n"
495                         << "    uint groupNdx   = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
496                         << "\n"
497                         << "    {\n"
498                         << "        uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
499                         << "        uint offset          = numValuesPerInv*groupNdx;\n"
500                         << "\n"
501                         << "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
502                         << "            sb_out0.values[offset + ndx] = offset + ndx;\n"
503                         << "    }\n"
504                         << "    {\n"
505                         << "        uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
506                         << "        uint offset          = numValuesPerInv*groupNdx;\n"
507                         << "\n"
508                         << "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
509                         << "            sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
510                         << "    }\n"
511                         << "}\n";
512
513                 const glw::Functions&           gl                              = m_context.getRenderContext().getFunctions();
514                 const ShaderProgram                     program                 (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
515
516                 m_testCtx.getLog() << program;
517                 if (!program.isOk())
518                         TCU_FAIL("Compile failed");
519
520                 const Buffer                            outputBuffer0   (m_context.getRenderContext());
521                 const deUint32                          value0Index             = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out0.values");
522                 const InterfaceVariableInfo     value0Info              = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value0Index);
523                 const deUint32                          block0Size              = value0Info.arrayStride*(deUint32)m_numValues;
524
525                 const Buffer                            outputBuffer1   (m_context.getRenderContext());
526                 const deUint32                          value1Index             = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out1.values");
527                 const InterfaceVariableInfo     value1Info              = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value1Index);
528                 const deUint32                          block1Size              = value1Info.arrayStride*(deUint32)m_numValues;
529
530                 TCU_CHECK(value0Info.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
531                 TCU_CHECK(value1Info.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
532
533                 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
534
535                 gl.useProgram(program.getProgram());
536
537                 // Output buffer setup
538                 {
539                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0);
540                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, block0Size, DE_NULL, GL_STREAM_DRAW);
541
542                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer0);
543                         GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
544                 }
545                 {
546                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1);
547                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, block1Size, DE_NULL, GL_STREAM_DRAW);
548
549                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer1);
550                         GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
551                 }
552
553                 // Dispatch compute workload
554                 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
555                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
556
557                 // Read back and compare
558                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0);
559                 {
560                         const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block0Size, GL_MAP_READ_BIT);
561
562                         for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++)
563                         {
564                                 const deUint32  res             = *((const deUint32*)((const deUint8*)bufMap.getPtr() + value0Info.offset + value0Info.arrayStride*ndx));
565                                 const deUint32  ref             = ndx;
566
567                                 if (res != ref)
568                                         throw tcu::TestError(string("Comparison failed for Out0.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref));
569                         }
570                 }
571                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1);
572                 {
573                         const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block1Size, GL_MAP_READ_BIT);
574
575                         for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++)
576                         {
577                                 const deUint32  res             = *((const deUint32*)((const deUint8*)bufMap.getPtr() + value1Info.offset + value1Info.arrayStride*ndx));
578                                 const deUint32  ref             = m_numValues - ndx;
579
580                                 if (res != ref)
581                                         throw tcu::TestError(string("Comparison failed for Out1.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref));
582                         }
583                 }
584                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
585                 return STOP;
586         }
587
588 private:
589         const int                       m_numValues;
590         const bool                      m_isSized;
591         const tcu::IVec3        m_localSize;
592         const tcu::IVec3        m_workSize;
593 };
594
595 class SSBOLocalBarrierCase : public TestCase
596 {
597 public:
598         SSBOLocalBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
599                 : TestCase              (context, name, description)
600                 , m_localSize   (localSize)
601                 , m_workSize    (workSize)
602         {
603         }
604
605         IterateResult iterate (void)
606         {
607                 const glw::Functions&           gl                              = m_context.getRenderContext().getFunctions();
608                 const Buffer                            outputBuffer    (m_context.getRenderContext());
609                 const int                                       workGroupSize   = m_localSize[0]*m_localSize[1]*m_localSize[2];
610                 const int                                       workGroupCount  = m_workSize[0]*m_workSize[1]*m_workSize[2];
611                 const int                                       numValues               = workGroupSize*workGroupCount;
612
613                 const GLSLVersion                       glslVersion             = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
614                 std::ostringstream                      src;
615
616                 src << getGLSLVersionDeclaration(glslVersion) << "\n"
617                         << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
618                         << "layout(binding = 0) buffer Output {\n"
619                         << "    coherent uint values[" << numValues << "];\n"
620                         << "} sb_out;\n\n"
621                         << "shared uint offsets[" << workGroupSize << "];\n\n"
622                         << "void main (void) {\n"
623                         << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
624                         << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
625                         << "    uint globalOffs = localSize*globalNdx;\n"
626                         << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
627                         << "\n"
628                         << "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
629                         << "    memoryBarrierBuffer();\n"
630                         << "    barrier();\n"
631                         << "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n"
632                         << "    memoryBarrierBuffer();\n"
633                         << "    barrier();\n"
634                         << "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
635                         << "}\n";
636
637                 const ShaderProgram                     program                 (m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
638
639                 m_testCtx.getLog() << program;
640                 if (!program.isOk())
641                         TCU_FAIL("Compile failed");
642
643                 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
644
645                 gl.useProgram(program.getProgram());
646
647                 // Output buffer setup
648                 {
649                         const deUint32          blockIndex              = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
650                         const int                       blockSize               = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
651
652                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
653                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
654                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
655                         GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
656                 }
657
658                 // Dispatch compute workload
659                 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
660                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
661
662                 // Read back and compare
663                 {
664                         const deUint32                          blockIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
665                         const int                                       blockSize       = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
666                         const deUint32                          valueIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
667                         const InterfaceVariableInfo     valueInfo       = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
668                         const BufferMemMap                      bufMap          (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
669
670                         for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
671                         {
672                                 for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
673                                 {
674                                         const int               globalOffs      = groupNdx*workGroupSize;
675                                         const deUint32  res                     = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
676                                         const int               offs0           = localOffs-1 < 0 ? ((localOffs+workGroupSize-1)%workGroupSize) : ((localOffs-1)%workGroupSize);
677                                         const int               offs1           = localOffs-2 < 0 ? ((localOffs+workGroupSize-2)%workGroupSize) : ((localOffs-2)%workGroupSize);
678                                         const deUint32  ref                     = (deUint32)(globalOffs + offs0 + offs1);
679
680                                         if (res != ref)
681                                                 throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
682                                 }
683                         }
684                 }
685
686                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
687                 return STOP;
688         }
689
690 private:
691         const tcu::IVec3        m_localSize;
692         const tcu::IVec3        m_workSize;
693 };
694
695 class SSBOBarrierCase : public TestCase
696 {
697 public:
698         SSBOBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& workSize)
699                 : TestCase              (context, name, description)
700                 , m_workSize    (workSize)
701         {
702         }
703
704         IterateResult iterate (void)
705         {
706                 const GLSLVersion       glslVersion                             = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
707                 const char* const       glslVersionDeclaration  = getGLSLVersionDeclaration(glslVersion);
708
709                 std::ostringstream src0;
710                 src0 << glslVersionDeclaration << "\n"
711                          << "layout (local_size_x = 1) in;\n"
712                                                   "uniform uint u_baseVal;\n"
713                                                   "layout(binding = 1) buffer Output {\n"
714                                                   "    uint values[];\n"
715                                                   "};\n"
716                                                   "void main (void) {\n"
717                                                   "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
718                                                   "    values[offset] = u_baseVal+offset;\n"
719                                 "}\n";
720
721                 std::ostringstream src1;
722                 src1 << glslVersionDeclaration << "\n"
723                          << "layout (local_size_x = 1) in;\n"
724                                                   "uniform uint u_baseVal;\n"
725                                                   "layout(binding = 1) buffer Input {\n"
726                                                   "    uint values[];\n"
727                                                   "};\n"
728                                                   "layout(binding = 0) buffer Output {\n"
729                                                   "    coherent uint sum;\n"
730                                                   "};\n"
731                                                   "void main (void) {\n"
732                                                   "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
733                                                   "    uint value  = values[offset];\n"
734                                                   "    atomicAdd(sum, value);\n"
735                                 "}\n";
736
737                 const ShaderProgram                     program0                (m_context.getRenderContext(), ProgramSources() << ComputeSource(src0.str()));
738                 const ShaderProgram                     program1                (m_context.getRenderContext(), ProgramSources() << ComputeSource(src1.str()));
739
740                 const glw::Functions&           gl                              = m_context.getRenderContext().getFunctions();
741                 const Buffer                            tempBuffer              (m_context.getRenderContext());
742                 const Buffer                            outputBuffer    (m_context.getRenderContext());
743                 const deUint32                          baseValue               = 127;
744
745                 m_testCtx.getLog() << program0 << program1;
746                 if (!program0.isOk() || !program1.isOk())
747                         TCU_FAIL("Compile failed");
748
749                 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
750
751                 // Temp buffer setup
752                 {
753                         const deUint32                          valueIndex              = gl.getProgramResourceIndex(program0.getProgram(), GL_BUFFER_VARIABLE, "values[0]");
754                         const InterfaceVariableInfo     valueInfo               = getProgramInterfaceVariableInfo(gl, program0.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
755                         const deUint32                          bufferSize              = valueInfo.arrayStride*m_workSize[0]*m_workSize[1]*m_workSize[2];
756
757                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *tempBuffer);
758                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)bufferSize, DE_NULL, GL_STATIC_DRAW);
759                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *tempBuffer);
760                         GLU_EXPECT_NO_ERROR(gl.getError(), "Temp buffer setup failed");
761                 }
762
763                 // Output buffer setup
764                 {
765                         const deUint32          blockIndex              = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
766                         const int                       blockSize               = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
767
768                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
769                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
770
771                         {
772                                 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
773                                 deMemset(bufMap.getPtr(), 0, blockSize);
774                         }
775
776                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
777                         GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
778                 }
779
780                 // Dispatch compute workload
781                 gl.useProgram(program0.getProgram());
782                 gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
783                 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
784                 gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
785                 gl.useProgram(program1.getProgram());
786                 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
787                 GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands");
788
789                 // Read back and compare
790                 {
791                         const deUint32                          blockIndex      = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
792                         const int                                       blockSize       = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
793                         const deUint32                          valueIndex      = gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum");
794                         const InterfaceVariableInfo     valueInfo       = getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
795                         const BufferMemMap                      bufMap          (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
796
797                         const deUint32                          res                     = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset));
798                         deUint32                                        ref                     = 0;
799
800                         for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]*m_workSize[2]; ndx++)
801                                 ref += baseValue + (deUint32)ndx;
802
803                         if (res != ref)
804                         {
805                                 m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage;
806                                 throw tcu::TestError("Comparison failed");
807                         }
808                 }
809
810                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
811                 return STOP;
812         }
813
814 private:
815         const tcu::IVec3        m_workSize;
816 };
817
818 class BasicSharedVarCase : public TestCase
819 {
820 public:
821         BasicSharedVarCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
822                 : TestCase              (context, name, description)
823                 , m_localSize   (localSize)
824                 , m_workSize    (workSize)
825         {
826         }
827
828         IterateResult iterate (void)
829         {
830                 const glw::Functions&           gl                              = m_context.getRenderContext().getFunctions();
831                 const Buffer                            outputBuffer    (m_context.getRenderContext());
832                 const int                                       workGroupSize   = m_localSize[0]*m_localSize[1]*m_localSize[2];
833                 const int                                       workGroupCount  = m_workSize[0]*m_workSize[1]*m_workSize[2];
834                 const int                                       numValues               = workGroupSize*workGroupCount;
835
836                 const GLSLVersion                       glslVersion             = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
837                 std::ostringstream                      src;
838
839                 src << getGLSLVersionDeclaration(glslVersion) << "\n"
840                         << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
841                         << "layout(binding = 0) buffer Output {\n"
842                         << "    uint values[" << numValues << "];\n"
843                         << "} sb_out;\n\n"
844                         << "shared uint offsets[" << workGroupSize << "];\n\n"
845                         << "void main (void) {\n"
846                         << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
847                         << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
848                         << "    uint globalOffs = localSize*globalNdx;\n"
849                         << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
850                         << "\n"
851                         << "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
852                         << "    barrier();\n"
853                         << "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
854                         << "}\n";
855
856                 const ShaderProgram                     program                 (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
857
858                 m_testCtx.getLog() << program;
859                 if (!program.isOk())
860                         TCU_FAIL("Compile failed");
861
862                 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
863
864                 gl.useProgram(program.getProgram());
865
866                 // Output buffer setup
867                 {
868                         const deUint32          blockIndex              = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
869                         const int                       blockSize               = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
870
871                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
872                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
873                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
874                         GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
875                 }
876
877                 // Dispatch compute workload
878                 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
879                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
880
881                 // Read back and compare
882                 {
883                         const deUint32                          blockIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
884                         const int                                       blockSize       = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
885                         const deUint32                          valueIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
886                         const InterfaceVariableInfo     valueInfo       = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
887                         const BufferMemMap                      bufMap          (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
888
889                         for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
890                         {
891                                 for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
892                                 {
893                                         const int               globalOffs      = groupNdx*workGroupSize;
894                                         const deUint32  res                     = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
895                                         const deUint32  ref                     = (deUint32)(globalOffs + (workGroupSize-localOffs-1)*(workGroupSize-localOffs-1));
896
897                                         if (res != ref)
898                                                 throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
899                                 }
900                         }
901                 }
902
903                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
904                 return STOP;
905         }
906
907 private:
908         const tcu::IVec3        m_localSize;
909         const tcu::IVec3        m_workSize;
910 };
911
912 class SharedVarAtomicOpCase : public TestCase
913 {
914 public:
915         SharedVarAtomicOpCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
916                 : TestCase              (context, name, description)
917                 , m_localSize   (localSize)
918                 , m_workSize    (workSize)
919         {
920         }
921
922         IterateResult iterate (void)
923         {
924                 const glw::Functions&           gl                              = m_context.getRenderContext().getFunctions();
925                 const Buffer                            outputBuffer    (m_context.getRenderContext());
926                 const int                                       workGroupSize   = m_localSize[0]*m_localSize[1]*m_localSize[2];
927                 const int                                       workGroupCount  = m_workSize[0]*m_workSize[1]*m_workSize[2];
928                 const int                                       numValues               = workGroupSize*workGroupCount;
929
930                 const GLSLVersion                       glslVersion             = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
931                 std::ostringstream                      src;
932
933                 src << getGLSLVersionDeclaration(glslVersion) << "\n"
934                         << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
935                         << "layout(binding = 0) buffer Output {\n"
936                         << "    uint values[" << numValues << "];\n"
937                         << "} sb_out;\n\n"
938                         << "shared uint count;\n\n"
939                         << "void main (void) {\n"
940                         << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
941                         << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
942                         << "    uint globalOffs = localSize*globalNdx;\n"
943                         << "\n"
944                         << "    count = 0u;\n"
945                         << "    barrier();\n"
946                         << "    uint oldVal = atomicAdd(count, 1u);\n"
947                         << "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
948                         << "}\n";
949
950                 const ShaderProgram                     program                 (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
951
952                 m_testCtx.getLog() << program;
953                 if (!program.isOk())
954                         TCU_FAIL("Compile failed");
955
956                 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
957
958                 gl.useProgram(program.getProgram());
959
960                 // Output buffer setup
961                 {
962                         const deUint32          blockIndex              = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
963                         const int                       blockSize               = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
964
965                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
966                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
967                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
968                         GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
969                 }
970
971                 // Dispatch compute workload
972                 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
973                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
974
975                 // Read back and compare
976                 {
977                         const deUint32                          blockIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
978                         const int                                       blockSize       = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
979                         const deUint32                          valueIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
980                         const InterfaceVariableInfo     valueInfo       = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
981                         const BufferMemMap                      bufMap          (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
982
983                         for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
984                         {
985                                 for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
986                                 {
987                                         const int               globalOffs      = groupNdx*workGroupSize;
988                                         const deUint32  res                     = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
989                                         const deUint32  ref                     = (deUint32)(localOffs+1);
990
991                                         if (res != ref)
992                                                 throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
993                                 }
994                         }
995                 }
996
997                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
998                 return STOP;
999         }
1000
1001 private:
1002         const tcu::IVec3        m_localSize;
1003         const tcu::IVec3        m_workSize;
1004 };
1005
1006 class CopyImageToSSBOCase : public TestCase
1007 {
1008 public:
1009         CopyImageToSSBOCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
1010                 : TestCase              (context, name, description)
1011                 , m_localSize   (localSize)
1012                 , m_imageSize   (imageSize)
1013         {
1014                 DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0);
1015                 DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0);
1016         }
1017
1018         IterateResult iterate (void)
1019         {
1020                 const GLSLVersion                       glslVersion             = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1021                 std::ostringstream                      src;
1022
1023                 src << getGLSLVersionDeclaration(glslVersion) << "\n"
1024                         << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n"
1025                         << "layout(r32ui, binding = 1) readonly uniform highp uimage2D u_srcImg;\n"
1026                         << "layout(binding = 0) buffer Output {\n"
1027                         << "    uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n"
1028                         << "} sb_out;\n\n"
1029                         << "void main (void) {\n"
1030                         << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1031                         << "    uint value  = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
1032                         << "    sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
1033                         << "}\n";
1034
1035                 const glw::Functions&           gl                              = m_context.getRenderContext().getFunctions();
1036                 const Buffer                            outputBuffer    (m_context.getRenderContext());
1037                 const Texture                           inputTexture    (m_context.getRenderContext());
1038                 const ShaderProgram                     program                 (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1039                 const tcu::IVec2                        workSize                = m_imageSize / m_localSize;
1040                 de::Random                                      rnd                             (0xab2c7);
1041                 vector<deUint32>                        inputValues             (m_imageSize[0]*m_imageSize[1]);
1042
1043                 m_testCtx.getLog() << program;
1044                 if (!program.isOk())
1045                         TCU_FAIL("Compile failed");
1046
1047                 m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage;
1048
1049                 gl.useProgram(program.getProgram());
1050
1051                 // Input values
1052                 for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1053                         *i = rnd.getUint32();
1054
1055                 // Input image setup
1056                 gl.bindTexture(GL_TEXTURE_2D, *inputTexture);
1057                 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1058                 gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_imageSize[0], m_imageSize[1], GL_RED_INTEGER, GL_UNSIGNED_INT, &inputValues[0]);
1059                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1060                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1061                 GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1062
1063                 // Bind to unit 1
1064                 gl.bindImageTexture(1, *inputTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
1065                 GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1066
1067                 // Output buffer setup
1068                 {
1069                         const deUint32          blockIndex              = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1070                         const int                       blockSize               = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1071
1072                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1073                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1074                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1075                         GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1076                 }
1077
1078                 // Dispatch compute workload
1079                 gl.dispatchCompute(workSize[0], workSize[1], 1);
1080                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1081
1082                 // Read back and compare
1083                 {
1084                         const deUint32                          blockIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1085                         const int                                       blockSize       = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1086                         const deUint32                          valueIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
1087                         const InterfaceVariableInfo     valueInfo       = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1088                         const BufferMemMap                      bufMap          (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1089
1090                         TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
1091
1092                         for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
1093                         {
1094                                 const deUint32  res             = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
1095                                 const deUint32  ref             = inputValues[ndx];
1096
1097                                 if (res != ref)
1098                                         throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
1099                         }
1100                 }
1101
1102                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1103                 return STOP;
1104         }
1105
1106 private:
1107         const tcu::IVec2        m_localSize;
1108         const tcu::IVec2        m_imageSize;
1109 };
1110
1111 class CopySSBOToImageCase : public TestCase
1112 {
1113 public:
1114         CopySSBOToImageCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
1115                 : TestCase              (context, name, description)
1116                 , m_localSize   (localSize)
1117                 , m_imageSize   (imageSize)
1118         {
1119                 DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0);
1120                 DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0);
1121         }
1122
1123         IterateResult iterate (void)
1124         {
1125                 const GLSLVersion                       glslVersion             = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1126                 std::ostringstream                      src;
1127
1128                 src << getGLSLVersionDeclaration(glslVersion) << "\n"
1129                         << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n"
1130                         << "layout(r32ui, binding = 1) writeonly uniform highp uimage2D u_dstImg;\n"
1131                         << "buffer Input {\n"
1132                         << "    uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n"
1133                         << "} sb_in;\n\n"
1134                         << "void main (void) {\n"
1135                         << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1136                         << "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1137                         << "    imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
1138                         << "}\n";
1139
1140                 const glw::Functions&           gl                              = m_context.getRenderContext().getFunctions();
1141                 const Buffer                            inputBuffer             (m_context.getRenderContext());
1142                 const Texture                           outputTexture   (m_context.getRenderContext());
1143                 const ShaderProgram                     program                 (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1144                 const tcu::IVec2                        workSize                = m_imageSize / m_localSize;
1145                 de::Random                                      rnd                             (0x77238ac2);
1146                 vector<deUint32>                        inputValues             (m_imageSize[0]*m_imageSize[1]);
1147
1148                 m_testCtx.getLog() << program;
1149                 if (!program.isOk())
1150                         TCU_FAIL("Compile failed");
1151
1152                 m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage;
1153
1154                 gl.useProgram(program.getProgram());
1155
1156                 // Input values
1157                 for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1158                         *i = rnd.getUint32();
1159
1160                 // Input buffer setup
1161                 {
1162                         const deUint32                          blockIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
1163                         const InterfaceBlockInfo        blockInfo       = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
1164                         const deUint32                          valueIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
1165                         const InterfaceVariableInfo     valueInfo       = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1166
1167                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
1168                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
1169
1170                         TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
1171
1172                         {
1173                                 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
1174
1175                                 for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
1176                                         *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
1177                         }
1178
1179                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
1180                         GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
1181                 }
1182
1183                 // Output image setup
1184                 gl.bindTexture(GL_TEXTURE_2D, *outputTexture);
1185                 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1186                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1187                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1188                 GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1189
1190                 // Bind to unit 1
1191                 gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
1192                 GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1193
1194                 // Dispatch compute workload
1195                 gl.dispatchCompute(workSize[0], workSize[1], 1);
1196                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1197
1198                 // Read back and compare
1199                 {
1200                         Framebuffer                     fbo                     (m_context.getRenderContext());
1201                         vector<deUint32>        pixels          (inputValues.size()*4);
1202
1203                         gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo);
1204                         gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0);
1205                         TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
1206
1207                         // \note In ES3 we have to use GL_RGBA_INTEGER
1208                         gl.readBuffer(GL_COLOR_ATTACHMENT0);
1209                         gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]);
1210                         GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed");
1211
1212                         for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
1213                         {
1214                                 const deUint32  res             = pixels[ndx*4];
1215                                 const deUint32  ref             = inputValues[ndx];
1216
1217                                 if (res != ref)
1218                                         throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(ndx));
1219                         }
1220                 }
1221
1222                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1223                 return STOP;
1224         }
1225
1226 private:
1227         const tcu::IVec2        m_localSize;
1228         const tcu::IVec2        m_imageSize;
1229 };
1230
1231 class ImageAtomicOpCase : public TestCase
1232 {
1233 public:
1234         ImageAtomicOpCase (Context& context, const char* name, const char* description, int localSize, const tcu::IVec2& imageSize)
1235                 : TestCase              (context, name, description)
1236                 , m_localSize   (localSize)
1237                 , m_imageSize   (imageSize)
1238         {
1239         }
1240
1241         void init (void)
1242         {
1243                 if (!glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2)))
1244                         if (!m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1245                                 throw tcu::NotSupportedError("Test requires OES_shader_image_atomic extension");
1246         }
1247
1248         IterateResult iterate (void)
1249         {
1250                 const GLSLVersion                       glslVersion             = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1251                 const bool                                      supportsES32    = glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2));
1252                 std::ostringstream                      src;
1253
1254                 src << getGLSLVersionDeclaration(glslVersion) << "\n"
1255                         << (supportsES32 ? "\n" : "#extension GL_OES_shader_image_atomic : require\n")
1256                         << "layout (local_size_x = " << m_localSize << ") in;\n"
1257                         << "layout(r32ui, binding = 1) uniform highp uimage2D u_dstImg;\n"
1258                         << "buffer Input {\n"
1259                         << "    uint values[" << (m_imageSize[0]*m_imageSize[1]*m_localSize) << "];\n"
1260                         << "} sb_in;\n\n"
1261                         << "void main (void) {\n"
1262                         << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1263                         << "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1264                         << "\n"
1265                         << "    if (gl_LocalInvocationIndex == 0u)\n"
1266                         << "        imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
1267                         << "    barrier();\n"
1268                         << "    imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
1269                         << "}\n";
1270
1271                 const glw::Functions&           gl                              = m_context.getRenderContext().getFunctions();
1272                 const Buffer                            inputBuffer             (m_context.getRenderContext());
1273                 const Texture                           outputTexture   (m_context.getRenderContext());
1274                 const ShaderProgram                     program                 (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1275                 de::Random                                      rnd                             (0x77238ac2);
1276                 vector<deUint32>                        inputValues             (m_imageSize[0]*m_imageSize[1]*m_localSize);
1277
1278                 m_testCtx.getLog() << program;
1279                 if (!program.isOk())
1280                         TCU_FAIL("Compile failed");
1281
1282                 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_imageSize << TestLog::EndMessage;
1283
1284                 gl.useProgram(program.getProgram());
1285
1286                 // Input values
1287                 for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1288                         *i = rnd.getUint32();
1289
1290                 // Input buffer setup
1291                 {
1292                         const deUint32                          blockIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
1293                         const InterfaceBlockInfo        blockInfo       = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
1294                         const deUint32                          valueIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
1295                         const InterfaceVariableInfo     valueInfo       = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1296
1297                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
1298                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
1299
1300                         TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
1301
1302                         {
1303                                 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
1304
1305                                 for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
1306                                         *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
1307                         }
1308
1309                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
1310                         GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
1311                 }
1312
1313                 // Output image setup
1314                 gl.bindTexture(GL_TEXTURE_2D, *outputTexture);
1315                 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1316                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1317                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1318                 GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1319
1320                 // Bind to unit 1
1321                 gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
1322                 GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1323
1324                 // Dispatch compute workload
1325                 gl.dispatchCompute(m_imageSize[0], m_imageSize[1], 1);
1326                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1327
1328                 // Read back and compare
1329                 {
1330                         Framebuffer                     fbo                     (m_context.getRenderContext());
1331                         vector<deUint32>        pixels          (m_imageSize[0]*m_imageSize[1]*4);
1332
1333                         gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo);
1334                         gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0);
1335                         TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
1336
1337                         // \note In ES3 we have to use GL_RGBA_INTEGER
1338                         gl.readBuffer(GL_COLOR_ATTACHMENT0);
1339                         gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]);
1340                         GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed");
1341
1342                         for (int pixelNdx = 0; pixelNdx < (int)inputValues.size()/m_localSize; pixelNdx++)
1343                         {
1344                                 const deUint32  res             = pixels[pixelNdx*4];
1345                                 deUint32                ref             = 0;
1346
1347                                 for (int offs = 0; offs < m_localSize; offs++)
1348                                         ref += inputValues[pixelNdx*m_localSize + offs];
1349
1350                                 if (res != ref)
1351                                         throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(pixelNdx));
1352                         }
1353                 }
1354
1355                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1356                 return STOP;
1357         }
1358
1359 private:
1360         const int                       m_localSize;
1361         const tcu::IVec2        m_imageSize;
1362 };
1363
1364 class ImageBarrierCase : public TestCase
1365 {
1366 public:
1367         ImageBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec2& workSize)
1368                 : TestCase              (context, name, description)
1369                 , m_workSize    (workSize)
1370         {
1371         }
1372
1373         IterateResult iterate (void)
1374         {
1375                 const GLSLVersion                       glslVersion                             = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1376                 const char* const                       glslVersionDeclaration  = getGLSLVersionDeclaration(glslVersion);
1377
1378                 std::ostringstream src0;
1379                 src0 << glslVersionDeclaration << "\n"
1380                          << "layout (local_size_x = 1) in;\n"
1381                                                   "uniform uint u_baseVal;\n"
1382                                                   "layout(r32ui, binding = 2) writeonly uniform highp uimage2D u_img;\n"
1383                                                   "void main (void) {\n"
1384                                                   "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1385                                                   "    imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset+u_baseVal, 0, 0, 0));\n"
1386                                 "}\n";
1387
1388                 std::ostringstream src1;
1389                 src1 << glslVersionDeclaration << "\n"
1390                          << "layout (local_size_x = 1) in;\n"
1391                                                   "layout(r32ui, binding = 2) readonly uniform highp uimage2D u_img;\n"
1392                                                   "layout(binding = 0) buffer Output {\n"
1393                                                   "    coherent uint sum;\n"
1394                                                   "};\n"
1395                                                   "void main (void) {\n"
1396                                                   "    uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
1397                                                   "    atomicAdd(sum, value);\n"
1398                                 "}\n";
1399
1400                 const ShaderProgram                     program0                (m_context.getRenderContext(), ProgramSources() << ComputeSource(src0.str()));
1401                 const ShaderProgram                     program1                (m_context.getRenderContext(), ProgramSources() << ComputeSource(src1.str()));
1402
1403                 const glw::Functions&           gl                              = m_context.getRenderContext().getFunctions();
1404                 const Texture                           tempTexture             (m_context.getRenderContext());
1405                 const Buffer                            outputBuffer    (m_context.getRenderContext());
1406                 const deUint32                          baseValue               = 127;
1407
1408                 m_testCtx.getLog() << program0 << program1;
1409                 if (!program0.isOk() || !program1.isOk())
1410                         TCU_FAIL("Compile failed");
1411
1412                 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
1413
1414                 // Temp texture setup
1415                 gl.bindTexture(GL_TEXTURE_2D, *tempTexture);
1416                 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize[0], m_workSize[1]);
1417                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1418                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1419                 GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1420
1421                 // Bind to unit 2
1422                 gl.bindImageTexture(2, *tempTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
1423                 GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1424
1425                 // Output buffer setup
1426                 {
1427                         const deUint32          blockIndex              = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1428                         const int                       blockSize               = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1429
1430                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1431                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1432
1433                         {
1434                                 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
1435                                 deMemset(bufMap.getPtr(), 0, blockSize);
1436                         }
1437
1438                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1439                         GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1440                 }
1441
1442                 // Dispatch compute workload
1443                 gl.useProgram(program0.getProgram());
1444                 gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
1445                 gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
1446                 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1447                 gl.useProgram(program1.getProgram());
1448                 gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
1449                 GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands");
1450
1451                 // Read back and compare
1452                 {
1453                         const deUint32                          blockIndex      = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1454                         const int                                       blockSize       = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1455                         const deUint32                          valueIndex      = gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum");
1456                         const InterfaceVariableInfo     valueInfo       = getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1457                         const BufferMemMap                      bufMap          (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1458
1459                         const deUint32                          res                     = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset));
1460                         deUint32                                        ref                     = 0;
1461
1462                         for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]; ndx++)
1463                                 ref += baseValue + (deUint32)ndx;
1464
1465                         if (res != ref)
1466                         {
1467                                 m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage;
1468                                 throw tcu::TestError("Comparison failed");
1469                         }
1470                 }
1471
1472                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1473                 return STOP;
1474         }
1475
1476 private:
1477         const tcu::IVec2        m_workSize;
1478 };
1479
1480 class AtomicCounterCase : public TestCase
1481 {
1482 public:
1483         AtomicCounterCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
1484                 : TestCase              (context, name, description)
1485                 , m_localSize   (localSize)
1486                 , m_workSize    (workSize)
1487         {
1488         }
1489
1490         IterateResult iterate (void)
1491         {
1492                 const glw::Functions&           gl                              = m_context.getRenderContext().getFunctions();
1493                 const Buffer                            outputBuffer    (m_context.getRenderContext());
1494                 const Buffer                            counterBuffer   (m_context.getRenderContext());
1495                 const int                                       workGroupSize   = m_localSize[0]*m_localSize[1]*m_localSize[2];
1496                 const int                                       workGroupCount  = m_workSize[0]*m_workSize[1]*m_workSize[2];
1497                 const int                                       numValues               = workGroupSize*workGroupCount;
1498
1499                 const GLSLVersion                       glslVersion             = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType());
1500                 std::ostringstream                      src;
1501
1502                 src << getGLSLVersionDeclaration(glslVersion) << "\n"
1503                         << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
1504                         << "layout(binding = 0) buffer Output {\n"
1505                         << "    uint values[" << numValues << "];\n"
1506                         << "} sb_out;\n\n"
1507                         << "layout(binding = 0, offset = 0) uniform atomic_uint u_count;\n\n"
1508                         << "void main (void) {\n"
1509                         << "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
1510                         << "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1511                         << "    uint globalOffs = localSize*globalNdx;\n"
1512                         << "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
1513                         << "\n"
1514                         << "    uint oldVal = atomicCounterIncrement(u_count);\n"
1515                         << "    sb_out.values[globalOffs+localOffs] = oldVal;\n"
1516                         << "}\n";
1517
1518                 const ShaderProgram                     program                 (m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
1519
1520                 m_testCtx.getLog() << program;
1521                 if (!program.isOk())
1522                         TCU_FAIL("Compile failed");
1523
1524                 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
1525
1526                 gl.useProgram(program.getProgram());
1527
1528                 // Atomic counter buffer setup
1529                 {
1530                         const deUint32  uniformIndex    = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count");
1531                         const deUint32  bufferIndex             = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX);
1532                         const deUint32  bufferSize              = getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE);
1533
1534                         gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, *counterBuffer);
1535                         gl.bufferData(GL_ATOMIC_COUNTER_BUFFER, bufferSize, DE_NULL, GL_STREAM_READ);
1536
1537                         {
1538                                 const BufferMemMap memMap(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_WRITE_BIT);
1539                                 deMemset(memMap.getPtr(), 0, (int)bufferSize);
1540                         }
1541
1542                         gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, *counterBuffer);
1543                         GLU_EXPECT_NO_ERROR(gl.getError(), "Atomic counter buffer setup failed");
1544                 }
1545
1546                 // Output buffer setup
1547                 {
1548                         const deUint32          blockIndex              = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1549                         const int                       blockSize               = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1550
1551                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1552                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1553                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1554                         GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1555                 }
1556
1557                 // Dispatch compute workload
1558                 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
1559                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1560
1561                 // Read back and compare atomic counter
1562                 {
1563                         const deUint32          uniformIndex    = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count");
1564                         const deUint32          uniformOffset   = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_OFFSET);
1565                         const deUint32          bufferIndex             = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX);
1566                         const deUint32          bufferSize              = getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE);
1567                         const BufferMemMap      bufMap                  (gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_READ_BIT);
1568
1569                         const deUint32          resVal                  = *((const deUint32*)((const deUint8*)bufMap.getPtr() + uniformOffset));
1570
1571                         if (resVal != (deUint32)numValues)
1572                                 throw tcu::TestError("Invalid atomic counter value");
1573                 }
1574
1575                 // Read back and compare SSBO
1576                 {
1577                         const deUint32                          blockIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1578                         const int                                       blockSize       = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1579                         const deUint32                          valueIndex      = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
1580                         const InterfaceVariableInfo     valueInfo       = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1581                         const BufferMemMap                      bufMap          (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1582                         deUint32                                        valSum          = 0;
1583                         deUint32                                        refSum          = 0;
1584
1585                         for (int valNdx = 0; valNdx < numValues; valNdx++)
1586                         {
1587                                 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*valNdx));
1588
1589                                 valSum += res;
1590                                 refSum += (deUint32)valNdx;
1591
1592                                 if (!de::inBounds<deUint32>(res, 0, (deUint32)numValues))
1593                                         throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(valNdx) + "]");
1594                         }
1595
1596                         if (valSum != refSum)
1597                                 throw tcu::TestError("Total sum of values in Output.values doesn't match");
1598                 }
1599
1600                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1601                 return STOP;
1602         }
1603
1604 private:
1605         const tcu::IVec3        m_localSize;
1606         const tcu::IVec3        m_workSize;
1607 };
1608
1609 } // anonymous
1610
1611 BasicComputeShaderTests::BasicComputeShaderTests (Context& context)
1612         : TestCaseGroup(context, "basic", "Basic Compute Shader Tests")
1613 {
1614 }
1615
1616 BasicComputeShaderTests::~BasicComputeShaderTests (void)
1617 {
1618 }
1619
1620 void BasicComputeShaderTests::init (void)
1621 {
1622         addChild(new EmptyComputeShaderCase(m_context));
1623
1624         addChild(new UBOToSSBOInvertCase        (m_context, "ubo_to_ssbo_single_invocation",                    "Copy from UBO to SSBO, inverting bits",        256,    tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
1625         addChild(new UBOToSSBOInvertCase        (m_context, "ubo_to_ssbo_single_group",                                 "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(2,1,4),      tcu::IVec3(1,1,1)));
1626         addChild(new UBOToSSBOInvertCase        (m_context, "ubo_to_ssbo_multiple_invocations",                 "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(1,1,1),      tcu::IVec3(2,4,1)));
1627         addChild(new UBOToSSBOInvertCase        (m_context, "ubo_to_ssbo_multiple_groups",                              "Copy from UBO to SSBO, inverting bits",        1024,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
1628
1629         addChild(new CopyInvertSSBOCase         (m_context, "copy_ssbo_single_invocation",                              "Copy between SSBOs, inverting bits",   256,    tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
1630         addChild(new CopyInvertSSBOCase         (m_context, "copy_ssbo_multiple_invocations",                   "Copy between SSBOs, inverting bits",   1024,   tcu::IVec3(1,1,1),      tcu::IVec3(2,4,1)));
1631         addChild(new CopyInvertSSBOCase         (m_context, "copy_ssbo_multiple_groups",                                "Copy between SSBOs, inverting bits",   1024,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
1632
1633         addChild(new InvertSSBOInPlaceCase      (m_context, "ssbo_rw_single_invocation",                                "Read and write same SSBO",                             256,    true,   tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
1634         addChild(new InvertSSBOInPlaceCase      (m_context, "ssbo_rw_multiple_groups",                                  "Read and write same SSBO",                             1024,   true,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
1635
1636         addChild(new InvertSSBOInPlaceCase      (m_context, "ssbo_unsized_arr_single_invocation",               "Read and write same SSBO",                             256,    false,  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
1637         addChild(new InvertSSBOInPlaceCase      (m_context, "ssbo_unsized_arr_multiple_groups",                 "Read and write same SSBO",                             1024,   false,  tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
1638
1639         addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_single_invocation",         "Write to multiple SSBOs",                              256,    true,   tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
1640         addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_multiple_groups",           "Write to multiple SSBOs",                              1024,   true,   tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
1641
1642         addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_single_invocation", "Write to multiple SSBOs",                      256,    false,  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
1643         addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_multiple_groups",   "Write to multiple SSBOs",                      1024,   false,  tcu::IVec3(1,4,2),      tcu::IVec3(2,2,4)));
1644
1645         addChild(new SSBOLocalBarrierCase       (m_context, "ssbo_local_barrier_single_invocation",             "SSBO local barrier usage",                             tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
1646         addChild(new SSBOLocalBarrierCase       (m_context, "ssbo_local_barrier_single_group",                  "SSBO local barrier usage",                             tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
1647         addChild(new SSBOLocalBarrierCase       (m_context, "ssbo_local_barrier_multiple_groups",               "SSBO local barrier usage",                             tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
1648
1649         addChild(new SSBOBarrierCase            (m_context, "ssbo_cmd_barrier_single",                                  "SSBO memory barrier usage",                    tcu::IVec3(1,1,1)));
1650         addChild(new SSBOBarrierCase            (m_context, "ssbo_cmd_barrier_multiple",                                "SSBO memory barrier usage",                    tcu::IVec3(11,5,7)));
1651
1652         addChild(new BasicSharedVarCase         (m_context, "shared_var_single_invocation",                             "Basic shared variable usage",                  tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
1653         addChild(new BasicSharedVarCase         (m_context, "shared_var_single_group",                                  "Basic shared variable usage",                  tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
1654         addChild(new BasicSharedVarCase         (m_context, "shared_var_multiple_invocations",                  "Basic shared variable usage",                  tcu::IVec3(1,1,1),      tcu::IVec3(2,5,4)));
1655         addChild(new BasicSharedVarCase         (m_context, "shared_var_multiple_groups",                               "Basic shared variable usage",                  tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
1656
1657         addChild(new SharedVarAtomicOpCase      (m_context, "shared_atomic_op_single_invocation",               "Atomic operation with shared var",             tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
1658         addChild(new SharedVarAtomicOpCase      (m_context, "shared_atomic_op_single_group",                    "Atomic operation with shared var",             tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
1659         addChild(new SharedVarAtomicOpCase      (m_context, "shared_atomic_op_multiple_invocations",    "Atomic operation with shared var",             tcu::IVec3(1,1,1),      tcu::IVec3(2,5,4)));
1660         addChild(new SharedVarAtomicOpCase      (m_context, "shared_atomic_op_multiple_groups",                 "Atomic operation with shared var",             tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
1661
1662         addChild(new CopyImageToSSBOCase        (m_context, "copy_image_to_ssbo_small",                                 "Image to SSBO copy",                                   tcu::IVec2(1,1),        tcu::IVec2(64,64)));
1663         addChild(new CopyImageToSSBOCase        (m_context, "copy_image_to_ssbo_large",                                 "Image to SSBO copy",                                   tcu::IVec2(2,4),        tcu::IVec2(512,512)));
1664
1665         addChild(new CopySSBOToImageCase        (m_context, "copy_ssbo_to_image_small",                                 "SSBO to image copy",                                   tcu::IVec2(1,1),        tcu::IVec2(64,64)));
1666         addChild(new CopySSBOToImageCase        (m_context, "copy_ssbo_to_image_large",                                 "SSBO to image copy",                                   tcu::IVec2(2,4),        tcu::IVec2(512,512)));
1667
1668         addChild(new ImageAtomicOpCase          (m_context, "image_atomic_op_local_size_1",                             "Atomic operation with image",                  1,      tcu::IVec2(64,64)));
1669         addChild(new ImageAtomicOpCase          (m_context, "image_atomic_op_local_size_8",                             "Atomic operation with image",                  8,      tcu::IVec2(64,64)));
1670
1671         addChild(new ImageBarrierCase           (m_context, "image_barrier_single",                                             "Image barrier",                                                tcu::IVec2(1,1)));
1672         addChild(new ImageBarrierCase           (m_context, "image_barrier_multiple",                                   "Image barrier",                                                tcu::IVec2(64,64)));
1673
1674         addChild(new AtomicCounterCase          (m_context, "atomic_counter_single_invocation",                 "Basic atomic counter test",                    tcu::IVec3(1,1,1),      tcu::IVec3(1,1,1)));
1675         addChild(new AtomicCounterCase          (m_context, "atomic_counter_single_group",                              "Basic atomic counter test",                    tcu::IVec3(3,2,5),      tcu::IVec3(1,1,1)));
1676         addChild(new AtomicCounterCase          (m_context, "atomic_counter_multiple_invocations",              "Basic atomic counter test",                    tcu::IVec3(1,1,1),      tcu::IVec3(2,5,4)));
1677         addChild(new AtomicCounterCase          (m_context, "atomic_counter_multiple_groups",                   "Basic atomic counter test",                    tcu::IVec3(3,4,1),      tcu::IVec3(2,7,3)));
1678 }
1679
1680 } // Functional
1681 } // gles31
1682 } // deqp