modules/gles31/functional/es31fSynchronizationTests.cpp

   1 /*-------------------------------------------------------------------------
   2  * drawElements Quality Program OpenGL ES 3.1 Module
   3  * -------------------------------------------------
   4  *
   5  * Copyright 2014 The Android Open Source Project
   6  *
   7  * Licensed under the Apache License, Version 2.0 (the "License");
   8  * you may not use this file except in compliance with the License.
   9  * You may obtain a copy of the License at
  10  *
  11  *      http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  *
  19  *//*!
  20  * \file
  21  * \brief Synchronization Tests
  22  *//*--------------------------------------------------------------------*/
  23
  24 #include "es31fSynchronizationTests.hpp"
  25 #include "tcuTestLog.hpp"
  26 #include "tcuStringTemplate.hpp"
  27 #include "tcuSurface.hpp"
  28 #include "tcuRenderTarget.hpp"
  29 #include "gluRenderContext.hpp"
  30 #include "gluShaderProgram.hpp"
  31 #include "gluObjectWrapper.hpp"
  32 #include "gluPixelTransfer.hpp"
  33 #include "gluContextInfo.hpp"
  34 #include "glwFunctions.hpp"
  35 #include "glwEnums.hpp"
  36 #include "deStringUtil.hpp"
  37 #include "deSharedPtr.hpp"
  38 #include "deMemory.h"
  39 #include "deRandom.hpp"
  40
  41 #include <map>
  42
  43 namespace deqp
  44 {
  45 namespace gles31
  46 {
  47 namespace Functional
  48 {
  49 namespace
  50 {
  51
  52 static bool validateSortedAtomicRampAdditionValueChain (const std::vector<deUint32>& valueChain, deUint32 sumValue, int& invalidOperationNdx, deUint32& errorDelta, deUint32& errorExpected)
  53 {
  54         std::vector<deUint32> chainDelta(valueChain.size());
  55
  56         for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
  57                 chainDelta[callNdx] = ((callNdx + 1 == (int)valueChain.size()) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
  58
  59         // chainDelta contains now the actual additions applied to the value
  60         // check there exists an addition ramp form 1 to ...
  61         std::sort(chainDelta.begin(), chainDelta.end());
  62
  63         for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
  64         {
  65                 if ((int)chainDelta[callNdx] != callNdx+1)
  66                 {
  67                         invalidOperationNdx = callNdx;
  68                         errorDelta = chainDelta[callNdx];
  69                         errorExpected = callNdx+1;
  70
  71                         return false;
  72                 }
  73         }
  74
  75         return true;
  76 }
  77
  78 static void readBuffer (const glw::Functions& gl, deUint32 target, int numElements, std::vector<deUint32>& result)
  79 {
  80         const void* ptr = gl.mapBufferRange(target, 0, (int)(sizeof(deUint32) * numElements), GL_MAP_READ_BIT);
  81         GLU_EXPECT_NO_ERROR(gl.getError(), "map");
  82
  83         if (!ptr)
  84                 throw tcu::TestError("mapBufferRange returned NULL");
  85
  86         result.resize(numElements);
  87         memcpy(&result[0], ptr, sizeof(deUint32) * numElements);
  88
  89         if (gl.unmapBuffer(target) == GL_FALSE)
  90                 throw tcu::TestError("unmapBuffer returned false");
  91 }
  92
  93 static deUint32 readBufferUint32 (const glw::Functions& gl, deUint32 target)
  94 {
  95         std::vector<deUint32> vec;
  96
  97         readBuffer(gl, target, 1, vec);
  98
  99         return vec[0];
 100 }
 101
 102 //! Generate a ramp of values from 1 to numElements, and shuffle it
 103 void generateShuffledRamp (int numElements, std::vector<int>& ramp)
 104 {
 105         de::Random rng(0xabcd);
 106
 107         // some positive (non-zero) unique values
 108         ramp.resize(numElements);
 109         for (int callNdx = 0; callNdx < numElements; ++callNdx)
 110                 ramp[callNdx] = callNdx + 1;
 111
 112         rng.shuffle(ramp.begin(), ramp.end());
 113 }
 114
 115 static std::string specializeShader(Context& context, const char* code)
 116 {
 117         const glu::GLSLVersion                          glslVersion                     = glu::getContextTypeGLSLVersion(context.getRenderContext().getType());
 118         std::map<std::string, std::string>      specializationMap;
 119
 120         specializationMap["GLSL_VERSION_DECL"] = glu::getGLSLVersionDeclaration(glslVersion);
 121
 122         if (glu::contextSupports(context.getRenderContext().getType(), glu::ApiType::es(3, 2)))
 123                 specializationMap["SHADER_IMAGE_ATOMIC_REQUIRE"] = "";
 124         else
 125                 specializationMap["SHADER_IMAGE_ATOMIC_REQUIRE"] = "#extension GL_OES_shader_image_atomic : require";
 126
 127         return tcu::StringTemplate(code).specialize(specializationMap);
 128 }
 129
//! Base class for compute shader tests that write to and read from a storage object
//! and record a per-invocation verdict in a result buffer.
//!
//! Subclasses supply the compute shader through genShaderSource(). iterate() dispatches
//! it once and passes only if every element of the result buffer equals 1.
class InterInvocationTestCase : public TestCase
{
public:
        //! Kind of object the shader uses as storage.
        enum StorageType
        {
                STORAGE_BUFFER = 0,
                STORAGE_IMAGE,

                STORAGE_LAST
        };
        //! Bit flags accepted by the constructor's flags argument.
        enum CaseFlags
        {
                FLAG_ATOMIC                             = 0x1,
                FLAG_ALIASING_STORAGES  = 0x2,
                FLAG_IN_GROUP                   = 0x4,
        };

                                                InterInvocationTestCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags = 0);
                                                ~InterInvocationTestCase        (void);

private:
        void                            init                                            (void);
        void                            deinit                                          (void);
        IterateResult           iterate                                         (void);

        void                            runCompute                                      (void);
        bool                            verifyResults                           (void);
        virtual std::string     genShaderSource                         (void) const = 0;

protected:
        //! GLSL statements to place between the two access phases of a shader.
        std::string                     genBarrierSource                        (void) const;

        const StorageType       m_storage;
        const bool                      m_useAtomic;                            //!< FLAG_ATOMIC was given
        const bool                      m_aliasingStorages;                     //!< FLAG_ALIASING_STORAGES was given
        const bool                      m_syncWithGroup;                        //!< FLAG_IN_GROUP was given
        const int                       m_workWidth;                            //!< total work width
        const int                       m_workHeight;                           //!<     ...    height
        const int                       m_localWidth;                           //!< group width
        const int                       m_localHeight;                          //!< group height
        const int                       m_elementsPerInvocation;        //!< elements accessed by a single invocation

private:
        glw::GLuint                     m_storageBuf;                           //!< storage buffer object, 0 when not created
        glw::GLuint                     m_storageTex;                           //!< storage texture object, 0 when not created
        glw::GLuint                     m_resultBuf;                            //!< result buffer object, 0 when not created
        glu::ShaderProgram*     m_program;                                      //!< owned; created in init(), deleted in deinit()
};
 178
//! Decode the flag bits and fix the work dimensions; no GL objects are created here.
InterInvocationTestCase::InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
        : TestCase                                      (context, name, desc)
        , m_storage                                     (storage)
        , m_useAtomic                           ((flags & FLAG_ATOMIC) != 0)
        , m_aliasingStorages            ((flags & FLAG_ALIASING_STORAGES) != 0)
        , m_syncWithGroup                       ((flags & FLAG_IN_GROUP) != 0)
        , m_workWidth                           (256)
        , m_workHeight                          (256)
        , m_localWidth                          (16)
        , m_localHeight                         (8)
        , m_elementsPerInvocation       (8)
        , m_storageBuf                          (0)
        , m_storageTex                          (0)
        , m_resultBuf                           (0)
        , m_program                                     (DE_NULL)
{
        DE_ASSERT(m_storage < STORAGE_LAST);
        DE_ASSERT(m_localWidth*m_localHeight <= 128); // minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
}
 198
//! Release anything init() created that deinit() has not already released.
InterInvocationTestCase::~InterInvocationTestCase (void)
{
        deinit();
}
 203
 204 void InterInvocationTestCase::init (void)
 205 {
 206         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
 207         const bool                              supportsES32    = glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2));
 208
 209         // requirements
 210
 211         if (m_useAtomic && m_storage == STORAGE_IMAGE && !supportsES32 && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
 212                 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
 213
 214         // program
 215
 216         m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genShaderSource()));
 217         m_testCtx.getLog() << *m_program;
 218         if (!m_program->isOk())
 219                 throw tcu::TestError("could not build program");
 220
 221         // source
 222
 223         if (m_storage == STORAGE_BUFFER)
 224         {
 225                 const int                               bufferElements  = m_workWidth * m_workHeight * m_elementsPerInvocation;
 226                 const int                               bufferSize              = bufferElements * (int)sizeof(deUint32);
 227                 std::vector<deUint32>   zeroBuffer              (bufferElements, 0);
 228
 229                 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
 230
 231                 gl.genBuffers(1, &m_storageBuf);
 232                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
 233                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
 234                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
 235         }
 236         else if (m_storage == STORAGE_IMAGE)
 237         {
 238                 const int                               bufferElements  = m_workWidth * m_workHeight * m_elementsPerInvocation;
 239                 const int                               bufferSize              = bufferElements * (int)sizeof(deUint32);
 240
 241                 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x" << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
 242
 243                 gl.genTextures(1, &m_storageTex);
 244                 gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
 245                 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
 246                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
 247                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
 248                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");
 249
 250                 // Zero-fill
 251                 m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;
 252
 253                 {
 254                         const std::vector<deInt32> zeroBuffer(m_workWidth * m_workHeight * m_elementsPerInvocation, 0);
 255                         gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
 256                         GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
 257                 }
 258         }
 259         else
 260                 DE_ASSERT(DE_FALSE);
 261
 262         // destination
 263
 264         {
 265                 const int                               bufferElements  = m_workWidth * m_workHeight;
 266                 const int                               bufferSize              = bufferElements * (int)sizeof(deUint32);
 267                 std::vector<deInt32>    negativeBuffer  (bufferElements, -1);
 268
 269                 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
 270
 271                 gl.genBuffers(1, &m_resultBuf);
 272                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
 273                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
 274                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
 275         }
 276 }
 277
 278 void InterInvocationTestCase::deinit (void)
 279 {
 280         if (m_storageBuf)
 281         {
 282                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
 283                 m_storageBuf = DE_NULL;
 284         }
 285
 286         if (m_storageTex)
 287         {
 288                 m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
 289                 m_storageTex = DE_NULL;
 290         }
 291
 292         if (m_resultBuf)
 293         {
 294                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
 295                 m_resultBuf = DE_NULL;
 296         }
 297
 298         delete m_program;
 299         m_program = DE_NULL;
 300 }
 301
 302 InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate (void)
 303 {
 304         // Dispatch
 305         runCompute();
 306
 307         // Verify buffer contents
 308         if (verifyResults())
 309                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
 310         else
 311                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
 312
 313         return STOP;
 314 }
 315
 316 void InterInvocationTestCase::runCompute (void)
 317 {
 318         const glw::Functions&   gl              = m_context.getRenderContext().getFunctions();
 319         const int                               groupsX = m_workWidth / m_localWidth;
 320         const int                               groupsY = m_workHeight / m_localHeight;
 321
 322         DE_ASSERT((m_workWidth % m_localWidth) == 0);
 323         DE_ASSERT((m_workHeight % m_localHeight) == 0);
 324
 325         m_testCtx.getLog()
 326                 << tcu::TestLog::Message
 327                 << "Dispatching compute.\n"
 328                 << "    group size: " << m_localWidth << "x" << m_localHeight << "\n"
 329                 << "    dispatch size: " << groupsX << "x" << groupsY << "\n"
 330                 << "    total work size: " << m_workWidth << "x" << m_workHeight << "\n"
 331                 << tcu::TestLog::EndMessage;
 332
 333         gl.useProgram(m_program->getProgram());
 334
 335         // source
 336         if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
 337         {
 338                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
 339                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
 340         }
 341         else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
 342         {
 343                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
 344                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
 345                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
 346
 347                 m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages." << tcu::TestLog::EndMessage;
 348         }
 349         else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
 350         {
 351                 gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
 352                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
 353         }
 354         else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
 355         {
 356                 gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
 357                 gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
 358
 359                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
 360
 361                 m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages." << tcu::TestLog::EndMessage;
 362         }
 363         else
 364                 DE_ASSERT(DE_FALSE);
 365
 366         // destination
 367         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
 368         GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
 369
 370         // dispatch
 371         gl.dispatchCompute(groupsX, groupsY, 1);
 372         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
 373 }
 374
 375 bool InterInvocationTestCase::verifyResults (void)
 376 {
 377         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
 378         const int                               errorFloodThreshold     = 5;
 379         int                                             numErrorsLogged         = 0;
 380         const void*                             mapped                          = DE_NULL;
 381         std::vector<deInt32>    results                         (m_workWidth * m_workHeight);
 382         bool                                    error                           = false;
 383
 384         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
 385         mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(deInt32), GL_MAP_READ_BIT);
 386         GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
 387
 388         // copy to properly aligned array
 389         deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(deUint32));
 390
 391         if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
 392                 throw tcu::TestError("memory map store corrupted");
 393
 394         // check the results
 395         for (int ndx = 0; ndx < (int)results.size(); ++ndx)
 396         {
 397                 if (results[ndx] != 1)
 398                 {
 399                         error = true;
 400
 401                         if (numErrorsLogged == 0)
 402                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
 403                         if (numErrorsLogged++ < errorFloodThreshold)
 404                                 m_testCtx.getLog() << tcu::TestLog::Message << "        Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
 405                         else
 406                         {
 407                                 // after N errors, no point continuing verification
 408                                 m_testCtx.getLog() << tcu::TestLog::Message << "        -- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
 409                                 break;
 410                         }
 411                 }
 412         }
 413
 414         if (!error)
 415                 m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
 416         return !error;
 417 }
 418
 419 std::string InterInvocationTestCase::genBarrierSource (void) const
 420 {
 421         std::ostringstream buf;
 422
 423         if (m_syncWithGroup)
 424         {
 425                 // Wait until all invocations in this work group have their texture/buffer read/write operations complete
 426                 // \note We could also use memoryBarrierBuffer() or memoryBarrierImage() in place of groupMemoryBarrier() but
 427                 //       we only require intra-workgroup synchronization.
 428                 buf << "\n"
 429                         << "    groupMemoryBarrier();\n"
 430                         << "    barrier();\n"
 431                         << "\n";
 432         }
 433         else if (m_storage == STORAGE_BUFFER)
 434         {
 435                 DE_ASSERT(!m_syncWithGroup);
 436
 437                 // Waiting only for data written by this invocation. Since all buffer reads and writes are
 438                 // processed in order (within a single invocation), we don't have to do anything.
 439                 buf << "\n";
 440         }
 441         else if (m_storage == STORAGE_IMAGE)
 442         {
 443                 DE_ASSERT(!m_syncWithGroup);
 444
 445                 // Waiting only for data written by this invocation. But since operations complete in undefined
 446                 // order, we have to wait for them to complete.
 447                 buf << "\n"
 448                         << "    memoryBarrierImage();\n"
 449                         << "\n";
 450         }
 451         else
 452                 DE_ASSERT(DE_FALSE);
 453
 454         return buf.str();
 455 }
 456
//! Test case whose shader consists of a common preamble and main() frame around a
//! subclass-provided main block (see genShaderMainBlock()).
class InvocationBasicCase : public InterInvocationTestCase
{
public:
                                                        InvocationBasicCase             (Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
        std::string                             genShaderSource                 (void) const;
        //! GLSL statements inserted into main(); they may clear the shader's allOk flag.
        virtual std::string             genShaderMainBlock              (void) const = 0;
};

//! Forwards all arguments to InterInvocationTestCase.
InvocationBasicCase::InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
        : InterInvocationTestCase(context, name, desc, storage, flags)
{
}
 470
 471 std::string InvocationBasicCase::genShaderSource (void) const
 472 {
 473         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
 474         std::ostringstream      buf;
 475
 476         buf << "${GLSL_VERSION_DECL}\n"
 477                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
 478                 << "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
 479                 << "layout(binding=0, std430) buffer Output\n"
 480                 << "{\n"
 481                 << "    highp int values[];\n"
 482                 << "} sb_result;\n";
 483
 484         if (m_storage == STORAGE_BUFFER)
 485                 buf << "layout(binding=1, std430) coherent buffer Storage\n"
 486                         << "{\n"
 487                         << "    highp int values[];\n"
 488                         << "} sb_store;\n"
 489                         << "\n"
 490                         << "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
 491                         << "{\n"
 492                         << "    highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
 493                         << "    return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
 494                         << "}\n";
 495         else if (m_storage == STORAGE_IMAGE)
 496                 buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
 497                         << "\n"
 498                         << "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
 499                         << "{\n"
 500                         << "    return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
 501                         << "}\n";
 502         else
 503                 DE_ASSERT(DE_FALSE);
 504
 505         buf << "\n"
 506                 << "void main (void)\n"
 507                 << "{\n"
 508                 << "    int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
 509                 << "    int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
 510                 << "    bool allOk      = true;\n"
 511                 << "\n"
 512                 << genShaderMainBlock()
 513                 << "\n"
 514                 << "    sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
 515                 << "}\n";
 516
 517         return specializeShader(m_context, buf.str().c_str());
 518 }
 519
//! Case whose main block first writes to the storage, then reads the values back
//! after the barrier.
class InvocationWriteReadCase : public InvocationBasicCase
{
public:
                                        InvocationWriteReadCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
        std::string             genShaderMainBlock                      (void) const;
};

//! Forwards all arguments to InvocationBasicCase.
InvocationWriteReadCase::InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
        : InvocationBasicCase(context, name, desc, storage, flags)
{
}
 532
 533 std::string InvocationWriteReadCase::genShaderMainBlock (void) const
 534 {
 535         std::ostringstream buf;
 536
 537         // write
 538
 539         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 540         {
 541                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 542                         buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
 543                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 544                         buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
 545                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 546                         buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
 547                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 548                         buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
 549                 else
 550                         DE_ASSERT(DE_FALSE);
 551         }
 552
 553         // barrier
 554
 555         buf << genBarrierSource();
 556
 557         // read
 558
 559         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 560         {
 561                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
 562
 563                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 564                         buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 0) == groupNdx);\n";
 565                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 566                         buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
 567                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 568                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 0) == groupNdx);\n";
 569                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 570                         buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
 571                 else
 572                         DE_ASSERT(DE_FALSE);
 573         }
 574
 575         return buf.str();
 576 }
 577
//! Case whose main block first reads from the storage, then writes to it after
//! the barrier.
class InvocationReadWriteCase : public InvocationBasicCase
{
public:
                                        InvocationReadWriteCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
        std::string             genShaderMainBlock                      (void) const;
};

//! Forwards all arguments to InvocationBasicCase.
InvocationReadWriteCase::InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
        : InvocationBasicCase(context, name, desc, storage, flags)
{
}
 590
 591 std::string InvocationReadWriteCase::genShaderMainBlock (void) const
 592 {
 593         std::ostringstream buf;
 594
 595         // read
 596
 597         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 598         {
 599                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
 600
 601                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 602                         buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == 0);\n";
 603                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 604                         buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
 605                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 606                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == 0);\n";
 607                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 608                         buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
 609                 else
 610                         DE_ASSERT(DE_FALSE);
 611         }
 612
 613         // barrier
 614
 615         buf << genBarrierSource();
 616
 617         // write
 618
 619         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 620         {
 621                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 622                         buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
 623                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 624                         buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
 625                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 626                         buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
 627                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 628                         buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
 629                 else
 630                         DE_ASSERT(DE_FALSE);
 631         }
 632
 633         return buf.str();
 634 }
 635
 636 class InvocationOverWriteCase : public InvocationBasicCase
 637 {
 638 public:
 639                                         InvocationOverWriteCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags);
 640 private:
 641         std::string             genShaderMainBlock                      (void) const;
 642 };
 643
 644 InvocationOverWriteCase::InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
 645         : InvocationBasicCase(context, name, desc, storage, flags)
 646 {
 647 }
 648
 649 std::string InvocationOverWriteCase::genShaderMainBlock (void) const
 650 {
 651         std::ostringstream buf;
 652
 653         // write
 654
 655         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 656         {
 657                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 658                         buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
 659                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 660                         buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
 661                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 662                         buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
 663                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 664                         buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
 665                 else
 666                         DE_ASSERT(DE_FALSE);
 667         }
 668
 669         // barrier
 670
 671         buf << genBarrierSource();
 672
 673         // write over
 674
 675         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 676         {
 677                 // write another invocation's value or our own value depending on test type
 678                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+4) + ", " + de::toString(3*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
 679
 680                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 681                         buf << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
 682                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 683                         buf << "\tsb_store.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
 684                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 685                         buf << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
 686                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 687                         buf << "\timageStore(u_image, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
 688                 else
 689                         DE_ASSERT(DE_FALSE);
 690         }
 691
 692         // barrier
 693
 694         buf << genBarrierSource();
 695
 696         // read
 697
 698         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 699         {
 700                 // check another invocation's value or our own value depending on test type
 701                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
 702
 703                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 704                         buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == groupNdx);\n";
 705                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 706                         buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
 707                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 708                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == groupNdx);\n";
 709                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 710                         buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
 711                 else
 712                         DE_ASSERT(DE_FALSE);
 713         }
 714
 715         return buf.str();
 716 }
 717
 718 class InvocationAliasWriteCase : public InterInvocationTestCase
 719 {
 720 public:
 721         enum TestType
 722         {
 723                 TYPE_WRITE = 0,
 724                 TYPE_OVERWRITE,
 725
 726                 TYPE_LAST
 727         };
 728
 729                                         InvocationAliasWriteCase        (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags);
 730 private:
 731         std::string             genShaderSource                         (void) const;
 732
 733         const TestType  m_type;
 734 };
 735
 736 InvocationAliasWriteCase::InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags)
 737         : InterInvocationTestCase       (context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
 738         , m_type                                        (type)
 739 {
 740         DE_ASSERT(type < TYPE_LAST);
 741 }
 742
 743 std::string InvocationAliasWriteCase::genShaderSource (void) const
 744 {
 745         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
 746         std::ostringstream      buf;
 747
 748         buf << "${GLSL_VERSION_DECL}\n"
 749                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
 750                 << "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
 751                 << "layout(binding=0, std430) buffer Output\n"
 752                 << "{\n"
 753                 << "    highp int values[];\n"
 754                 << "} sb_result;\n";
 755
 756         if (m_storage == STORAGE_BUFFER)
 757                 buf << "layout(binding=1, std430) coherent buffer Storage0\n"
 758                         << "{\n"
 759                         << "    highp int values[];\n"
 760                         << "} sb_store0;\n"
 761                         << "layout(binding=2, std430) coherent buffer Storage1\n"
 762                         << "{\n"
 763                         << "    highp int values[];\n"
 764                         << "} sb_store1;\n"
 765                         << "\n"
 766                         << "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
 767                         << "{\n"
 768                         << "    highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
 769                         << "    return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
 770                         << "}\n";
 771         else if (m_storage == STORAGE_IMAGE)
 772                 buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
 773                         << "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
 774                         << "\n"
 775                         << "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
 776                         << "{\n"
 777                         << "    return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
 778                         << "}\n";
 779         else
 780                 DE_ASSERT(DE_FALSE);
 781
 782         buf << "\n"
 783                 << "void main (void)\n"
 784                 << "{\n"
 785                 << "    int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
 786                 << "    int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
 787                 << "    bool allOk      = true;\n"
 788                 << "\n";
 789
 790         if (m_type == TYPE_OVERWRITE)
 791         {
 792                 // write
 793
 794                 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 795                 {
 796                         if (m_storage == STORAGE_BUFFER && m_useAtomic)
 797                                 buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
 798                         else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 799                                 buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
 800                         else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 801                                 buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
 802                         else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 803                                 buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
 804                         else
 805                                 DE_ASSERT(DE_FALSE);
 806                 }
 807
 808                 // barrier
 809
 810                 buf << genBarrierSource();
 811         }
 812         else
 813                 DE_ASSERT(m_type == TYPE_WRITE);
 814
 815         // write (again)
 816
 817         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 818         {
 819                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+2) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
 820
 821                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 822                         buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
 823                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 824                         buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
 825                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 826                         buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
 827                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 828                         buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
 829                 else
 830                         DE_ASSERT(DE_FALSE);
 831         }
 832
 833         // barrier
 834
 835         buf << genBarrierSource();
 836
 837         // read
 838
 839         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 840         {
 841                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 842                         buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 123) == groupNdx);\n";
 843                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 844                         buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] == groupNdx);\n";
 845                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 846                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 123) == groupNdx);\n";
 847                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 848                         buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << ")).x == groupNdx);\n";
 849                 else
 850                         DE_ASSERT(DE_FALSE);
 851         }
 852
 853         // return result
 854
 855         buf << "\n"
 856                 << "    sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
 857                 << "}\n";
 858
 859         return specializeShader(m_context, buf.str().c_str());
 860 }
 861
 862 namespace op
 863 {
 864
 865 struct WriteData
 866 {
 867         int targetHandle;
 868         int seed;
 869
 870         static WriteData Generate(int targetHandle, int seed)
 871         {
 872                 WriteData retVal;
 873
 874                 retVal.targetHandle = targetHandle;
 875                 retVal.seed = seed;
 876
 877                 return retVal;
 878         }
 879 };
 880
 881 struct ReadData
 882 {
 883         int targetHandle;
 884         int seed;
 885
 886         static ReadData Generate(int targetHandle, int seed)
 887         {
 888                 ReadData retVal;
 889
 890                 retVal.targetHandle = targetHandle;
 891                 retVal.seed = seed;
 892
 893                 return retVal;
 894         }
 895 };
 896
 897 struct Barrier
 898 {
 899 };
 900
 901 struct WriteDataInterleaved
 902 {
 903         int             targetHandle;
 904         int             seed;
 905         bool    evenOdd;
 906
 907         static WriteDataInterleaved Generate(int targetHandle, int seed, bool evenOdd)
 908         {
 909                 WriteDataInterleaved retVal;
 910
 911                 retVal.targetHandle = targetHandle;
 912                 retVal.seed = seed;
 913                 retVal.evenOdd = evenOdd;
 914
 915                 return retVal;
 916         }
 917 };
 918
 919 struct ReadDataInterleaved
 920 {
 921         int targetHandle;
 922         int seed0;
 923         int seed1;
 924
 925         static ReadDataInterleaved Generate(int targetHandle, int seed0, int seed1)
 926         {
 927                 ReadDataInterleaved retVal;
 928
 929                 retVal.targetHandle = targetHandle;
 930                 retVal.seed0 = seed0;
 931                 retVal.seed1 = seed1;
 932
 933                 return retVal;
 934         }
 935 };
 936
 937 struct ReadMultipleData
 938 {
 939         int targetHandle0;
 940         int seed0;
 941         int targetHandle1;
 942         int seed1;
 943
 944         static ReadMultipleData Generate(int targetHandle0, int seed0, int targetHandle1, int seed1)
 945         {
 946                 ReadMultipleData retVal;
 947
 948                 retVal.targetHandle0 = targetHandle0;
 949                 retVal.seed0 = seed0;
 950                 retVal.targetHandle1 = targetHandle1;
 951                 retVal.seed1 = seed1;
 952
 953                 return retVal;
 954         }
 955 };
 956
 957 struct ReadZeroData
 958 {
 959         int targetHandle;
 960
 961         static ReadZeroData Generate(int targetHandle)
 962         {
 963                 ReadZeroData retVal;
 964
 965                 retVal.targetHandle = targetHandle;
 966
 967                 return retVal;
 968         }
 969 };
 970
 971 } // namespace op
 972
 973 class InterCallTestCase;
 974
 975 class InterCallOperations
 976 {
 977 public:
 978         InterCallOperations& operator<< (const op::WriteData&);
 979         InterCallOperations& operator<< (const op::ReadData&);
 980         InterCallOperations& operator<< (const op::Barrier&);
 981         InterCallOperations& operator<< (const op::ReadMultipleData&);
 982         InterCallOperations& operator<< (const op::WriteDataInterleaved&);
 983         InterCallOperations& operator<< (const op::ReadDataInterleaved&);
 984         InterCallOperations& operator<< (const op::ReadZeroData&);
 985
 986 private:
 987         struct Command
 988         {
 989                 enum CommandType
 990                 {
 991                         TYPE_WRITE = 0,
 992                         TYPE_READ,
 993                         TYPE_BARRIER,
 994                         TYPE_READ_MULTIPLE,
 995                         TYPE_WRITE_INTERLEAVE,
 996                         TYPE_READ_INTERLEAVE,
 997                         TYPE_READ_ZERO,
 998
 999                         TYPE_LAST
1000                 };
1001
1002                 CommandType type;
1003
1004                 union CommandUnion
1005                 {
1006                         op::WriteData                           write;
1007                         op::ReadData                            read;
1008                         op::Barrier                                     barrier;
1009                         op::ReadMultipleData            readMulti;
1010                         op::WriteDataInterleaved        writeInterleave;
1011                         op::ReadDataInterleaved         readInterleave;
1012                         op::ReadZeroData                        readZero;
1013                 } u_cmd;
1014         };
1015
1016         friend class InterCallTestCase;
1017
1018         std::vector<Command> m_cmds;
1019 };
1020
1021 InterCallOperations& InterCallOperations::operator<< (const op::WriteData& cmd)
1022 {
1023         m_cmds.push_back(Command());
1024         m_cmds.back().type = Command::TYPE_WRITE;
1025         m_cmds.back().u_cmd.write = cmd;
1026
1027         return *this;
1028 }
1029
1030 InterCallOperations& InterCallOperations::operator<< (const op::ReadData& cmd)
1031 {
1032         m_cmds.push_back(Command());
1033         m_cmds.back().type = Command::TYPE_READ;
1034         m_cmds.back().u_cmd.read = cmd;
1035
1036         return *this;
1037 }
1038
1039 InterCallOperations& InterCallOperations::operator<< (const op::Barrier& cmd)
1040 {
1041         m_cmds.push_back(Command());
1042         m_cmds.back().type = Command::TYPE_BARRIER;
1043         m_cmds.back().u_cmd.barrier = cmd;
1044
1045         return *this;
1046 }
1047
1048 InterCallOperations& InterCallOperations::operator<< (const op::ReadMultipleData& cmd)
1049 {
1050         m_cmds.push_back(Command());
1051         m_cmds.back().type = Command::TYPE_READ_MULTIPLE;
1052         m_cmds.back().u_cmd.readMulti = cmd;
1053
1054         return *this;
1055 }
1056
1057 InterCallOperations& InterCallOperations::operator<< (const op::WriteDataInterleaved& cmd)
1058 {
1059         m_cmds.push_back(Command());
1060         m_cmds.back().type = Command::TYPE_WRITE_INTERLEAVE;
1061         m_cmds.back().u_cmd.writeInterleave = cmd;
1062
1063         return *this;
1064 }
1065
1066 InterCallOperations& InterCallOperations::operator<< (const op::ReadDataInterleaved& cmd)
1067 {
1068         m_cmds.push_back(Command());
1069         m_cmds.back().type = Command::TYPE_READ_INTERLEAVE;
1070         m_cmds.back().u_cmd.readInterleave = cmd;
1071
1072         return *this;
1073 }
1074
1075 InterCallOperations& InterCallOperations::operator<< (const op::ReadZeroData& cmd)
1076 {
1077         m_cmds.push_back(Command());
1078         m_cmds.back().type = Command::TYPE_READ_ZERO;
1079         m_cmds.back().u_cmd.readZero = cmd;
1080
1081         return *this;
1082 }
1083
1084 class InterCallTestCase : public TestCase
1085 {
1086 public:
1087         enum StorageType
1088         {
1089                 STORAGE_BUFFER = 0,
1090                 STORAGE_IMAGE,
1091
1092                 STORAGE_LAST
1093         };
1094         enum Flags
1095         {
1096                 FLAG_USE_ATOMIC = 1,
1097                 FLAG_USE_INT    = 2,
1098         };
1099                                                                                                         InterCallTestCase                       (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops);
1100                                                                                                         ~InterCallTestCase                      (void);
1101
1102 private:
1103         void                                                                                    init                                            (void);
1104         void                                                                                    deinit                                          (void);
1105         IterateResult                                                                   iterate                                         (void);
1106         bool                                                                                    verifyResults                           (void);
1107
1108         void                                                                                    runCommand                                      (const op::WriteData& cmd, int stepNdx, int& programFriendlyName);
1109         void                                                                                    runCommand                                      (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1110         void                                                                                    runCommand                                      (const op::Barrier&);
1111         void                                                                                    runCommand                                      (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1112         void                                                                                    runCommand                                      (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName);
1113         void                                                                                    runCommand                                      (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1114         void                                                                                    runCommand                                      (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1115         void                                                                                    runSingleRead                           (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1116
1117         glw::GLuint                                                                             genStorage                                      (int friendlyName);
1118         glw::GLuint                                                                             genResultStorage                        (void);
1119         glu::ShaderProgram*                                                             genWriteProgram                         (int seed);
1120         glu::ShaderProgram*                                                             genReadProgram                          (int seed);
1121         glu::ShaderProgram*                                                             genReadMultipleProgram          (int seed0, int seed1);
1122         glu::ShaderProgram*                                                             genWriteInterleavedProgram      (int seed, bool evenOdd);
1123         glu::ShaderProgram*                                                             genReadInterleavedProgram       (int seed0, int seed1);
1124         glu::ShaderProgram*                                                             genReadZeroProgram                      (void);
1125
1126         const StorageType                                                               m_storage;
1127         const int                                                                               m_invocationGridSize;   // !< width and height of the two dimensional work dispatch
1128         const int                                                                               m_perInvocationSize;    // !< number of elements accessed in single invocation
1129         const std::vector<InterCallOperations::Command> m_cmds;
1130         const bool                                                                              m_useAtomic;
1131         const bool                                                                              m_formatInteger;
1132
1133         std::vector<glu::ShaderProgram*>                                m_operationPrograms;
1134         std::vector<glw::GLuint>                                                m_operationResultStorages;
1135         std::map<int, glw::GLuint>                                              m_storageIDs;
1136 };
1137
1138 InterCallTestCase::InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops)
1139         : TestCase                                      (context, name, desc)
1140         , m_storage                                     (storage)
1141         , m_invocationGridSize          (512)
1142         , m_perInvocationSize           (2)
1143         , m_cmds                                        (ops.m_cmds)
1144         , m_useAtomic                           ((flags & FLAG_USE_ATOMIC) != 0)
1145         , m_formatInteger                       ((flags & FLAG_USE_INT) != 0)
1146 {
1147 }
1148
1149 InterCallTestCase::~InterCallTestCase (void)
1150 {
1151         deinit();
1152 }
1153
1154 void InterCallTestCase::init (void)
1155 {
1156         int                     programFriendlyName = 0;
1157         const bool      supportsES32            = glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2));
1158
1159         // requirements
1160
1161         if (m_useAtomic && m_storage == STORAGE_IMAGE && !supportsES32 && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1162                 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
1163
1164         // generate resources and validate command list
1165
1166         m_operationPrograms.resize(m_cmds.size(), DE_NULL);
1167         m_operationResultStorages.resize(m_cmds.size(), 0);
1168
1169         for (int step = 0; step < (int)m_cmds.size(); ++step)
1170         {
1171                 switch (m_cmds[step].type)
1172                 {
1173                         case InterCallOperations::Command::TYPE_WRITE:
1174                         {
1175                                 const op::WriteData& cmd = m_cmds[step].u_cmd.write;
1176
1177                                 // new storage handle?
1178                                 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1179                                         m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1180
1181                                 // program
1182                                 {
1183                                         glu::ShaderProgram* program = genWriteProgram(cmd.seed);
1184
1185                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1186                                         m_testCtx.getLog() << *program;
1187
1188                                         if (!program->isOk())
1189                                                 throw tcu::TestError("could not build program");
1190
1191                                         m_operationPrograms[step] = program;
1192                                 }
1193                                 break;
1194                         }
1195
1196                         case InterCallOperations::Command::TYPE_READ:
1197                         {
1198                                 const op::ReadData& cmd = m_cmds[step].u_cmd.read;
1199                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1200
1201                                 // program and result storage
1202                                 {
1203                                         glu::ShaderProgram* program = genReadProgram(cmd.seed);
1204
1205                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1206                                         m_testCtx.getLog() << *program;
1207
1208                                         if (!program->isOk())
1209                                                 throw tcu::TestError("could not build program");
1210
1211                                         m_operationPrograms[step] = program;
1212                                         m_operationResultStorages[step] = genResultStorage();
1213                                 }
1214                                 break;
1215                         }
1216
1217                         case InterCallOperations::Command::TYPE_BARRIER:
1218                         {
1219                                 break;
1220                         }
1221
1222                         case InterCallOperations::Command::TYPE_READ_MULTIPLE:
1223                         {
1224                                 const op::ReadMultipleData& cmd = m_cmds[step].u_cmd.readMulti;
1225                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle0) != m_storageIDs.end());
1226                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle1) != m_storageIDs.end());
1227
1228                                 // program
1229                                 {
1230                                         glu::ShaderProgram* program = genReadMultipleProgram(cmd.seed0, cmd.seed1);
1231
1232                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1233                                         m_testCtx.getLog() << *program;
1234
1235                                         if (!program->isOk())
1236                                                 throw tcu::TestError("could not build program");
1237
1238                                         m_operationPrograms[step] = program;
1239                                         m_operationResultStorages[step] = genResultStorage();
1240                                 }
1241                                 break;
1242                         }
1243
1244                         case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
1245                         {
1246                                 const op::WriteDataInterleaved& cmd = m_cmds[step].u_cmd.writeInterleave;
1247
1248                                 // new storage handle?
1249                                 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1250                                         m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1251
1252                                 // program
1253                                 {
1254                                         glu::ShaderProgram* program = genWriteInterleavedProgram(cmd.seed, cmd.evenOdd);
1255
1256                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1257                                         m_testCtx.getLog() << *program;
1258
1259                                         if (!program->isOk())
1260                                                 throw tcu::TestError("could not build program");
1261
1262                                         m_operationPrograms[step] = program;
1263                                 }
1264                                 break;
1265                         }
1266
1267                         case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
1268                         {
1269                                 const op::ReadDataInterleaved& cmd = m_cmds[step].u_cmd.readInterleave;
1270                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1271
1272                                 // program
1273                                 {
1274                                         glu::ShaderProgram* program = genReadInterleavedProgram(cmd.seed0, cmd.seed1);
1275
1276                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1277                                         m_testCtx.getLog() << *program;
1278
1279                                         if (!program->isOk())
1280                                                 throw tcu::TestError("could not build program");
1281
1282                                         m_operationPrograms[step] = program;
1283                                         m_operationResultStorages[step] = genResultStorage();
1284                                 }
1285                                 break;
1286                         }
1287
1288                         case InterCallOperations::Command::TYPE_READ_ZERO:
1289                         {
1290                                 const op::ReadZeroData& cmd = m_cmds[step].u_cmd.readZero;
1291
1292                                 // new storage handle?
1293                                 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1294                                         m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1295
1296                                 // program
1297                                 {
1298                                         glu::ShaderProgram* program = genReadZeroProgram();
1299
1300                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1301                                         m_testCtx.getLog() << *program;
1302
1303                                         if (!program->isOk())
1304                                                 throw tcu::TestError("could not build program");
1305
1306                                         m_operationPrograms[step] = program;
1307                                         m_operationResultStorages[step] = genResultStorage();
1308                                 }
1309                                 break;
1310                         }
1311
1312                         default:
1313                                 DE_ASSERT(DE_FALSE);
1314                 }
1315         }
1316 }
1317
1318 void InterCallTestCase::deinit (void)
1319 {
1320         // programs
1321         for (int ndx = 0; ndx < (int)m_operationPrograms.size(); ++ndx)
1322                 delete m_operationPrograms[ndx];
1323         m_operationPrograms.clear();
1324
1325         // result storages
1326         for (int ndx = 0; ndx < (int)m_operationResultStorages.size(); ++ndx)
1327         {
1328                 if (m_operationResultStorages[ndx])
1329                         m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_operationResultStorages[ndx]);
1330         }
1331         m_operationResultStorages.clear();
1332
1333         // storage
1334         for (std::map<int, glw::GLuint>::const_iterator it = m_storageIDs.begin(); it != m_storageIDs.end(); ++it)
1335         {
1336                 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1337
1338                 if (m_storage == STORAGE_BUFFER)
1339                         gl.deleteBuffers(1, &it->second);
1340                 else if (m_storage == STORAGE_IMAGE)
1341                         gl.deleteTextures(1, &it->second);
1342                 else
1343                         DE_ASSERT(DE_FALSE);
1344         }
1345         m_storageIDs.clear();
1346 }
1347
1348 InterCallTestCase::IterateResult InterCallTestCase::iterate (void)
1349 {
1350         int programFriendlyName                 = 0;
1351         int resultStorageFriendlyName   = 0;
1352
1353         m_testCtx.getLog() << tcu::TestLog::Message << "Running operations:" << tcu::TestLog::EndMessage;
1354
1355         // run steps
1356
1357         for (int step = 0; step < (int)m_cmds.size(); ++step)
1358         {
1359                 switch (m_cmds[step].type)
1360                 {
1361                         case InterCallOperations::Command::TYPE_WRITE:                          runCommand(m_cmds[step].u_cmd.write,                    step,   programFriendlyName);                                                           break;
1362                         case InterCallOperations::Command::TYPE_READ:                           runCommand(m_cmds[step].u_cmd.read,                             step,   programFriendlyName, resultStorageFriendlyName);        break;
1363                         case InterCallOperations::Command::TYPE_BARRIER:                        runCommand(m_cmds[step].u_cmd.barrier);                                                                                                                                         break;
1364                         case InterCallOperations::Command::TYPE_READ_MULTIPLE:          runCommand(m_cmds[step].u_cmd.readMulti,                step,   programFriendlyName, resultStorageFriendlyName);        break;
1365                         case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:       runCommand(m_cmds[step].u_cmd.writeInterleave,  step,   programFriendlyName);                                                           break;
1366                         case InterCallOperations::Command::TYPE_READ_INTERLEAVE:        runCommand(m_cmds[step].u_cmd.readInterleave,   step,   programFriendlyName, resultStorageFriendlyName);        break;
1367                         case InterCallOperations::Command::TYPE_READ_ZERO:                      runCommand(m_cmds[step].u_cmd.readZero,                 step,   programFriendlyName, resultStorageFriendlyName);        break;
1368                         default:
1369                                 DE_ASSERT(DE_FALSE);
1370                 }
1371         }
1372
1373         // read results from result buffers
1374         if (verifyResults())
1375                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1376         else
1377                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
1378
1379         return STOP;
1380 }
1381
1382 bool InterCallTestCase::verifyResults (void)
1383 {
1384         int             resultBufferFriendlyName        = 0;
1385         bool    allResultsOk                            = true;
1386         bool    anyResult                                       = false;
1387
1388         m_testCtx.getLog() << tcu::TestLog::Message << "Reading verifier program results" << tcu::TestLog::EndMessage;
1389
1390         for (int step = 0; step < (int)m_cmds.size(); ++step)
1391         {
1392                 const int       errorFloodThreshold     = 5;
1393                 int                     numErrorsLogged         = 0;
1394
1395                 if (m_operationResultStorages[step])
1396                 {
1397                         const glw::Functions&   gl              = m_context.getRenderContext().getFunctions();
1398                         const void*                             mapped  = DE_NULL;
1399                         std::vector<deInt32>    results (m_invocationGridSize * m_invocationGridSize);
1400                         bool                                    error   = false;
1401
1402                         anyResult = true;
1403
1404                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_operationResultStorages[step]);
1405                         mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), GL_MAP_READ_BIT);
1406                         GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
1407
1408                         // copy to properly aligned array
1409                         deMemcpy(&results[0], mapped, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32));
1410
1411                         if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
1412                                 throw tcu::TestError("memory map store corrupted");
1413
1414                         // check the results
1415                         for (int ndx = 0; ndx < (int)results.size(); ++ndx)
1416                         {
1417                                 if (results[ndx] != 1)
1418                                 {
1419                                         error = true;
1420
1421                                         if (numErrorsLogged == 0)
1422                                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
1423                                         if (numErrorsLogged++ < errorFloodThreshold)
1424                                                 m_testCtx.getLog() << tcu::TestLog::Message << "        Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
1425                                         else
1426                                         {
1427                                                 // after N errors, no point continuing verification
1428                                                 m_testCtx.getLog() << tcu::TestLog::Message << "        -- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
1429                                                 break;
1430                                         }
1431                                 }
1432                         }
1433
1434                         if (error)
1435                         {
1436                                 allResultsOk = false;
1437                         }
1438                         else
1439                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " ok." << tcu::TestLog::EndMessage;
1440                 }
1441         }
1442
1443         DE_ASSERT(anyResult);
1444         DE_UNREF(anyResult);
1445
1446         return allResultsOk;
1447 }
1448
1449 void InterCallTestCase::runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName)
1450 {
1451         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1452
1453         m_testCtx.getLog()
1454                 << tcu::TestLog::Message
1455                 << "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1456                 << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1457                 << tcu::TestLog::EndMessage;
1458
1459         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1460
1461         // set destination
1462         if (m_storage == STORAGE_BUFFER)
1463         {
1464                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1465
1466                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1467                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1468         }
1469         else if (m_storage == STORAGE_IMAGE)
1470         {
1471                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1472
1473                 gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1474                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1475         }
1476         else
1477                 DE_ASSERT(DE_FALSE);
1478
1479         // calc
1480         gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1481         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1482 }
1483
1484 void InterCallTestCase::runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1485 {
1486         runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1487 }
1488
1489 void InterCallTestCase::runCommand (const op::Barrier& cmd)
1490 {
1491         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1492
1493         DE_UNREF(cmd);
1494
1495         if (m_storage == STORAGE_BUFFER)
1496         {
1497                 m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_STORAGE_BARRIER_BIT" << tcu::TestLog::EndMessage;
1498                 gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
1499         }
1500         else if (m_storage == STORAGE_IMAGE)
1501         {
1502                 m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_IMAGE_ACCESS_BARRIER_BIT" << tcu::TestLog::EndMessage;
1503                 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1504         }
1505         else
1506                 DE_ASSERT(DE_FALSE);
1507 }
1508
1509 void InterCallTestCase::runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1510 {
1511         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1512
1513         m_testCtx.getLog()
1514                 << tcu::TestLog::Message
1515                 << "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffers") : ("images")) << " #" << cmd.targetHandle0 << " and #" << cmd.targetHandle1 << ".\n"
1516                 << "    Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1517                 << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1518                 << tcu::TestLog::EndMessage;
1519
1520         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1521
1522         // set sources
1523         if (m_storage == STORAGE_BUFFER)
1524         {
1525                 DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1526                 DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1527
1528                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[cmd.targetHandle0]);
1529                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageIDs[cmd.targetHandle1]);
1530                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffers");
1531         }
1532         else if (m_storage == STORAGE_IMAGE)
1533         {
1534                 DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1535                 DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1536
1537                 gl.bindImageTexture(1, m_storageIDs[cmd.targetHandle0], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1538                 gl.bindImageTexture(2, m_storageIDs[cmd.targetHandle1], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1539                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
1540         }
1541         else
1542                 DE_ASSERT(DE_FALSE);
1543
1544         // set destination
1545         DE_ASSERT(m_operationResultStorages[stepNdx]);
1546         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1547         GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1548
1549         // calc
1550         gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1551         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read multi");
1552 }
1553
1554 void InterCallTestCase::runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName)
1555 {
1556         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1557
1558         m_testCtx.getLog()
1559                 << tcu::TestLog::Message
1560                 << "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1561                 << "    Writing to every " << ((cmd.evenOdd) ? ("even") : ("odd")) << " " << ((m_storage == STORAGE_BUFFER) ? ("element") : ("column")) << ".\n"
1562                 << "    Dispatch size: " << m_invocationGridSize / 2 << "x" << m_invocationGridSize << "."
1563                 << tcu::TestLog::EndMessage;
1564
1565         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1566
1567         // set destination
1568         if (m_storage == STORAGE_BUFFER)
1569         {
1570                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1571
1572                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1573                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1574         }
1575         else if (m_storage == STORAGE_IMAGE)
1576         {
1577                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1578
1579                 gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1580                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1581         }
1582         else
1583                 DE_ASSERT(DE_FALSE);
1584
1585         // calc
1586         gl.dispatchCompute(m_invocationGridSize / 2, m_invocationGridSize, 1);
1587         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1588 }
1589
1590 void InterCallTestCase::runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1591 {
1592         runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1593 }
1594
1595 void InterCallTestCase::runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1596 {
1597         runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1598 }
1599
1600 void InterCallTestCase::runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1601 {
1602         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1603
1604         m_testCtx.getLog()
1605                 << tcu::TestLog::Message
1606                 << "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << targetHandle << ".\n"
1607                 << "    Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1608                 << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1609                 << tcu::TestLog::EndMessage;
1610
1611         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1612
1613         // set source
1614         if (m_storage == STORAGE_BUFFER)
1615         {
1616                 DE_ASSERT(m_storageIDs[targetHandle]);
1617
1618                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[targetHandle]);
1619                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffer");
1620         }
1621         else if (m_storage == STORAGE_IMAGE)
1622         {
1623                 DE_ASSERT(m_storageIDs[targetHandle]);
1624
1625                 gl.bindImageTexture(1, m_storageIDs[targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1626                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
1627         }
1628         else
1629                 DE_ASSERT(DE_FALSE);
1630
1631         // set destination
1632         DE_ASSERT(m_operationResultStorages[stepNdx]);
1633         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1634         GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1635
1636         // calc
1637         gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1638         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read");
1639 }
1640
1641 glw::GLuint InterCallTestCase::genStorage (int friendlyName)
1642 {
1643         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1644
1645         if (m_storage == STORAGE_BUFFER)
1646         {
1647                 const int               numElements             = m_invocationGridSize * m_invocationGridSize * m_perInvocationSize;
1648                 const int               bufferSize              = numElements * (int)((m_formatInteger) ? (sizeof(deInt32)) : (sizeof(glw::GLfloat)));
1649                 glw::GLuint             retVal                  = 0;
1650
1651                 m_testCtx.getLog() << tcu::TestLog::Message << "Creating buffer #" << friendlyName << ", size " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
1652
1653                 gl.genBuffers(1, &retVal);
1654                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1655
1656                 if (m_formatInteger)
1657                 {
1658                         const std::vector<deUint32> zeroBuffer(numElements, 0);
1659                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1660                 }
1661                 else
1662                 {
1663                         const std::vector<float> zeroBuffer(numElements, 0.0f);
1664                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1665                 }
1666                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1667
1668                 return retVal;
1669         }
1670         else if (m_storage == STORAGE_IMAGE)
1671         {
1672                 const int       imageWidth      = m_invocationGridSize;
1673                 const int       imageHeight     = m_invocationGridSize * m_perInvocationSize;
1674                 glw::GLuint     retVal          = 0;
1675
1676                 m_testCtx.getLog()
1677                         << tcu::TestLog::Message
1678                         << "Creating image #" << friendlyName << ", size " << imageWidth << "x" << imageHeight
1679                         << ", internalformat = " << ((m_formatInteger) ? ("r32i") : ("r32f"))
1680                         << ", size = " << (imageWidth*imageHeight*sizeof(deUint32)) << " bytes."
1681                         << tcu::TestLog::EndMessage;
1682
1683                 gl.genTextures(1, &retVal);
1684                 gl.bindTexture(GL_TEXTURE_2D, retVal);
1685
1686                 if (m_formatInteger)
1687                         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, imageWidth, imageHeight);
1688                 else
1689                         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32F, imageWidth, imageHeight);
1690
1691                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1692                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1693                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen image");
1694
1695                 m_testCtx.getLog()
1696                         << tcu::TestLog::Message
1697                         << "Filling image with 0"
1698                         << tcu::TestLog::EndMessage;
1699
1700                 if (m_formatInteger)
1701                 {
1702                         const std::vector<deInt32> zeroBuffer(imageWidth * imageHeight, 0);
1703                         gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
1704                 }
1705                 else
1706                 {
1707                         const std::vector<float> zeroBuffer(imageWidth * imageHeight, 0.0f);
1708                         gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED, GL_FLOAT, &zeroBuffer[0]);
1709                 }
1710
1711                 GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
1712
1713                 return retVal;
1714         }
1715         else
1716         {
1717                 DE_ASSERT(DE_FALSE);
1718                 return 0;
1719         }
1720 }
1721
1722 glw::GLuint InterCallTestCase::genResultStorage (void)
1723 {
1724         const glw::Functions&   gl              = m_context.getRenderContext().getFunctions();
1725         glw::GLuint                             retVal  = 0;
1726
1727         gl.genBuffers(1, &retVal);
1728         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1729         gl.bufferData(GL_SHADER_STORAGE_BUFFER, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), DE_NULL, GL_STATIC_DRAW);
1730         GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1731
1732         return retVal;
1733 }
1734
1735 glu::ShaderProgram* InterCallTestCase::genWriteProgram (int seed)
1736 {
1737         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1738         std::ostringstream      buf;
1739
1740         buf << "${GLSL_VERSION_DECL}\n"
1741                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1742                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1743
1744         if (m_storage == STORAGE_BUFFER)
1745                 buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1746                         << "{\n"
1747                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1748                         << "} sb_out;\n";
1749         else if (m_storage == STORAGE_IMAGE)
1750                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1751         else
1752                 DE_ASSERT(DE_FALSE);
1753
1754         buf << "\n"
1755                 << "void main (void)\n"
1756                 << "{\n"
1757                 << "    uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1758                 << "    int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1759                 << "\n";
1760
1761         // Write to buffer/image m_perInvocationSize elements
1762         if (m_storage == STORAGE_BUFFER)
1763         {
1764                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1765                 {
1766                         if (m_useAtomic)
1767                                 buf << "        atomicExchange(";
1768                         else
1769                                 buf << "        ";
1770
1771                         buf << "sb_out.values[(groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]";
1772
1773                         if (m_useAtomic)
1774                                 buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1775                         else
1776                                 buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1777                 }
1778         }
1779         else if (m_storage == STORAGE_IMAGE)
1780         {
1781                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1782                 {
1783                         if (m_useAtomic)
1784                                 buf << "        imageAtomicExchange";
1785                         else
1786                                 buf << "        imageStore";
1787
1788                         buf << "(u_imageOut, ivec2((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1789
1790                         if (m_useAtomic)
1791                                 buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1792                         else
1793                                 buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
1794                 }
1795         }
1796         else
1797                 DE_ASSERT(DE_FALSE);
1798
1799         buf << "}\n";
1800
1801         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1802 }
1803
1804 glu::ShaderProgram* InterCallTestCase::genReadProgram (int seed)
1805 {
1806         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1807         std::ostringstream      buf;
1808
1809         buf << "${GLSL_VERSION_DECL}\n"
1810                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1811                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1812
1813         if (m_storage == STORAGE_BUFFER)
1814                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1815                         << "{\n"
1816                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1817                         << "} sb_in;\n";
1818         else if (m_storage == STORAGE_IMAGE)
1819                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
1820         else
1821                 DE_ASSERT(DE_FALSE);
1822
1823         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1824                 << "{\n"
1825                 << "    highp int resultOk[];\n"
1826                 << "} sb_result;\n"
1827                 << "\n"
1828                 << "void main (void)\n"
1829                 << "{\n"
1830                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1831                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1832                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1833                 << "    bool allOk = true;\n"
1834                 << "\n";
1835
1836         // Verify data
1837
1838         if (m_storage == STORAGE_BUFFER)
1839         {
1840                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1841                 {
1842                         if (!m_useAtomic)
1843                                 buf << "        allOk = allOk && (sb_in.values[(groupNdx + "
1844                                         << seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "] == "
1845                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1846                         else
1847                                 buf << "        allOk = allOk && (atomicExchange(sb_in.values[(groupNdx + "
1848                                         << seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "], zero) == "
1849                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1850                 }
1851         }
1852         else if (m_storage == STORAGE_IMAGE)
1853         {
1854                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1855                 {
1856                         if (!m_useAtomic)
1857                                 buf     << "    allOk = allOk && (imageLoad(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1858                                         << (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)).x == "
1859                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1860                         else
1861                                 buf << "        allOk = allOk && (imageAtomicExchange(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1862                                         << (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u), zero) == "
1863                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1864                 }
1865         }
1866         else
1867                 DE_ASSERT(DE_FALSE);
1868
1869         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1870                 << "}\n";
1871
1872         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1873 }
1874
1875 glu::ShaderProgram* InterCallTestCase::genReadMultipleProgram (int seed0, int seed1)
1876 {
1877         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1878         std::ostringstream      buf;
1879
1880         buf << "${GLSL_VERSION_DECL}\n"
1881                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1882                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1883
1884         if (m_storage == STORAGE_BUFFER)
1885                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer0\n"
1886                         << "{\n"
1887                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1888                         << "} sb_in0;\n"
1889                         << "layout(binding=2, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer1\n"
1890                         << "{\n"
1891                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1892                         << "} sb_in1;\n";
1893         else if (m_storage == STORAGE_IMAGE)
1894                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn0;\n"
1895                         << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=2) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn1;\n";
1896         else
1897                 DE_ASSERT(DE_FALSE);
1898
1899         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1900                 << "{\n"
1901                 << "    highp int resultOk[];\n"
1902                 << "} sb_result;\n"
1903                 << "\n"
1904                 << "void main (void)\n"
1905                 << "{\n"
1906                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1907                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1908                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1909                 << "    bool allOk = true;\n"
1910                 << "\n";
1911
1912         // Verify data
1913
1914         if (m_storage == STORAGE_BUFFER)
1915         {
1916                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1917                         buf << "        allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in0.values[(groupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1918                                 << "    allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in1.values[(groupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1919         }
1920         else if (m_storage == STORAGE_IMAGE)
1921         {
1922                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1923                         buf << "        allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn0, ivec2((gl_GlobalInvocationID.x + " << (seed0 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1924                                 << "    allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn1, ivec2((gl_GlobalInvocationID.x + " << (seed1 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1925         }
1926         else
1927                 DE_ASSERT(DE_FALSE);
1928
1929         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1930                 << "}\n";
1931
1932         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1933 }
1934
1935 glu::ShaderProgram* InterCallTestCase::genWriteInterleavedProgram (int seed, bool evenOdd)
1936 {
1937         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1938         std::ostringstream      buf;
1939
1940         buf << "${GLSL_VERSION_DECL}\n"
1941                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1942                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1943
1944         if (m_storage == STORAGE_BUFFER)
1945                 buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1946                         << "{\n"
1947                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1948                         << "} sb_out;\n";
1949         else if (m_storage == STORAGE_IMAGE)
1950                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1951         else
1952                 DE_ASSERT(DE_FALSE);
1953
1954         buf << "\n"
1955                 << "void main (void)\n"
1956                 << "{\n"
1957                 << "    uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1958                 << "    int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1959                 << "\n";
1960
1961         // Write to buffer/image m_perInvocationSize elements
1962         if (m_storage == STORAGE_BUFFER)
1963         {
1964                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1965                 {
1966                         if (m_useAtomic)
1967                                 buf << "        atomicExchange(";
1968                         else
1969                                 buf << "        ";
1970
1971                         buf << "sb_out.values[((groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize / 2 * m_perInvocationSize  << ") * 2 + " << ((evenOdd) ? (0) : (1)) << "]";
1972
1973                         if (m_useAtomic)
1974                                 buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1975                         else
1976                                 buf << "= " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1977                 }
1978         }
1979         else if (m_storage == STORAGE_IMAGE)
1980         {
1981                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1982                 {
1983                         if (m_useAtomic)
1984                                 buf << "        imageAtomicExchange";
1985                         else
1986                                 buf << "        imageStore";
1987
1988                         buf << "(u_imageOut, ivec2(((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + " << ((evenOdd) ? (0) : (1)) << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1989
1990                         if (m_useAtomic)
1991                                 buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1992                         else
1993                                 buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
1994                 }
1995         }
1996         else
1997                 DE_ASSERT(DE_FALSE);
1998
1999         buf << "}\n";
2000
2001         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2002 }
2003
2004 glu::ShaderProgram* InterCallTestCase::genReadInterleavedProgram (int seed0, int seed1)
2005 {
2006         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2007         std::ostringstream      buf;
2008
2009         buf << "${GLSL_VERSION_DECL}\n"
2010                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
2011                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
2012
2013         if (m_storage == STORAGE_BUFFER)
2014                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2015                         << "{\n"
2016                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2017                         << "} sb_in;\n";
2018         else if (m_storage == STORAGE_IMAGE)
2019                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2020         else
2021                 DE_ASSERT(DE_FALSE);
2022
2023         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2024                 << "{\n"
2025                 << "    highp int resultOk[];\n"
2026                 << "} sb_result;\n"
2027                 << "\n"
2028                 << "void main (void)\n"
2029                 << "{\n"
2030                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2031                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2032                 << "    int interleavedGroupNdx = int((size.x >> 1U) * size.y * gl_GlobalInvocationID.z + (size.x >> 1U) * gl_GlobalInvocationID.y + (gl_GlobalInvocationID.x >> 1U));\n"
2033                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
2034                 << "    bool allOk = true;\n"
2035                 << "\n";
2036
2037         // Verify data
2038
2039         if (m_storage == STORAGE_BUFFER)
2040         {
2041                 buf << "        if (groupNdx % 2 == 0)\n"
2042                         << "    {\n";
2043                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2044                         buf << "                allOk = allOk && ("
2045                                 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 0]"
2046                                 << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2047                 buf << "        }\n"
2048                         << "    else\n"
2049                         << "    {\n";
2050                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2051                         buf << "                allOk = allOk && ("
2052                                 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 1]"
2053                                 << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2054                 buf << "        }\n";
2055         }
2056         else if (m_storage == STORAGE_IMAGE)
2057         {
2058                 buf << "        if (groupNdx % 2 == 0)\n"
2059                         << "    {\n";
2060                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2061                         buf << "                allOk = allOk && ("
2062                                 << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2063                                 << "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed0 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 0, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2064                                 << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2065                 buf << "        }\n"
2066                         << "    else\n"
2067                         << "    {\n";
2068                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2069                         buf << "                allOk = allOk && ("
2070                                 << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2071                                 << "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed1 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 1, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2072                                 << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2073                 buf << "        }\n";
2074         }
2075         else
2076                 DE_ASSERT(DE_FALSE);
2077
2078         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2079                 << "}\n";
2080
2081         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2082 }
2083
2084 glu::ShaderProgram*     InterCallTestCase::genReadZeroProgram (void)
2085 {
2086         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2087         std::ostringstream      buf;
2088
2089         buf << "${GLSL_VERSION_DECL}\n"
2090                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
2091                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
2092
2093         if (m_storage == STORAGE_BUFFER)
2094                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2095                         << "{\n"
2096                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2097                         << "} sb_in;\n";
2098         else if (m_storage == STORAGE_IMAGE)
2099                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2100         else
2101                 DE_ASSERT(DE_FALSE);
2102
2103         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2104                 << "{\n"
2105                 << "    highp int resultOk[];\n"
2106                 << "} sb_result;\n"
2107                 << "\n"
2108                 << "void main (void)\n"
2109                 << "{\n"
2110                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2111                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2112                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " anything = " << ((m_formatInteger) ? ("5") : ("5.0")) << ";\n"
2113                 << "    bool allOk = true;\n"
2114                 << "\n";
2115
2116         // Verify data
2117
2118         if (m_storage == STORAGE_BUFFER)
2119         {
2120                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2121                         buf << "        allOk = allOk && ("
2122                                 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[groupNdx * " << m_perInvocationSize << " + " << readNdx << "]"
2123                                 << ((m_useAtomic) ? (", anything)") : ("")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2124         }
2125         else if (m_storage == STORAGE_IMAGE)
2126         {
2127                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2128                         buf << "        allOk = allOk && ("
2129                         << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y + " << (readNdx*m_invocationGridSize) << "u)"
2130                         << ((m_useAtomic) ? (", anything)") : (").x")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2131         }
2132         else
2133                 DE_ASSERT(DE_FALSE);
2134
2135         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2136                 << "}\n";
2137
2138         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2139 }
2140
2141 class SSBOConcurrentAtomicCase : public TestCase
2142 {
2143 public:
2144
2145                                                         SSBOConcurrentAtomicCase        (Context& context, const char* name, const char* description, int numCalls, int workSize);
2146                                                         ~SSBOConcurrentAtomicCase       (void);
2147
2148         void                                    init                                            (void);
2149         void                                    deinit                                          (void);
2150         IterateResult                   iterate                                         (void);
2151
2152 private:
2153         std::string                             genComputeSource                        (void) const;
2154
2155         const int                               m_numCalls;
2156         const int                               m_workSize;
2157         glu::ShaderProgram*             m_program;
2158         deUint32                                m_bufferID;
2159         std::vector<deUint32>   m_intermediateResultBuffers;
2160 };
2161
2162 SSBOConcurrentAtomicCase::SSBOConcurrentAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2163         : TestCase              (context, name, description)
2164         , m_numCalls    (numCalls)
2165         , m_workSize    (workSize)
2166         , m_program             (DE_NULL)
2167         , m_bufferID    (DE_NULL)
2168 {
2169 }
2170
2171 SSBOConcurrentAtomicCase::~SSBOConcurrentAtomicCase (void)
2172 {
2173         deinit();
2174 }
2175
2176 void SSBOConcurrentAtomicCase::init (void)
2177 {
2178         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
2179         std::vector<deUint32>   zeroData                        (m_workSize, 0);
2180
2181         // gen buffers
2182
2183         gl.genBuffers(1, &m_bufferID);
2184         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2185         gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2186
2187         for (int ndx = 0; ndx < m_numCalls; ++ndx)
2188         {
2189                 deUint32 buffer = 0;
2190
2191                 gl.genBuffers(1, &buffer);
2192                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2193                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2194
2195                 m_intermediateResultBuffers.push_back(buffer);
2196                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2197         }
2198
2199         // gen program
2200
2201         m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2202         m_testCtx.getLog() << *m_program;
2203         if (!m_program->isOk())
2204                 throw tcu::TestError("could not build program");
2205 }
2206
2207 void SSBOConcurrentAtomicCase::deinit (void)
2208 {
2209         if (m_bufferID)
2210         {
2211                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2212                 m_bufferID = 0;
2213         }
2214
2215         for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2216                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2217         m_intermediateResultBuffers.clear();
2218
2219         delete m_program;
2220         m_program = DE_NULL;
2221 }
2222
2223 TestCase::IterateResult SSBOConcurrentAtomicCase::iterate (void)
2224 {
2225         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
2226         const deUint32                  sumValue                = (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2227         std::vector<int>                deltas;
2228
2229         // generate unique deltas
2230         generateShuffledRamp(m_numCalls, deltas);
2231
2232         // invoke program N times, each with a different delta
2233         {
2234                 const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2235
2236                 m_testCtx.getLog()
2237                         << tcu::TestLog::Message
2238                         << "Running shader " << m_numCalls << " times.\n"
2239                         << "Num groups = (" << m_workSize << ", 1, 1)\n"
2240                         << "Setting u_atomicDelta to a unique value for each call.\n"
2241                         << tcu::TestLog::EndMessage;
2242
2243                 if (deltaLocation == -1)
2244                         throw tcu::TestError("u_atomicDelta location was -1");
2245
2246                 gl.useProgram(m_program->getProgram());
2247                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_bufferID);
2248
2249                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2250                 {
2251                         m_testCtx.getLog()
2252                                 << tcu::TestLog::Message
2253                                 << "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2254                                 << tcu::TestLog::EndMessage;
2255
2256                         gl.uniform1ui(deltaLocation, deltas[callNdx]);
2257                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2258                         gl.dispatchCompute(m_workSize, 1, 1);
2259                 }
2260
2261                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2262         }
2263
2264         // Verify result
2265         {
2266                 std::vector<deUint32> result;
2267
2268                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2269
2270                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2271                 readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, result);
2272
2273                 for (int ndx = 0; ndx < m_workSize; ++ndx)
2274                 {
2275                         if (result[ndx] != sumValue)
2276                         {
2277                                 m_testCtx.getLog()
2278                                         << tcu::TestLog::Message
2279                                         << "Work buffer error, at index " << ndx << " expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2280                                         << "Work buffer contains invalid values."
2281                                         << tcu::TestLog::EndMessage;
2282
2283                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2284                                 return STOP;
2285                         }
2286                 }
2287
2288                 m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer contents are valid." << tcu::TestLog::EndMessage;
2289         }
2290
2291         // verify steps
2292         {
2293                 std::vector<std::vector<deUint32> >     intermediateResults     (m_numCalls);
2294                 std::vector<deUint32>                           valueChain                      (m_numCalls);
2295
2296                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2297
2298                 // collect results
2299
2300                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2301                 {
2302                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2303                         readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, intermediateResults[callNdx]);
2304                 }
2305
2306                 // verify values
2307
2308                 for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2309                 {
2310                         int                     invalidOperationNdx;
2311                         deUint32        errorDelta;
2312                         deUint32        errorExpected;
2313
2314                         // collect result chain for each element
2315                         for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2316                                 valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2317
2318                         // check there exists a path from 0 to sumValue using each addition once
2319                         // decompose cumulative results to addition operations (all additions positive => this works)
2320
2321                         std::sort(valueChain.begin(), valueChain.end());
2322
2323                         // validate chain
2324                         if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2325                         {
2326                                 m_testCtx.getLog()
2327                                         << tcu::TestLog::Message
2328                                         << "Intermediate buffer error, at value index " << valueNdx << ", applied operation index " << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2329                                         << "Intermediate buffer contains invalid values. Values at index " << valueNdx << "\n"
2330                                         << tcu::TestLog::EndMessage;
2331
2332                                 for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2333                                         m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2334                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2335
2336                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2337                                 return STOP;
2338                         }
2339                 }
2340
2341                 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2342         }
2343
2344         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2345         return STOP;
2346 }
2347
2348 std::string SSBOConcurrentAtomicCase::genComputeSource (void) const
2349 {
2350         std::ostringstream buf;
2351
2352         buf     << "${GLSL_VERSION_DECL}\n"
2353                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2354                 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2355                 << "{\n"
2356                 << "    highp uint values[" << m_workSize << "];\n"
2357                 << "} sb_ires;\n"
2358                 << "\n"
2359                 << "layout (binding = 2, std430) volatile buffer WorkBuffer\n"
2360                 << "{\n"
2361                 << "    highp uint values[" << m_workSize << "];\n"
2362                 << "} sb_work;\n"
2363                 << "uniform highp uint u_atomicDelta;\n"
2364                 << "\n"
2365                 << "void main ()\n"
2366                 << "{\n"
2367                 << "    highp uint invocationIndex = gl_GlobalInvocationID.x;\n"
2368                 << "    sb_ires.values[invocationIndex] = atomicAdd(sb_work.values[invocationIndex], u_atomicDelta);\n"
2369                 << "}";
2370
2371         return specializeShader(m_context, buf.str().c_str());
2372 }
2373
2374 class ConcurrentAtomicCounterCase : public TestCase
2375 {
2376 public:
2377
2378                                                         ConcurrentAtomicCounterCase             (Context& context, const char* name, const char* description, int numCalls, int workSize);
2379                                                         ~ConcurrentAtomicCounterCase    (void);
2380
2381         void                                    init                                                    (void);
2382         void                                    deinit                                                  (void);
2383         IterateResult                   iterate                                                 (void);
2384
2385 private:
2386         std::string                             genComputeSource                                (bool evenOdd) const;
2387
2388         const int                               m_numCalls;
2389         const int                               m_workSize;
2390         glu::ShaderProgram*             m_evenProgram;
2391         glu::ShaderProgram*             m_oddProgram;
2392         deUint32                                m_counterBuffer;
2393         deUint32                                m_intermediateResultBuffer;
2394 };
2395
2396 ConcurrentAtomicCounterCase::ConcurrentAtomicCounterCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2397         : TestCase                                      (context, name, description)
2398         , m_numCalls                            (numCalls)
2399         , m_workSize                            (workSize)
2400         , m_evenProgram                         (DE_NULL)
2401         , m_oddProgram                          (DE_NULL)
2402         , m_counterBuffer                       (DE_NULL)
2403         , m_intermediateResultBuffer(DE_NULL)
2404 {
2405 }
2406
2407 ConcurrentAtomicCounterCase::~ConcurrentAtomicCounterCase (void)
2408 {
2409         deinit();
2410 }
2411
2412 void ConcurrentAtomicCounterCase::init (void)
2413 {
2414         const glw::Functions&           gl                      = m_context.getRenderContext().getFunctions();
2415         const std::vector<deUint32>     zeroData        (m_numCalls * m_workSize, 0);
2416
2417         // gen buffer
2418
2419         gl.genBuffers(1, &m_counterBuffer);
2420         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_counterBuffer);
2421         gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32), &zeroData[0], GL_DYNAMIC_COPY);
2422
2423         gl.genBuffers(1, &m_intermediateResultBuffer);
2424         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2425         gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_numCalls * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2426
2427         GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2428
2429         // gen programs
2430
2431         {
2432                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "EvenProgram", "Even program");
2433
2434                 m_evenProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(true)));
2435                 m_testCtx.getLog() << *m_evenProgram;
2436                 if (!m_evenProgram->isOk())
2437                         throw tcu::TestError("could not build program");
2438         }
2439         {
2440                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "OddProgram", "Odd program");
2441
2442                 m_oddProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(false)));
2443                 m_testCtx.getLog() << *m_oddProgram;
2444                 if (!m_oddProgram->isOk())
2445                         throw tcu::TestError("could not build program");
2446         }
2447 }
2448
2449 void ConcurrentAtomicCounterCase::deinit (void)
2450 {
2451         if (m_counterBuffer)
2452         {
2453                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_counterBuffer);
2454                 m_counterBuffer = 0;
2455         }
2456         if (m_intermediateResultBuffer)
2457         {
2458                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffer);
2459                 m_intermediateResultBuffer = 0;
2460         }
2461
2462         delete m_evenProgram;
2463         m_evenProgram = DE_NULL;
2464
2465         delete m_oddProgram;
2466         m_oddProgram = DE_NULL;
2467 }
2468
2469 TestCase::IterateResult ConcurrentAtomicCounterCase::iterate (void)
2470 {
2471         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2472
2473         // invoke program N times, each with a different delta
2474         {
2475                 const int evenCallNdxLocation   = gl.getUniformLocation(m_evenProgram->getProgram(), "u_callNdx");
2476                 const int oddCallNdxLocation    = gl.getUniformLocation(m_oddProgram->getProgram(), "u_callNdx");
2477
2478                 m_testCtx.getLog()
2479                         << tcu::TestLog::Message
2480                         << "Running shader pair (even & odd) " << m_numCalls << " times.\n"
2481                         << "Num groups = (" << m_workSize << ", 1, 1)\n"
2482                         << tcu::TestLog::EndMessage;
2483
2484                 if (evenCallNdxLocation == -1)
2485                         throw tcu::TestError("u_callNdx location was -1");
2486                 if (oddCallNdxLocation == -1)
2487                         throw tcu::TestError("u_callNdx location was -1");
2488
2489                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffer);
2490                 gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_counterBuffer);
2491
2492                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2493                 {
2494                         gl.useProgram(m_evenProgram->getProgram());
2495                         gl.uniform1ui(evenCallNdxLocation, (deUint32)callNdx);
2496                         gl.dispatchCompute(m_workSize, 1, 1);
2497
2498                         gl.useProgram(m_oddProgram->getProgram());
2499                         gl.uniform1ui(oddCallNdxLocation, (deUint32)callNdx);
2500                         gl.dispatchCompute(m_workSize, 1, 1);
2501                 }
2502
2503                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2504         }
2505
2506         // Verify result
2507         {
2508                 deUint32 result;
2509
2510                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
2511
2512                 gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counterBuffer);
2513                 result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
2514
2515                 if ((int)result != m_numCalls*m_workSize)
2516                 {
2517                         m_testCtx.getLog()
2518                                 << tcu::TestLog::Message
2519                                 << "Counter buffer error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
2520                                 << tcu::TestLog::EndMessage;
2521
2522                         m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2523                         return STOP;
2524                 }
2525
2526                 m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer is valid." << tcu::TestLog::EndMessage;
2527         }
2528
2529         // verify steps
2530         {
2531                 std::vector<deUint32> intermediateResults;
2532
2533                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2534
2535                 // collect results
2536
2537                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2538                 readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_numCalls * m_workSize, intermediateResults);
2539
2540                 // verify values
2541
2542                 std::sort(intermediateResults.begin(), intermediateResults.end());
2543
2544                 for (int valueNdx = 0; valueNdx < m_workSize * m_numCalls; ++valueNdx)
2545                 {
2546                         if ((int)intermediateResults[valueNdx] != valueNdx)
2547                         {
2548                                 m_testCtx.getLog()
2549                                         << tcu::TestLog::Message
2550                                         << "Intermediate buffer error, at value index " << valueNdx << ", expected " << valueNdx << ", got " << intermediateResults[valueNdx] << ".\n"
2551                                         << "Intermediate buffer contains invalid values. Intermediate results:\n"
2552                                         << tcu::TestLog::EndMessage;
2553
2554                                 for (int logCallNdx = 0; logCallNdx < m_workSize * m_numCalls; ++logCallNdx)
2555                                         m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx] << tcu::TestLog::EndMessage;
2556
2557                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2558                                 return STOP;
2559                         }
2560                 }
2561
2562                 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2563         }
2564
2565         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2566         return STOP;
2567 }
2568
2569 std::string ConcurrentAtomicCounterCase::genComputeSource (bool evenOdd) const
2570 {
2571         std::ostringstream buf;
2572
2573         buf     << "${GLSL_VERSION_DECL}\n"
2574                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2575                 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2576                 << "{\n"
2577                 << "    highp uint values[" << m_workSize * m_numCalls << "];\n"
2578                 << "} sb_ires;\n"
2579                 << "\n"
2580                 << "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
2581                 << "uniform highp uint u_callNdx;\n"
2582                 << "\n"
2583                 << "void main ()\n"
2584                 << "{\n"
2585                 << "    highp uint dataNdx = u_callNdx * " << m_workSize << "u + gl_GlobalInvocationID.x;\n"
2586                 << "    if ((dataNdx % 2u) == " << ((evenOdd) ? (0) : (1)) << "u)\n"
2587                 << "            sb_ires.values[dataNdx] = atomicCounterIncrement(u_counter);\n"
2588                 << "}";
2589
2590         return specializeShader(m_context, buf.str().c_str());
2591 }
2592
2593 class ConcurrentImageAtomicCase : public TestCase
2594 {
2595 public:
2596
2597                                                         ConcurrentImageAtomicCase       (Context& context, const char* name, const char* description, int numCalls, int workSize);
2598                                                         ~ConcurrentImageAtomicCase      (void);
2599
2600         void                                    init                                            (void);
2601         void                                    deinit                                          (void);
2602         IterateResult                   iterate                                         (void);
2603
2604 private:
2605         void                                    readWorkImage                           (std::vector<deUint32>& result);
2606
2607         std::string                             genComputeSource                        (void) const;
2608         std::string                             genImageReadSource                      (void) const;
2609         std::string                             genImageClearSource                     (void) const;
2610
2611         const int                               m_numCalls;
2612         const int                               m_workSize;
2613         glu::ShaderProgram*             m_program;
2614         glu::ShaderProgram*             m_imageReadProgram;
2615         glu::ShaderProgram*             m_imageClearProgram;
2616         deUint32                                m_imageID;
2617         std::vector<deUint32>   m_intermediateResultBuffers;
2618 };
2619
2620 ConcurrentImageAtomicCase::ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2621         : TestCase                              (context, name, description)
2622         , m_numCalls                    (numCalls)
2623         , m_workSize                    (workSize)
2624         , m_program                             (DE_NULL)
2625         , m_imageReadProgram    (DE_NULL)
2626         , m_imageClearProgram   (DE_NULL)
2627         , m_imageID                             (DE_NULL)
2628 {
2629 }
2630
2631 ConcurrentImageAtomicCase::~ConcurrentImageAtomicCase (void)
2632 {
2633         deinit();
2634 }
2635
2636 void ConcurrentImageAtomicCase::init (void)
2637 {
2638         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
2639         std::vector<deUint32>   zeroData                        (m_workSize * m_workSize, 0);
2640         const bool                              supportsES32            = glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2));
2641
2642         if (!supportsES32 && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
2643                 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic");
2644
2645         // gen image
2646
2647         gl.genTextures(1, &m_imageID);
2648         gl.bindTexture(GL_TEXTURE_2D, m_imageID);
2649         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize, m_workSize);
2650         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2651         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2652         GLU_EXPECT_NO_ERROR(gl.getError(), "gen tex");
2653
2654         // gen buffers
2655
2656         for (int ndx = 0; ndx < m_numCalls; ++ndx)
2657         {
2658                 deUint32 buffer = 0;
2659
2660                 gl.genBuffers(1, &buffer);
2661                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2662                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2663
2664                 m_intermediateResultBuffers.push_back(buffer);
2665                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2666         }
2667
2668         // gen programs
2669
2670         m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2671         m_testCtx.getLog() << *m_program;
2672         if (!m_program->isOk())
2673                 throw tcu::TestError("could not build program");
2674
2675         m_imageReadProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageReadSource()));
2676         if (!m_imageReadProgram->isOk())
2677         {
2678                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageReadProgram", "Image read program");
2679
2680                 m_testCtx.getLog() << *m_imageReadProgram;
2681                 throw tcu::TestError("could not build program");
2682         }
2683
2684         m_imageClearProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageClearSource()));
2685         if (!m_imageClearProgram->isOk())
2686         {
2687                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageClearProgram", "Image read program");
2688
2689                 m_testCtx.getLog() << *m_imageClearProgram;
2690                 throw tcu::TestError("could not build program");
2691         }
2692 }
2693
2694 void ConcurrentImageAtomicCase::deinit (void)
2695 {
2696         if (m_imageID)
2697         {
2698                 m_context.getRenderContext().getFunctions().deleteTextures(1, &m_imageID);
2699                 m_imageID = 0;
2700         }
2701
2702         for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2703                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2704         m_intermediateResultBuffers.clear();
2705
2706         delete m_program;
2707         m_program = DE_NULL;
2708
2709         delete m_imageReadProgram;
2710         m_imageReadProgram = DE_NULL;
2711
2712         delete m_imageClearProgram;
2713         m_imageClearProgram = DE_NULL;
2714 }
2715
2716 TestCase::IterateResult ConcurrentImageAtomicCase::iterate (void)
2717 {
2718         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
2719         const deUint32                  sumValue                = (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2720         std::vector<int>                deltas;
2721
2722         // generate unique deltas
2723         generateShuffledRamp(m_numCalls, deltas);
2724
2725         // clear image
2726         {
2727                 m_testCtx.getLog() << tcu::TestLog::Message << "Clearing image contents" << tcu::TestLog::EndMessage;
2728
2729                 gl.useProgram(m_imageClearProgram->getProgram());
2730                 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
2731                 gl.dispatchCompute(m_workSize, m_workSize, 1);
2732                 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2733
2734                 GLU_EXPECT_NO_ERROR(gl.getError(), "clear");
2735         }
2736
2737         // invoke program N times, each with a different delta
2738         {
2739                 const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2740
2741                 m_testCtx.getLog()
2742                         << tcu::TestLog::Message
2743                         << "Running shader " << m_numCalls << " times.\n"
2744                         << "Num groups = (" << m_workSize << ", " << m_workSize << ", 1)\n"
2745                         << "Setting u_atomicDelta to a unique value for each call.\n"
2746                         << tcu::TestLog::EndMessage;
2747
2748                 if (deltaLocation == -1)
2749                         throw tcu::TestError("u_atomicDelta location was -1");
2750
2751                 gl.useProgram(m_program->getProgram());
2752                 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
2753
2754                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2755                 {
2756                         m_testCtx.getLog()
2757                                 << tcu::TestLog::Message
2758                                 << "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2759                                 << tcu::TestLog::EndMessage;
2760
2761                         gl.uniform1ui(deltaLocation, deltas[callNdx]);
2762                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2763                         gl.dispatchCompute(m_workSize, m_workSize, 1);
2764                 }
2765
2766                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2767         }
2768
2769         // Verify result
2770         {
2771                 std::vector<deUint32> result;
2772
2773                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work image, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2774
2775                 readWorkImage(result);
2776
2777                 for (int ndx = 0; ndx < m_workSize * m_workSize; ++ndx)
2778                 {
2779                         if (result[ndx] != sumValue)
2780                         {
2781                                 m_testCtx.getLog()
2782                                         << tcu::TestLog::Message
2783                                         << "Work image error, at index (" << ndx % m_workSize << ", " << ndx / m_workSize << ") expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2784                                         << "Work image contains invalid values."
2785                                         << tcu::TestLog::EndMessage;
2786
2787                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Image contents invalid");
2788                                 return STOP;
2789                         }
2790                 }
2791
2792                 m_testCtx.getLog() << tcu::TestLog::Message << "Work image contents are valid." << tcu::TestLog::EndMessage;
2793         }
2794
2795         // verify steps
2796         {
2797                 std::vector<std::vector<deUint32> >     intermediateResults     (m_numCalls);
2798                 std::vector<deUint32>                           valueChain                      (m_numCalls);
2799                 std::vector<deUint32>                           chainDelta                      (m_numCalls);
2800
2801                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2802
2803                 // collect results
2804
2805                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2806                 {
2807                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2808                         readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize * m_workSize, intermediateResults[callNdx]);
2809                 }
2810
2811                 // verify values
2812
2813                 for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2814                 {
2815                         int                     invalidOperationNdx;
2816                         deUint32        errorDelta;
2817                         deUint32        errorExpected;
2818
2819                         // collect result chain for each element
2820                         for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2821                                 valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2822
2823                         // check there exists a path from 0 to sumValue using each addition once
2824                         // decompose cumulative results to addition operations (all additions positive => this works)
2825
2826                         std::sort(valueChain.begin(), valueChain.end());
2827
2828                         for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2829                                 chainDelta[callNdx] = ((callNdx + 1 == m_numCalls) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
2830
2831                         // chainDelta contains now the actual additions applied to the value
2832                         std::sort(chainDelta.begin(), chainDelta.end());
2833
2834                         // validate chain
2835                         if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2836                         {
2837                                 m_testCtx.getLog()
2838                                         << tcu::TestLog::Message
2839                                         << "Intermediate buffer error, at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << "), applied operation index "
2840                                         << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2841                                         << "Intermediate buffer contains invalid values. Values at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << ")\n"
2842                                         << tcu::TestLog::EndMessage;
2843
2844                                 for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2845                                         m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2846                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2847
2848                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2849                                 return STOP;
2850                         }
2851                 }
2852
2853                 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2854         }
2855
2856         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2857         return STOP;
2858 }
2859
2860 void ConcurrentImageAtomicCase::readWorkImage (std::vector<deUint32>& result)
2861 {
2862         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
2863         glu::Buffer                             resultBuffer    (m_context.getRenderContext());
2864
2865         // Read image to an ssbo
2866
2867         {
2868                 const std::vector<deUint32> zeroData(m_workSize*m_workSize, 0);
2869
2870                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *resultBuffer);
2871                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * m_workSize * m_workSize), &zeroData[0], GL_DYNAMIC_COPY);
2872
2873                 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2874                 gl.useProgram(m_imageReadProgram->getProgram());
2875
2876                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *resultBuffer);
2877                 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
2878                 gl.dispatchCompute(m_workSize, m_workSize, 1);
2879
2880                 GLU_EXPECT_NO_ERROR(gl.getError(), "read");
2881         }
2882
2883         // Read ssbo
2884         {
2885                 const void* ptr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, (int)(sizeof(deUint32) * m_workSize * m_workSize), GL_MAP_READ_BIT);
2886                 GLU_EXPECT_NO_ERROR(gl.getError(), "map");
2887
2888                 if (!ptr)
2889                         throw tcu::TestError("mapBufferRange returned NULL");
2890
2891                 result.resize(m_workSize * m_workSize);
2892                 memcpy(&result[0], ptr, sizeof(deUint32) * m_workSize * m_workSize);
2893
2894                 if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) == GL_FALSE)
2895                         throw tcu::TestError("unmapBuffer returned false");
2896         }
2897 }
2898
2899 std::string ConcurrentImageAtomicCase::genComputeSource (void) const
2900 {
2901         std::ostringstream buf;
2902
2903         buf     << "${GLSL_VERSION_DECL}\n"
2904                 << "${SHADER_IMAGE_ATOMIC_REQUIRE}\n"
2905                 << "\n"
2906                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2907                 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2908                 << "{\n"
2909                 << "    highp uint values[" << m_workSize * m_workSize << "];\n"
2910                 << "} sb_ires;\n"
2911                 << "\n"
2912                 << "layout (binding = 2, r32ui) volatile uniform highp uimage2D u_workImage;\n"
2913                 << "uniform highp uint u_atomicDelta;\n"
2914                 << "\n"
2915                 << "void main ()\n"
2916                 << "{\n"
2917                 << "    highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2918                 << "    sb_ires.values[invocationIndex] = imageAtomicAdd(u_workImage, ivec2(gl_GlobalInvocationID.xy), u_atomicDelta);\n"
2919                 << "}";
2920
2921         return specializeShader(m_context, buf.str().c_str());
2922 }
2923
2924 std::string ConcurrentImageAtomicCase::genImageReadSource (void) const
2925 {
2926         std::ostringstream buf;
2927
2928         buf     << "${GLSL_VERSION_DECL}\n"
2929                 << "\n"
2930                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2931                 << "layout (binding = 1, std430) writeonly buffer ImageValues\n"
2932                 << "{\n"
2933                 << "    highp uint values[" << m_workSize * m_workSize << "];\n"
2934                 << "} sb_res;\n"
2935                 << "\n"
2936                 << "layout (binding = 2, r32ui) readonly uniform highp uimage2D u_workImage;\n"
2937                 << "\n"
2938                 << "void main ()\n"
2939                 << "{\n"
2940                 << "    highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2941                 << "    sb_res.values[invocationIndex] = imageLoad(u_workImage, ivec2(gl_GlobalInvocationID.xy)).x;\n"
2942                 << "}";
2943
2944         return specializeShader(m_context, buf.str().c_str());
2945 }
2946
2947 std::string ConcurrentImageAtomicCase::genImageClearSource (void) const
2948 {
2949         std::ostringstream buf;
2950
2951         buf     << "${GLSL_VERSION_DECL}\n"
2952                 << "\n"
2953                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2954                 << "layout (binding = 2, r32ui) writeonly uniform highp uimage2D u_workImage;\n"
2955                 << "\n"
2956                 << "void main ()\n"
2957                 << "{\n"
2958                 << "    imageStore(u_workImage, ivec2(gl_GlobalInvocationID.xy), uvec4(0, 0, 0, 0));\n"
2959                 << "}";
2960
2961         return specializeShader(m_context, buf.str().c_str());
2962 }
2963
2964 class ConcurrentSSBOAtomicCounterMixedCase : public TestCase
2965 {
2966 public:
2967                                                         ConcurrentSSBOAtomicCounterMixedCase    (Context& context, const char* name, const char* description, int numCalls, int workSize);
2968                                                         ~ConcurrentSSBOAtomicCounterMixedCase   (void);
2969
2970         void                                    init                                                                    (void);
2971         void                                    deinit                                                                  (void);
2972         IterateResult                   iterate                                                                 (void);
2973
2974 private:
2975         std::string                             genSSBOComputeSource                                    (void) const;
2976         std::string                             genAtomicCounterComputeSource                   (void) const;
2977
2978         const int                               m_numCalls;
2979         const int                               m_workSize;
2980         deUint32                                m_bufferID;
2981         glu::ShaderProgram*             m_ssboAtomicProgram;
2982         glu::ShaderProgram*             m_atomicCounterProgram;
2983 };
2984
2985 ConcurrentSSBOAtomicCounterMixedCase::ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2986         : TestCase                                      (context, name, description)
2987         , m_numCalls                            (numCalls)
2988         , m_workSize                            (workSize)
2989         , m_bufferID                            (DE_NULL)
2990         , m_ssboAtomicProgram           (DE_NULL)
2991         , m_atomicCounterProgram        (DE_NULL)
2992 {
2993         // SSBO atomic XORs cancel out
2994         DE_ASSERT((workSize * numCalls) % (16 * 2) == 0);
2995 }
2996
2997 ConcurrentSSBOAtomicCounterMixedCase::~ConcurrentSSBOAtomicCounterMixedCase (void)
2998 {
2999         deinit();
3000 }
3001
3002 void ConcurrentSSBOAtomicCounterMixedCase::init (void)
3003 {
3004         const glw::Functions&           gl                      = m_context.getRenderContext().getFunctions();
3005         const deUint32                          zeroBuf[2]      = { 0, 0 };
3006
3007         // gen buffer
3008
3009         gl.genBuffers(1, &m_bufferID);
3010         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
3011         gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * 2), zeroBuf, GL_DYNAMIC_COPY);
3012
3013         GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
3014
3015         // gen programs
3016
3017         {
3018                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "SSBOProgram", "SSBO atomic program");
3019
3020                 m_ssboAtomicProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genSSBOComputeSource()));
3021                 m_testCtx.getLog() << *m_ssboAtomicProgram;
3022                 if (!m_ssboAtomicProgram->isOk())
3023                         throw tcu::TestError("could not build program");
3024         }
3025         {
3026                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "AtomicCounterProgram", "Atomic counter program");
3027
3028                 m_atomicCounterProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genAtomicCounterComputeSource()));
3029                 m_testCtx.getLog() << *m_atomicCounterProgram;
3030                 if (!m_atomicCounterProgram->isOk())
3031                         throw tcu::TestError("could not build program");
3032         }
3033 }
3034
3035 void ConcurrentSSBOAtomicCounterMixedCase::deinit (void)
3036 {
3037         if (m_bufferID)
3038         {
3039                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
3040                 m_bufferID = 0;
3041         }
3042
3043         delete m_ssboAtomicProgram;
3044         m_ssboAtomicProgram = DE_NULL;
3045
3046         delete m_atomicCounterProgram;
3047         m_atomicCounterProgram = DE_NULL;
3048 }
3049
3050 TestCase::IterateResult ConcurrentSSBOAtomicCounterMixedCase::iterate (void)
3051 {
3052         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3053
3054         m_testCtx.getLog() << tcu::TestLog::Message << "Testing atomic counters and SSBO atomic operations with both backed by the same buffer." << tcu::TestLog::EndMessage;
3055
3056         // invoke programs N times
3057         {
3058                 m_testCtx.getLog()
3059                         << tcu::TestLog::Message
3060                         << "Running SSBO atomic program and atomic counter program " << m_numCalls << " times. (interleaved)\n"
3061                         << "Num groups = (" << m_workSize << ", 1, 1)\n"
3062                         << tcu::TestLog::EndMessage;
3063
3064                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_bufferID);
3065                 gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_bufferID);
3066
3067                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
3068                 {
3069                         gl.useProgram(m_atomicCounterProgram->getProgram());
3070                         gl.dispatchCompute(m_workSize, 1, 1);
3071
3072                         gl.useProgram(m_ssboAtomicProgram->getProgram());
3073                         gl.dispatchCompute(m_workSize, 1, 1);
3074                 }
3075
3076                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
3077         }
3078
3079         // Verify result
3080         {
3081                 deUint32 result;
3082
3083                 // XORs cancel out, only addition is left
3084                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
3085
3086                 gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_bufferID);
3087                 result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
3088
3089                 if ((int)result != m_numCalls*m_workSize)
3090                 {
3091                         m_testCtx.getLog()
3092                                 << tcu::TestLog::Message
3093                                 << "Buffer value error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
3094                                 << tcu::TestLog::EndMessage;
3095
3096                         m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
3097                         return STOP;
3098                 }
3099
3100                 m_testCtx.getLog() << tcu::TestLog::Message << "Buffer is valid." << tcu::TestLog::EndMessage;
3101         }
3102
3103         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
3104         return STOP;
3105 }
3106
3107 std::string ConcurrentSSBOAtomicCounterMixedCase::genSSBOComputeSource (void) const
3108 {
3109         std::ostringstream buf;
3110
3111         buf     << "${GLSL_VERSION_DECL}\n"
3112                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3113                 << "layout (binding = 1, std430) volatile buffer WorkBuffer\n"
3114                 << "{\n"
3115                 << "    highp uint targetValue;\n"
3116                 << "    highp uint dummy;\n"
3117                 << "} sb_work;\n"
3118                 << "\n"
3119                 << "void main ()\n"
3120                 << "{\n"
3121                 << "    // flip high bits\n"
3122                 << "    highp uint mask = uint(1) << (24u + (gl_GlobalInvocationID.x % 8u));\n"
3123                 << "    sb_work.dummy = atomicXor(sb_work.targetValue, mask);\n"
3124                 << "}";
3125
3126         return specializeShader(m_context, buf.str().c_str());
3127 }
3128
3129 std::string ConcurrentSSBOAtomicCounterMixedCase::genAtomicCounterComputeSource (void) const
3130 {
3131         std::ostringstream buf;
3132
3133         buf     << "${GLSL_VERSION_DECL}\n"
3134                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3135                 << "\n"
3136                 << "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
3137                 << "\n"
3138                 << "void main ()\n"
3139                 << "{\n"
3140                 << "    atomicCounterIncrement(u_counter);\n"
3141                 << "}";
3142
3143         return specializeShader(m_context, buf.str().c_str());
3144 }
3145
3146 } // anonymous
3147
3148 SynchronizationTests::SynchronizationTests (Context& context)
3149         : TestCaseGroup(context, "synchronization", "Synchronization tests")
3150 {
3151 }
3152
3153 SynchronizationTests::~SynchronizationTests (void)
3154 {
3155 }
3156
3157 void SynchronizationTests::init (void)
3158 {
3159         tcu::TestCaseGroup* const inInvocationGroup             = new tcu::TestCaseGroup(m_testCtx, "in_invocation",    "Test intra-invocation synchronization");
3160         tcu::TestCaseGroup* const interInvocationGroup  = new tcu::TestCaseGroup(m_testCtx, "inter_invocation", "Test inter-invocation synchronization");
3161         tcu::TestCaseGroup* const interCallGroup                = new tcu::TestCaseGroup(m_testCtx, "inter_call",       "Test inter-call synchronization");
3162
3163         addChild(inInvocationGroup);
3164         addChild(interInvocationGroup);
3165         addChild(interCallGroup);
3166
3167         // .in_invocation & .inter_invocation
3168         {
3169                 static const struct CaseConfig
3170                 {
3171                         const char*                                                                     namePrefix;
3172                         const InterInvocationTestCase::StorageType      storage;
3173                         const int                                                                       flags;
3174                 } configs[] =
3175                 {
3176                         { "image",                      InterInvocationTestCase::STORAGE_IMAGE,         0                                                                               },
3177                         { "image_atomic",       InterInvocationTestCase::STORAGE_IMAGE,         InterInvocationTestCase::FLAG_ATOMIC    },
3178                         { "ssbo",                       InterInvocationTestCase::STORAGE_BUFFER,        0                                                                               },
3179                         { "ssbo_atomic",        InterInvocationTestCase::STORAGE_BUFFER,        InterInvocationTestCase::FLAG_ATOMIC    },
3180                 };
3181
3182                 for (int groupNdx = 0; groupNdx < 2; ++groupNdx)
3183                 {
3184                         tcu::TestCaseGroup* const       targetGroup     = (groupNdx == 0) ? (inInvocationGroup) : (interInvocationGroup);
3185                         const int                                       extraFlags      = (groupNdx == 0) ? (0) : (InterInvocationTestCase::FLAG_IN_GROUP);
3186
3187                         for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3188                         {
3189                                 const char* const target = (configs[configNdx].storage == InterInvocationTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3190
3191                                 targetGroup->addChild(new InvocationWriteReadCase(m_context,
3192                                                                                                                                   (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3193                                                                                                                                   (std::string("Write to ") + target + " and read it").c_str(),
3194                                                                                                                                   configs[configNdx].storage,
3195                                                                                                                                   configs[configNdx].flags | extraFlags));
3196
3197                                 targetGroup->addChild(new InvocationReadWriteCase(m_context,
3198                                                                                                                                   (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3199                                                                                                                                   (std::string("Read form ") + target + " and then write to it").c_str(),
3200                                                                                                                                   configs[configNdx].storage,
3201                                                                                                                                   configs[configNdx].flags | extraFlags));
3202
3203                                 targetGroup->addChild(new InvocationOverWriteCase(m_context,
3204                                                                                                                                   (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3205                                                                                                                                   (std::string("Write to ") + target + " twice and read it").c_str(),
3206                                                                                                                                   configs[configNdx].storage,
3207                                                                                                                                   configs[configNdx].flags | extraFlags));
3208
3209                                 targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3210                                                                                                                                    (std::string(configs[configNdx].namePrefix) + "_alias_write").c_str(),
3211                                                                                                                                    (std::string("Write to aliasing ") + target + " and read it").c_str(),
3212                                                                                                                                    InvocationAliasWriteCase::TYPE_WRITE,
3213                                                                                                                                    configs[configNdx].storage,
3214                                                                                                                                    configs[configNdx].flags | extraFlags));
3215
3216                                 targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3217                                                                                                                                    (std::string(configs[configNdx].namePrefix) + "_alias_overwrite").c_str(),
3218                                                                                                                                    (std::string("Write to aliasing ") + target + "s and read it").c_str(),
3219                                                                                                                                    InvocationAliasWriteCase::TYPE_OVERWRITE,
3220                                                                                                                                    configs[configNdx].storage,
3221                                                                                                                                    configs[configNdx].flags | extraFlags));
3222                         }
3223                 }
3224         }
3225
3226         // .inter_call
3227         {
3228                 tcu::TestCaseGroup* const withBarrierGroup              = new tcu::TestCaseGroup(m_testCtx, "with_memory_barrier", "Synchronize with memory barrier");
3229                 tcu::TestCaseGroup* const withoutBarrierGroup   = new tcu::TestCaseGroup(m_testCtx, "without_memory_barrier", "Synchronize without memory barrier");
3230
3231                 interCallGroup->addChild(withBarrierGroup);
3232                 interCallGroup->addChild(withoutBarrierGroup);
3233
3234                 // .with_memory_barrier
3235                 {
3236                         static const struct CaseConfig
3237                         {
3238                                 const char*                                                             namePrefix;
3239                                 const InterCallTestCase::StorageType    storage;
3240                                 const int                                                               flags;
3241                         } configs[] =
3242                         {
3243                                 { "image",                      InterCallTestCase::STORAGE_IMAGE,       0                                                                                                                                               },
3244                                 { "image_atomic",       InterCallTestCase::STORAGE_IMAGE,       InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT    },
3245                                 { "ssbo",                       InterCallTestCase::STORAGE_BUFFER,      0                                                                                                                                               },
3246                                 { "ssbo_atomic",        InterCallTestCase::STORAGE_BUFFER,      InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT    },
3247                         };
3248
3249                         const int seed0 = 123;
3250                         const int seed1 = 457;
3251
3252                         for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3253                         {
3254                                 const char* const target = (configs[configNdx].storage == InterCallTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3255
3256                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3257                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3258                                                                                                                                  (std::string("Write to ") + target + " and read it").c_str(),
3259                                                                                                                                  configs[configNdx].storage,
3260                                                                                                                                  configs[configNdx].flags,
3261                                                                                                                                  InterCallOperations()
3262                                                                                                                                         << op::WriteData::Generate(1, seed0)
3263                                                                                                                                         << op::Barrier()
3264                                                                                                                                         << op::ReadData::Generate(1, seed0)));
3265
3266                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3267                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3268                                                                                                                                  (std::string("Read from ") + target + " and then write to it").c_str(),
3269                                                                                                                                  configs[configNdx].storage,
3270                                                                                                                                  configs[configNdx].flags,
3271                                                                                                                                  InterCallOperations()
3272                                                                                                                                         << op::ReadZeroData::Generate(1)
3273                                                                                                                                         << op::Barrier()
3274                                                                                                                                         << op::WriteData::Generate(1, seed0)));
3275
3276                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3277                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3278                                                                                                                                  (std::string("Write to ") + target + " twice and read it").c_str(),
3279                                                                                                                                  configs[configNdx].storage,
3280                                                                                                                                  configs[configNdx].flags,
3281                                                                                                                                  InterCallOperations()
3282                                                                                                                                         << op::WriteData::Generate(1, seed0)
3283                                                                                                                                         << op::Barrier()
3284                                                                                                                                         << op::WriteData::Generate(1, seed1)
3285                                                                                                                                         << op::Barrier()
3286                                                                                                                                         << op::ReadData::Generate(1, seed1)));
3287
3288                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3289                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_write_read").c_str(),
3290                                                                                                                                  (std::string("Write to multiple ") + target + "s and read them").c_str(),
3291                                                                                                                                  configs[configNdx].storage,
3292                                                                                                                                  configs[configNdx].flags,
3293                                                                                                                                  InterCallOperations()
3294                                                                                                                                         << op::WriteData::Generate(1, seed0)
3295                                                                                                                                         << op::WriteData::Generate(2, seed1)
3296                                                                                                                                         << op::Barrier()
3297                                                                                                                                         << op::ReadMultipleData::Generate(1, seed0, 2, seed1)));
3298
3299                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3300                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_interleaved_write_read").c_str(),
3301                                                                                                                                  (std::string("Write to same ") + target + " in multiple calls and read it").c_str(),
3302                                                                                                                                  configs[configNdx].storage,
3303                                                                                                                                  configs[configNdx].flags,
3304                                                                                                                                  InterCallOperations()
3305                                                                                                                                         << op::WriteDataInterleaved::Generate(1, seed0, true)
3306                                                                                                                                         << op::WriteDataInterleaved::Generate(1, seed1, false)
3307                                                                                                                                         << op::Barrier()
3308                                                                                                                                         << op::ReadDataInterleaved::Generate(1, seed0, seed1)));
3309
3310                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3311                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_ordered").c_str(),
3312                                                                                                                                  (std::string("Two unrelated ") + target + " write-reads").c_str(),
3313                                                                                                                                  configs[configNdx].storage,
3314                                                                                                                                  configs[configNdx].flags,
3315                                                                                                                                  InterCallOperations()
3316                                                                                                                                         << op::WriteData::Generate(1, seed0)
3317                                                                                                                                         << op::WriteData::Generate(2, seed1)
3318                                                                                                                                         << op::Barrier()
3319                                                                                                                                         << op::ReadData::Generate(1, seed0)
3320                                                                                                                                         << op::ReadData::Generate(2, seed1)));
3321
3322                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3323                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_non_ordered").c_str(),
3324                                                                                                                                  (std::string("Two unrelated ") + target + " write-reads").c_str(),
3325                                                                                                                                  configs[configNdx].storage,
3326                                                                                                                                  configs[configNdx].flags,
3327                                                                                                                                  InterCallOperations()
3328                                                                                                                                         << op::WriteData::Generate(1, seed0)
3329                                                                                                                                         << op::WriteData::Generate(2, seed1)
3330                                                                                                                                         << op::Barrier()
3331                                                                                                                                         << op::ReadData::Generate(2, seed1)
3332                                                                                                                                         << op::ReadData::Generate(1, seed0)));
3333                         }
3334
3335                         // .without_memory_barrier
3336                         {
3337                                 struct InvocationConfig
3338                                 {
3339                                         const char*     name;
3340                                         int                     count;
3341                                 };
3342
3343                                 static const InvocationConfig ssboInvocations[] =
3344                                 {
3345                                         { "1k",         1024    },
3346                                         { "4k",         4096    },
3347                                         { "32k",        32768   },
3348                                 };
3349                                 static const InvocationConfig imageInvocations[] =
3350                                 {
3351                                         { "8x8",                8       },
3352                                         { "32x32",              32      },
3353                                         { "128x128",    128     },
3354                                 };
3355                                 static const InvocationConfig counterInvocations[] =
3356                                 {
3357                                         { "32",         32              },
3358                                         { "128",        128             },
3359                                         { "1k",         1024    },
3360                                 };
3361                                 static const int callCounts[] = { 2, 5, 100 };
3362
3363                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(ssboInvocations); ++invocationNdx)
3364                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3365                                                 withoutBarrierGroup->addChild(new SSBOConcurrentAtomicCase(m_context, (std::string("ssbo_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + ssboInvocations[invocationNdx].name + "_invocations").c_str(),       "", callCounts[callCountNdx], ssboInvocations[invocationNdx].count));
3366
3367                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(imageInvocations); ++invocationNdx)
3368                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3369                                                 withoutBarrierGroup->addChild(new ConcurrentImageAtomicCase(m_context, (std::string("image_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + imageInvocations[invocationNdx].name + "_invocations").c_str(),    "", callCounts[callCountNdx], imageInvocations[invocationNdx].count));
3370
3371                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3372                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3373                                                 withoutBarrierGroup->addChild(new ConcurrentAtomicCounterCase(m_context, (std::string("atomic_counter_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),      "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3374
3375                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3376                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3377                                                 withoutBarrierGroup->addChild(new ConcurrentSSBOAtomicCounterMixedCase(m_context, (std::string("ssbo_atomic_counter_mixed_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),  "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3378                         }
3379                 }
3380         }
3381 }
3382
3383 } // Functional
3384 } // gles31
3385 } // deqp