modules/gles31/functional/es31fSynchronizationTests.cpp

   1 /*-------------------------------------------------------------------------
   2  * drawElements Quality Program OpenGL ES 3.1 Module
   3  * -------------------------------------------------
   4  *
   5  * Copyright 2014 The Android Open Source Project
   6  *
   7  * Licensed under the Apache License, Version 2.0 (the "License");
   8  * you may not use this file except in compliance with the License.
   9  * You may obtain a copy of the License at
  10  *
  11  *      http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  *
  19  *//*!
  20  * \file
  21  * \brief Synchronization Tests
  22  *//*--------------------------------------------------------------------*/
  23
  24 #include "es31fSynchronizationTests.hpp"
  25 #include "tcuTestLog.hpp"
  26 #include "tcuStringTemplate.hpp"
  27 #include "tcuSurface.hpp"
  28 #include "tcuRenderTarget.hpp"
  29 #include "gluRenderContext.hpp"
  30 #include "gluShaderProgram.hpp"
  31 #include "gluObjectWrapper.hpp"
  32 #include "gluPixelTransfer.hpp"
  33 #include "gluContextInfo.hpp"
  34 #include "glwFunctions.hpp"
  35 #include "glwEnums.hpp"
  36 #include "deStringUtil.hpp"
  37 #include "deSharedPtr.hpp"
  38 #include "deMemory.h"
  39 #include "deRandom.hpp"
  40
  41 #include <map>
  42
  43 namespace deqp
  44 {
  45 namespace gles31
  46 {
  47 namespace Functional
  48 {
  49 namespace
  50 {
  51
  52 static bool checkSupport(Context& ctx)
  53 {
  54         auto ctxType = ctx.getRenderContext().getType();
  55         return contextSupports(ctxType, glu::ApiType::es(3, 2)) ||
  56                    contextSupports(ctxType, glu::ApiType::core(4, 5)) ||
  57                    ctx.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic");
  58 }
  59
  60 static bool validateSortedAtomicRampAdditionValueChain (const std::vector<deUint32>& valueChain, deUint32 sumValue, int& invalidOperationNdx, deUint32& errorDelta, deUint32& errorExpected)
  61 {
  62         std::vector<deUint32> chainDelta(valueChain.size());
  63
  64         for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
  65                 chainDelta[callNdx] = ((callNdx + 1 == (int)valueChain.size()) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
  66
  67         // chainDelta contains now the actual additions applied to the value
  68         // check there exists an addition ramp form 1 to ...
  69         std::sort(chainDelta.begin(), chainDelta.end());
  70
  71         for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
  72         {
  73                 if ((int)chainDelta[callNdx] != callNdx+1)
  74                 {
  75                         invalidOperationNdx = callNdx;
  76                         errorDelta = chainDelta[callNdx];
  77                         errorExpected = callNdx+1;
  78
  79                         return false;
  80                 }
  81         }
  82
  83         return true;
  84 }
  85
  86 static void readBuffer (const glw::Functions& gl, deUint32 target, int numElements, std::vector<deUint32>& result)
  87 {
  88         const void* ptr = gl.mapBufferRange(target, 0, (int)(sizeof(deUint32) * numElements), GL_MAP_READ_BIT);
  89         GLU_EXPECT_NO_ERROR(gl.getError(), "map");
  90
  91         if (!ptr)
  92                 throw tcu::TestError("mapBufferRange returned NULL");
  93
  94         result.resize(numElements);
  95         memcpy(&result[0], ptr, sizeof(deUint32) * numElements);
  96
  97         if (gl.unmapBuffer(target) == GL_FALSE)
  98                 throw tcu::TestError("unmapBuffer returned false");
  99 }
 100
 101 static deUint32 readBufferUint32 (const glw::Functions& gl, deUint32 target)
 102 {
 103         std::vector<deUint32> vec;
 104
 105         readBuffer(gl, target, 1, vec);
 106
 107         return vec[0];
 108 }
 109
 110 //! Generate a ramp of values from 1 to numElements, and shuffle it
 111 void generateShuffledRamp (int numElements, std::vector<int>& ramp)
 112 {
 113         de::Random rng(0xabcd);
 114
 115         // some positive (non-zero) unique values
 116         ramp.resize(numElements);
 117         for (int callNdx = 0; callNdx < numElements; ++callNdx)
 118                 ramp[callNdx] = callNdx + 1;
 119
 120         rng.shuffle(ramp.begin(), ramp.end());
 121 }
 122
 123 static std::string specializeShader(Context& context, const char* code)
 124 {
 125         auto                                    ctxType                 = context.getRenderContext().getType();
 126         const bool                              isES32orGL45    = glu::contextSupports(ctxType, glu::ApiType::es(3, 2)) ||
 127                                                                                           glu::contextSupports(ctxType, glu::ApiType::core(4, 5));
 128         const glu::GLSLVersion  glslVersion             = glu::getContextTypeGLSLVersion(ctxType);
 129
 130         std::map<std::string, std::string> specializationMap;
 131         specializationMap["GLSL_VERSION_DECL"]                          = glu::getGLSLVersionDeclaration(glslVersion);
 132         specializationMap["SHADER_IMAGE_ATOMIC_REQUIRE"]        = isES32orGL45 ? "" : "#extension GL_OES_shader_image_atomic : require";
 133
 134         return tcu::StringTemplate(code).specialize(specializationMap);
 135 }
 136
 137 class InterInvocationTestCase : public TestCase
 138 {
 139 public:
 140         enum StorageType
 141         {
 142                 STORAGE_BUFFER = 0,
 143                 STORAGE_IMAGE,
 144
 145                 STORAGE_LAST
 146         };
 147         enum CaseFlags
 148         {
 149                 FLAG_ATOMIC                             = 0x1,
 150                 FLAG_ALIASING_STORAGES  = 0x2,
 151                 FLAG_IN_GROUP                   = 0x4,
 152         };
 153
 154                                                 InterInvocationTestCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags = 0);
 155                                                 ~InterInvocationTestCase        (void);
 156
 157 private:
 158         void                            init                                            (void);
 159         void                            deinit                                          (void);
 160         IterateResult           iterate                                         (void);
 161
 162         void                            runCompute                                      (void);
 163         bool                            verifyResults                           (void);
 164         virtual std::string     genShaderSource                         (void) const = 0;
 165
 166 protected:
 167         std::string                     genBarrierSource                        (void) const;
 168
 169         const StorageType       m_storage;
 170         const bool                      m_useAtomic;
 171         const bool                      m_aliasingStorages;
 172         const bool                      m_syncWithGroup;
 173         const int                       m_workWidth;                            // !< total work width
 174         const int                       m_workHeight;                           // !<     ...    height
 175         const int                       m_localWidth;                           // !< group width
 176         const int                       m_localHeight;                          // !< group height
 177         const int                       m_elementsPerInvocation;        // !< elements accessed by a single invocation
 178
 179 private:
 180         glw::GLuint                     m_storageBuf;
 181         glw::GLuint                     m_storageTex;
 182         glw::GLuint                     m_resultBuf;
 183         glu::ShaderProgram*     m_program;
 184 };
 185
 186 InterInvocationTestCase::InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
 187         : TestCase                                      (context, name, desc)
 188         , m_storage                                     (storage)
 189         , m_useAtomic                           ((flags & FLAG_ATOMIC) != 0)
 190         , m_aliasingStorages            ((flags & FLAG_ALIASING_STORAGES) != 0)
 191         , m_syncWithGroup                       ((flags & FLAG_IN_GROUP) != 0)
 192         , m_workWidth                           (256)
 193         , m_workHeight                          (256)
 194         , m_localWidth                          (16)
 195         , m_localHeight                         (8)
 196         , m_elementsPerInvocation       (8)
 197         , m_storageBuf                          (0)
 198         , m_storageTex                          (0)
 199         , m_resultBuf                           (0)
 200         , m_program                                     (DE_NULL)
 201 {
 202         DE_ASSERT(m_storage < STORAGE_LAST);
 203         DE_ASSERT(m_localWidth*m_localHeight <= 128); // minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
 204 }
 205
 206 InterInvocationTestCase::~InterInvocationTestCase (void)
 207 {
 208         deinit();
 209 }
 210
 211 void InterInvocationTestCase::init (void)
 212 {
 213         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
 214
 215         // requirements
 216
 217         if (m_useAtomic && m_storage == STORAGE_IMAGE && !checkSupport(m_context))
 218                 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
 219
 220         // program
 221
 222         m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genShaderSource()));
 223         m_testCtx.getLog() << *m_program;
 224         if (!m_program->isOk())
 225                 throw tcu::TestError("could not build program");
 226
 227         // source
 228
 229         if (m_storage == STORAGE_BUFFER)
 230         {
 231                 const int                               bufferElements  = m_workWidth * m_workHeight * m_elementsPerInvocation;
 232                 const int                               bufferSize              = bufferElements * (int)sizeof(deUint32);
 233                 std::vector<deUint32>   zeroBuffer              (bufferElements, 0);
 234
 235                 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
 236
 237                 gl.genBuffers(1, &m_storageBuf);
 238                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
 239                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
 240                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
 241         }
 242         else if (m_storage == STORAGE_IMAGE)
 243         {
 244                 const int                               bufferElements  = m_workWidth * m_workHeight * m_elementsPerInvocation;
 245                 const int                               bufferSize              = bufferElements * (int)sizeof(deUint32);
 246
 247                 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x" << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
 248
 249                 gl.genTextures(1, &m_storageTex);
 250                 gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
 251                 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
 252                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
 253                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
 254                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");
 255
 256                 // Zero-fill
 257                 m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;
 258
 259                 {
 260                         const std::vector<deInt32> zeroBuffer(m_workWidth * m_workHeight * m_elementsPerInvocation, 0);
 261                         gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
 262                         GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
 263                 }
 264         }
 265         else
 266                 DE_ASSERT(DE_FALSE);
 267
 268         // destination
 269
 270         {
 271                 const int                               bufferElements  = m_workWidth * m_workHeight;
 272                 const int                               bufferSize              = bufferElements * (int)sizeof(deUint32);
 273                 std::vector<deInt32>    negativeBuffer  (bufferElements, -1);
 274
 275                 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
 276
 277                 gl.genBuffers(1, &m_resultBuf);
 278                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
 279                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
 280                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
 281         }
 282 }
 283
 284 void InterInvocationTestCase::deinit (void)
 285 {
 286         if (m_storageBuf)
 287         {
 288                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
 289                 m_storageBuf = DE_NULL;
 290         }
 291
 292         if (m_storageTex)
 293         {
 294                 m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
 295                 m_storageTex = DE_NULL;
 296         }
 297
 298         if (m_resultBuf)
 299         {
 300                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
 301                 m_resultBuf = DE_NULL;
 302         }
 303
 304         delete m_program;
 305         m_program = DE_NULL;
 306 }
 307
 308 InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate (void)
 309 {
 310         // Dispatch
 311         runCompute();
 312
 313         // Verify buffer contents
 314         if (verifyResults())
 315                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
 316         else
 317                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
 318
 319         return STOP;
 320 }
 321
 322 void InterInvocationTestCase::runCompute (void)
 323 {
 324         const glw::Functions&   gl              = m_context.getRenderContext().getFunctions();
 325         const int                               groupsX = m_workWidth / m_localWidth;
 326         const int                               groupsY = m_workHeight / m_localHeight;
 327
 328         DE_ASSERT((m_workWidth % m_localWidth) == 0);
 329         DE_ASSERT((m_workHeight % m_localHeight) == 0);
 330
 331         m_testCtx.getLog()
 332                 << tcu::TestLog::Message
 333                 << "Dispatching compute.\n"
 334                 << "    group size: " << m_localWidth << "x" << m_localHeight << "\n"
 335                 << "    dispatch size: " << groupsX << "x" << groupsY << "\n"
 336                 << "    total work size: " << m_workWidth << "x" << m_workHeight << "\n"
 337                 << tcu::TestLog::EndMessage;
 338
 339         gl.useProgram(m_program->getProgram());
 340
 341         // source
 342         if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
 343         {
 344                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
 345                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
 346         }
 347         else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
 348         {
 349                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
 350                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
 351                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
 352
 353                 m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages." << tcu::TestLog::EndMessage;
 354         }
 355         else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
 356         {
 357                 gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
 358                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
 359         }
 360         else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
 361         {
 362                 gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
 363                 gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
 364
 365                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
 366
 367                 m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages." << tcu::TestLog::EndMessage;
 368         }
 369         else
 370                 DE_ASSERT(DE_FALSE);
 371
 372         // destination
 373         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
 374         GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
 375
 376         // dispatch
 377         gl.dispatchCompute(groupsX, groupsY, 1);
 378         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
 379 }
 380
 381 bool InterInvocationTestCase::verifyResults (void)
 382 {
 383         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
 384         const int                               errorFloodThreshold     = 5;
 385         int                                             numErrorsLogged         = 0;
 386         const void*                             mapped                          = DE_NULL;
 387         std::vector<deInt32>    results                         (m_workWidth * m_workHeight);
 388         bool                                    error                           = false;
 389
 390         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
 391         gl.memoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
 392         mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(deInt32), GL_MAP_READ_BIT);
 393         GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
 394
 395         // copy to properly aligned array
 396         deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(deUint32));
 397
 398         if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
 399                 throw tcu::TestError("memory map store corrupted");
 400
 401         // check the results
 402         for (int ndx = 0; ndx < (int)results.size(); ++ndx)
 403         {
 404                 if (results[ndx] != 1)
 405                 {
 406                         error = true;
 407
 408                         if (numErrorsLogged == 0)
 409                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
 410                         if (numErrorsLogged++ < errorFloodThreshold)
 411                                 m_testCtx.getLog() << tcu::TestLog::Message << "        Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
 412                         else
 413                         {
 414                                 // after N errors, no point continuing verification
 415                                 m_testCtx.getLog() << tcu::TestLog::Message << "        -- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
 416                                 break;
 417                         }
 418                 }
 419         }
 420
 421         if (!error)
 422                 m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
 423         return !error;
 424 }
 425
 426 std::string InterInvocationTestCase::genBarrierSource (void) const
 427 {
 428         std::ostringstream buf;
 429
 430         if (m_syncWithGroup)
 431         {
 432                 // Wait until all invocations in this work group have their texture/buffer read/write operations complete
 433                 // \note We could also use memoryBarrierBuffer() or memoryBarrierImage() in place of groupMemoryBarrier() but
 434                 //       we only require intra-workgroup synchronization.
 435                 buf << "\n"
 436                         << "    groupMemoryBarrier();\n"
 437                         << "    barrier();\n"
 438                         << "\n";
 439         }
 440         else if (m_storage == STORAGE_BUFFER)
 441         {
 442                 DE_ASSERT(!m_syncWithGroup);
 443
 444                 // Waiting only for data written by this invocation. Since all buffer reads and writes are
 445                 // processed in order (within a single invocation), we don't have to do anything.
 446                 buf << "\n";
 447         }
 448         else if (m_storage == STORAGE_IMAGE)
 449         {
 450                 DE_ASSERT(!m_syncWithGroup);
 451
 452                 // Waiting only for data written by this invocation. But since operations complete in undefined
 453                 // order, we have to wait for them to complete.
 454                 buf << "\n"
 455                         << "    memoryBarrierImage();\n"
 456                         << "\n";
 457         }
 458         else
 459                 DE_ASSERT(DE_FALSE);
 460
 461         return buf.str();
 462 }
 463
 464 class InvocationBasicCase : public InterInvocationTestCase
 465 {
 466 public:
 467                                                         InvocationBasicCase             (Context& context, const char* name, const char* desc, StorageType storage, int flags);
 468 private:
 469         std::string                             genShaderSource                 (void) const;
 470         virtual std::string             genShaderMainBlock              (void) const = 0;
 471 };
 472
 473 InvocationBasicCase::InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
 474         : InterInvocationTestCase(context, name, desc, storage, flags)
 475 {
 476 }
 477
 478 std::string InvocationBasicCase::genShaderSource (void) const
 479 {
 480         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
 481         std::ostringstream      buf;
 482
 483         buf << "${GLSL_VERSION_DECL}\n"
 484                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
 485                 << "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
 486                 << "layout(binding=0, std430) buffer Output\n"
 487                 << "{\n"
 488                 << "    highp int values[];\n"
 489                 << "} sb_result;\n";
 490
 491         if (m_storage == STORAGE_BUFFER)
 492                 buf << "layout(binding=1, std430) coherent buffer Storage\n"
 493                         << "{\n"
 494                         << "    highp int values[];\n"
 495                         << "} sb_store;\n"
 496                         << "\n"
 497                         << "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
 498                         << "{\n"
 499                         << "    highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
 500                         << "    return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
 501                         << "}\n";
 502         else if (m_storage == STORAGE_IMAGE)
 503                 buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
 504                         << "\n"
 505                         << "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
 506                         << "{\n"
 507                         << "    return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
 508                         << "}\n";
 509         else
 510                 DE_ASSERT(DE_FALSE);
 511
 512         buf << "\n"
 513                 << "void main (void)\n"
 514                 << "{\n"
 515                 << "    int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
 516                 << "    int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
 517                 << "    bool allOk      = true;\n"
 518                 << "\n"
 519                 << genShaderMainBlock()
 520                 << "\n"
 521                 << "    sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
 522                 << "}\n";
 523
 524         return specializeShader(m_context, buf.str().c_str());
 525 }
 526
 527 class InvocationWriteReadCase : public InvocationBasicCase
 528 {
 529 public:
 530                                         InvocationWriteReadCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags);
 531 private:
 532         std::string             genShaderMainBlock                      (void) const;
 533 };
 534
 535 InvocationWriteReadCase::InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
 536         : InvocationBasicCase(context, name, desc, storage, flags)
 537 {
 538 }
 539
 540 std::string InvocationWriteReadCase::genShaderMainBlock (void) const
 541 {
 542         std::ostringstream buf;
 543
 544         // write
 545
 546         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 547         {
 548                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 549                         buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
 550                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 551                         buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
 552                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 553                         buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
 554                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 555                         buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
 556                 else
 557                         DE_ASSERT(DE_FALSE);
 558         }
 559
 560         // barrier
 561
 562         buf << genBarrierSource();
 563
 564         // read
 565
 566         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 567         {
 568                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
 569
 570                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 571                         buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 0) == groupNdx);\n";
 572                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 573                         buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
 574                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 575                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 0) == groupNdx);\n";
 576                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 577                         buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
 578                 else
 579                         DE_ASSERT(DE_FALSE);
 580         }
 581
 582         return buf.str();
 583 }
 584
 585 class InvocationReadWriteCase : public InvocationBasicCase
 586 {
 587 public:
 588                                         InvocationReadWriteCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags);
 589 private:
 590         std::string             genShaderMainBlock                      (void) const;
 591 };
 592
 593 InvocationReadWriteCase::InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
 594         : InvocationBasicCase(context, name, desc, storage, flags)
 595 {
 596 }
 597
 598 std::string InvocationReadWriteCase::genShaderMainBlock (void) const
 599 {
 600         std::ostringstream buf;
 601
 602         // read
 603
 604         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 605         {
 606                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
 607
 608                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 609                         buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == 0);\n";
 610                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 611                         buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
 612                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 613                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == 0);\n";
 614                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 615                         buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
 616                 else
 617                         DE_ASSERT(DE_FALSE);
 618         }
 619
 620         // barrier
 621
 622         buf << genBarrierSource();
 623
 624         // write
 625
 626         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 627         {
 628                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 629                         buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
 630                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 631                         buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
 632                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 633                         buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
 634                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 635                         buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
 636                 else
 637                         DE_ASSERT(DE_FALSE);
 638         }
 639
 640         return buf.str();
 641 }
 642
 643 class InvocationOverWriteCase : public InvocationBasicCase
 644 {
 645 public:
 646                                         InvocationOverWriteCase         (Context& context, const char* name, const char* desc, StorageType storage, int flags);
 647 private:
 648         std::string             genShaderMainBlock                      (void) const;
 649 };
 650
 651 InvocationOverWriteCase::InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
 652         : InvocationBasicCase(context, name, desc, storage, flags)
 653 {
 654 }
 655
 656 std::string InvocationOverWriteCase::genShaderMainBlock (void) const
 657 {
 658         std::ostringstream buf;
 659
 660         // write
 661
 662         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 663         {
 664                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 665                         buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
 666                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 667                         buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
 668                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 669                         buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
 670                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 671                         buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
 672                 else
 673                         DE_ASSERT(DE_FALSE);
 674         }
 675
 676         // barrier
 677
 678         buf << genBarrierSource();
 679
 680         // write over
 681
 682         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 683         {
 684                 // write another invocation's value or our own value depending on test type
 685                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+4) + ", " + de::toString(3*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
 686
 687                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 688                         buf << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
 689                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 690                         buf << "\tsb_store.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
 691                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 692                         buf << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
 693                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 694                         buf << "\timageStore(u_image, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
 695                 else
 696                         DE_ASSERT(DE_FALSE);
 697         }
 698
 699         // barrier
 700
 701         buf << genBarrierSource();
 702
 703         // read
 704
 705         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 706         {
 707                 // check another invocation's value or our own value depending on test type
 708                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
 709
 710                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 711                         buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == groupNdx);\n";
 712                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 713                         buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
 714                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 715                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == groupNdx);\n";
 716                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 717                         buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
 718                 else
 719                         DE_ASSERT(DE_FALSE);
 720         }
 721
 722         return buf.str();
 723 }
 724
 725 class InvocationAliasWriteCase : public InterInvocationTestCase
 726 {
 727 public:
 728         enum TestType
 729         {
 730                 TYPE_WRITE = 0,
 731                 TYPE_OVERWRITE,
 732
 733                 TYPE_LAST
 734         };
 735
 736                                         InvocationAliasWriteCase        (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags);
 737 private:
 738         std::string             genShaderSource                         (void) const;
 739
 740         const TestType  m_type;
 741 };
 742
 743 InvocationAliasWriteCase::InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags)
 744         : InterInvocationTestCase       (context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
 745         , m_type                                        (type)
 746 {
 747         DE_ASSERT(type < TYPE_LAST);
 748 }
 749
 750 std::string InvocationAliasWriteCase::genShaderSource (void) const
 751 {
 752         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
 753         std::ostringstream      buf;
 754
 755         buf << "${GLSL_VERSION_DECL}\n"
 756                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
 757                 << "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
 758                 << "layout(binding=0, std430) buffer Output\n"
 759                 << "{\n"
 760                 << "    highp int values[];\n"
 761                 << "} sb_result;\n";
 762
 763         if (m_storage == STORAGE_BUFFER)
 764                 buf << "layout(binding=1, std430) coherent buffer Storage0\n"
 765                         << "{\n"
 766                         << "    highp int values[];\n"
 767                         << "} sb_store0;\n"
 768                         << "layout(binding=2, std430) coherent buffer Storage1\n"
 769                         << "{\n"
 770                         << "    highp int values[];\n"
 771                         << "} sb_store1;\n"
 772                         << "\n"
 773                         << "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
 774                         << "{\n"
 775                         << "    highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
 776                         << "    return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
 777                         << "}\n";
 778         else if (m_storage == STORAGE_IMAGE)
 779                 buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
 780                         << "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
 781                         << "\n"
 782                         << "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
 783                         << "{\n"
 784                         << "    return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
 785                         << "}\n";
 786         else
 787                 DE_ASSERT(DE_FALSE);
 788
 789         buf << "\n"
 790                 << "void main (void)\n"
 791                 << "{\n"
 792                 << "    int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
 793                 << "    int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
 794                 << "    bool allOk      = true;\n"
 795                 << "\n";
 796
 797         if (m_type == TYPE_OVERWRITE)
 798         {
 799                 // write
 800
 801                 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 802                 {
 803                         if (m_storage == STORAGE_BUFFER && m_useAtomic)
 804                                 buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
 805                         else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 806                                 buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
 807                         else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 808                                 buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
 809                         else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 810                                 buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
 811                         else
 812                                 DE_ASSERT(DE_FALSE);
 813                 }
 814
 815                 // barrier
 816
 817                 buf << genBarrierSource();
 818         }
 819         else
 820                 DE_ASSERT(m_type == TYPE_WRITE);
 821
 822         // write (again)
 823
 824         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 825         {
 826                 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+2) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
 827
 828                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 829                         buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
 830                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 831                         buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
 832                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 833                         buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
 834                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 835                         buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
 836                 else
 837                         DE_ASSERT(DE_FALSE);
 838         }
 839
 840         // barrier
 841
 842         buf << genBarrierSource();
 843
 844         // read
 845
 846         for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
 847         {
 848                 if (m_storage == STORAGE_BUFFER && m_useAtomic)
 849                         buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 123) == groupNdx);\n";
 850                 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
 851                         buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] == groupNdx);\n";
 852                 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
 853                         buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 123) == groupNdx);\n";
 854                 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
 855                         buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << ")).x == groupNdx);\n";
 856                 else
 857                         DE_ASSERT(DE_FALSE);
 858         }
 859
 860         // return result
 861
 862         buf << "\n"
 863                 << "    sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
 864                 << "}\n";
 865
 866         return specializeShader(m_context, buf.str().c_str());
 867 }
 868
 869 namespace op
 870 {
 871
 872 struct WriteData
 873 {
 874         int targetHandle;
 875         int seed;
 876
 877         static WriteData Generate(int targetHandle, int seed)
 878         {
 879                 WriteData retVal;
 880
 881                 retVal.targetHandle = targetHandle;
 882                 retVal.seed = seed;
 883
 884                 return retVal;
 885         }
 886 };
 887
 888 struct ReadData
 889 {
 890         int targetHandle;
 891         int seed;
 892
 893         static ReadData Generate(int targetHandle, int seed)
 894         {
 895                 ReadData retVal;
 896
 897                 retVal.targetHandle = targetHandle;
 898                 retVal.seed = seed;
 899
 900                 return retVal;
 901         }
 902 };
 903
 904 struct Barrier
 905 {
 906 };
 907
 908 struct WriteDataInterleaved
 909 {
 910         int             targetHandle;
 911         int             seed;
 912         bool    evenOdd;
 913
 914         static WriteDataInterleaved Generate(int targetHandle, int seed, bool evenOdd)
 915         {
 916                 WriteDataInterleaved retVal;
 917
 918                 retVal.targetHandle = targetHandle;
 919                 retVal.seed = seed;
 920                 retVal.evenOdd = evenOdd;
 921
 922                 return retVal;
 923         }
 924 };
 925
 926 struct ReadDataInterleaved
 927 {
 928         int targetHandle;
 929         int seed0;
 930         int seed1;
 931
 932         static ReadDataInterleaved Generate(int targetHandle, int seed0, int seed1)
 933         {
 934                 ReadDataInterleaved retVal;
 935
 936                 retVal.targetHandle = targetHandle;
 937                 retVal.seed0 = seed0;
 938                 retVal.seed1 = seed1;
 939
 940                 return retVal;
 941         }
 942 };
 943
 944 struct ReadMultipleData
 945 {
 946         int targetHandle0;
 947         int seed0;
 948         int targetHandle1;
 949         int seed1;
 950
 951         static ReadMultipleData Generate(int targetHandle0, int seed0, int targetHandle1, int seed1)
 952         {
 953                 ReadMultipleData retVal;
 954
 955                 retVal.targetHandle0 = targetHandle0;
 956                 retVal.seed0 = seed0;
 957                 retVal.targetHandle1 = targetHandle1;
 958                 retVal.seed1 = seed1;
 959
 960                 return retVal;
 961         }
 962 };
 963
 964 struct ReadZeroData
 965 {
 966         int targetHandle;
 967
 968         static ReadZeroData Generate(int targetHandle)
 969         {
 970                 ReadZeroData retVal;
 971
 972                 retVal.targetHandle = targetHandle;
 973
 974                 return retVal;
 975         }
 976 };
 977
 978 } // namespace op
 979
 980 class InterCallTestCase;
 981
 982 class InterCallOperations
 983 {
 984 public:
 985         InterCallOperations& operator<< (const op::WriteData&);
 986         InterCallOperations& operator<< (const op::ReadData&);
 987         InterCallOperations& operator<< (const op::Barrier&);
 988         InterCallOperations& operator<< (const op::ReadMultipleData&);
 989         InterCallOperations& operator<< (const op::WriteDataInterleaved&);
 990         InterCallOperations& operator<< (const op::ReadDataInterleaved&);
 991         InterCallOperations& operator<< (const op::ReadZeroData&);
 992
 993 private:
 994         struct Command
 995         {
 996                 enum CommandType
 997                 {
 998                         TYPE_WRITE = 0,
 999                         TYPE_READ,
1000                         TYPE_BARRIER,
1001                         TYPE_READ_MULTIPLE,
1002                         TYPE_WRITE_INTERLEAVE,
1003                         TYPE_READ_INTERLEAVE,
1004                         TYPE_READ_ZERO,
1005
1006                         TYPE_LAST
1007                 };
1008
1009                 CommandType type;
1010
1011                 union CommandUnion
1012                 {
1013                         op::WriteData                           write;
1014                         op::ReadData                            read;
1015                         op::Barrier                                     barrier;
1016                         op::ReadMultipleData            readMulti;
1017                         op::WriteDataInterleaved        writeInterleave;
1018                         op::ReadDataInterleaved         readInterleave;
1019                         op::ReadZeroData                        readZero;
1020                 } u_cmd;
1021         };
1022
1023         friend class InterCallTestCase;
1024
1025         std::vector<Command> m_cmds;
1026 };
1027
1028 InterCallOperations& InterCallOperations::operator<< (const op::WriteData& cmd)
1029 {
1030         m_cmds.push_back(Command());
1031         m_cmds.back().type = Command::TYPE_WRITE;
1032         m_cmds.back().u_cmd.write = cmd;
1033
1034         return *this;
1035 }
1036
1037 InterCallOperations& InterCallOperations::operator<< (const op::ReadData& cmd)
1038 {
1039         m_cmds.push_back(Command());
1040         m_cmds.back().type = Command::TYPE_READ;
1041         m_cmds.back().u_cmd.read = cmd;
1042
1043         return *this;
1044 }
1045
1046 InterCallOperations& InterCallOperations::operator<< (const op::Barrier& cmd)
1047 {
1048         m_cmds.push_back(Command());
1049         m_cmds.back().type = Command::TYPE_BARRIER;
1050         m_cmds.back().u_cmd.barrier = cmd;
1051
1052         return *this;
1053 }
1054
1055 InterCallOperations& InterCallOperations::operator<< (const op::ReadMultipleData& cmd)
1056 {
1057         m_cmds.push_back(Command());
1058         m_cmds.back().type = Command::TYPE_READ_MULTIPLE;
1059         m_cmds.back().u_cmd.readMulti = cmd;
1060
1061         return *this;
1062 }
1063
1064 InterCallOperations& InterCallOperations::operator<< (const op::WriteDataInterleaved& cmd)
1065 {
1066         m_cmds.push_back(Command());
1067         m_cmds.back().type = Command::TYPE_WRITE_INTERLEAVE;
1068         m_cmds.back().u_cmd.writeInterleave = cmd;
1069
1070         return *this;
1071 }
1072
1073 InterCallOperations& InterCallOperations::operator<< (const op::ReadDataInterleaved& cmd)
1074 {
1075         m_cmds.push_back(Command());
1076         m_cmds.back().type = Command::TYPE_READ_INTERLEAVE;
1077         m_cmds.back().u_cmd.readInterleave = cmd;
1078
1079         return *this;
1080 }
1081
1082 InterCallOperations& InterCallOperations::operator<< (const op::ReadZeroData& cmd)
1083 {
1084         m_cmds.push_back(Command());
1085         m_cmds.back().type = Command::TYPE_READ_ZERO;
1086         m_cmds.back().u_cmd.readZero = cmd;
1087
1088         return *this;
1089 }
1090
1091 class InterCallTestCase : public TestCase
1092 {
1093 public:
1094         enum StorageType
1095         {
1096                 STORAGE_BUFFER = 0,
1097                 STORAGE_IMAGE,
1098
1099                 STORAGE_LAST
1100         };
1101         enum Flags
1102         {
1103                 FLAG_USE_ATOMIC = 1,
1104                 FLAG_USE_INT    = 2,
1105         };
1106                                                                                                         InterCallTestCase                       (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops);
1107                                                                                                         ~InterCallTestCase                      (void);
1108
1109 private:
1110         void                                                                                    init                                            (void);
1111         void                                                                                    deinit                                          (void);
1112         IterateResult                                                                   iterate                                         (void);
1113         bool                                                                                    verifyResults                           (void);
1114
1115         void                                                                                    runCommand                                      (const op::WriteData& cmd, int stepNdx, int& programFriendlyName);
1116         void                                                                                    runCommand                                      (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1117         void                                                                                    runCommand                                      (const op::Barrier&);
1118         void                                                                                    runCommand                                      (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1119         void                                                                                    runCommand                                      (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName);
1120         void                                                                                    runCommand                                      (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1121         void                                                                                    runCommand                                      (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1122         void                                                                                    runSingleRead                           (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1123
1124         glw::GLuint                                                                             genStorage                                      (int friendlyName);
1125         glw::GLuint                                                                             genResultStorage                        (void);
1126         glu::ShaderProgram*                                                             genWriteProgram                         (int seed);
1127         glu::ShaderProgram*                                                             genReadProgram                          (int seed);
1128         glu::ShaderProgram*                                                             genReadMultipleProgram          (int seed0, int seed1);
1129         glu::ShaderProgram*                                                             genWriteInterleavedProgram      (int seed, bool evenOdd);
1130         glu::ShaderProgram*                                                             genReadInterleavedProgram       (int seed0, int seed1);
1131         glu::ShaderProgram*                                                             genReadZeroProgram                      (void);
1132
1133         const StorageType                                                               m_storage;
1134         const int                                                                               m_invocationGridSize;   // !< width and height of the two dimensional work dispatch
1135         const int                                                                               m_perInvocationSize;    // !< number of elements accessed in single invocation
1136         const std::vector<InterCallOperations::Command> m_cmds;
1137         const bool                                                                              m_useAtomic;
1138         const bool                                                                              m_formatInteger;
1139
1140         std::vector<glu::ShaderProgram*>                                m_operationPrograms;
1141         std::vector<glw::GLuint>                                                m_operationResultStorages;
1142         std::map<int, glw::GLuint>                                              m_storageIDs;
1143 };
1144
1145 InterCallTestCase::InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops)
1146         : TestCase                                      (context, name, desc)
1147         , m_storage                                     (storage)
1148         , m_invocationGridSize          (512)
1149         , m_perInvocationSize           (2)
1150         , m_cmds                                        (ops.m_cmds)
1151         , m_useAtomic                           ((flags & FLAG_USE_ATOMIC) != 0)
1152         , m_formatInteger                       ((flags & FLAG_USE_INT) != 0)
1153 {
1154 }
1155
1156 InterCallTestCase::~InterCallTestCase (void)
1157 {
1158         deinit();
1159 }
1160
1161 void InterCallTestCase::init (void)
1162 {
1163         int                     programFriendlyName = 0;
1164
1165         // requirements
1166
1167         if (m_useAtomic && m_storage == STORAGE_IMAGE && !checkSupport(m_context))
1168                 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
1169
1170         // generate resources and validate command list
1171
1172         m_operationPrograms.resize(m_cmds.size(), DE_NULL);
1173         m_operationResultStorages.resize(m_cmds.size(), 0);
1174
1175         for (int step = 0; step < (int)m_cmds.size(); ++step)
1176         {
1177                 switch (m_cmds[step].type)
1178                 {
1179                         case InterCallOperations::Command::TYPE_WRITE:
1180                         {
1181                                 const op::WriteData& cmd = m_cmds[step].u_cmd.write;
1182
1183                                 // new storage handle?
1184                                 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1185                                         m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1186
1187                                 // program
1188                                 {
1189                                         glu::ShaderProgram* program = genWriteProgram(cmd.seed);
1190
1191                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1192                                         m_testCtx.getLog() << *program;
1193
1194                                         if (!program->isOk())
1195                                                 throw tcu::TestError("could not build program");
1196
1197                                         m_operationPrograms[step] = program;
1198                                 }
1199                                 break;
1200                         }
1201
1202                         case InterCallOperations::Command::TYPE_READ:
1203                         {
1204                                 const op::ReadData& cmd = m_cmds[step].u_cmd.read;
1205                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1206
1207                                 // program and result storage
1208                                 {
1209                                         glu::ShaderProgram* program = genReadProgram(cmd.seed);
1210
1211                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1212                                         m_testCtx.getLog() << *program;
1213
1214                                         if (!program->isOk())
1215                                                 throw tcu::TestError("could not build program");
1216
1217                                         m_operationPrograms[step] = program;
1218                                         m_operationResultStorages[step] = genResultStorage();
1219                                 }
1220                                 break;
1221                         }
1222
1223                         case InterCallOperations::Command::TYPE_BARRIER:
1224                         {
1225                                 break;
1226                         }
1227
1228                         case InterCallOperations::Command::TYPE_READ_MULTIPLE:
1229                         {
1230                                 const op::ReadMultipleData& cmd = m_cmds[step].u_cmd.readMulti;
1231                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle0) != m_storageIDs.end());
1232                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle1) != m_storageIDs.end());
1233
1234                                 // program
1235                                 {
1236                                         glu::ShaderProgram* program = genReadMultipleProgram(cmd.seed0, cmd.seed1);
1237
1238                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1239                                         m_testCtx.getLog() << *program;
1240
1241                                         if (!program->isOk())
1242                                                 throw tcu::TestError("could not build program");
1243
1244                                         m_operationPrograms[step] = program;
1245                                         m_operationResultStorages[step] = genResultStorage();
1246                                 }
1247                                 break;
1248                         }
1249
1250                         case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
1251                         {
1252                                 const op::WriteDataInterleaved& cmd = m_cmds[step].u_cmd.writeInterleave;
1253
1254                                 // new storage handle?
1255                                 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1256                                         m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1257
1258                                 // program
1259                                 {
1260                                         glu::ShaderProgram* program = genWriteInterleavedProgram(cmd.seed, cmd.evenOdd);
1261
1262                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1263                                         m_testCtx.getLog() << *program;
1264
1265                                         if (!program->isOk())
1266                                                 throw tcu::TestError("could not build program");
1267
1268                                         m_operationPrograms[step] = program;
1269                                 }
1270                                 break;
1271                         }
1272
1273                         case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
1274                         {
1275                                 const op::ReadDataInterleaved& cmd = m_cmds[step].u_cmd.readInterleave;
1276                                 DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1277
1278                                 // program
1279                                 {
1280                                         glu::ShaderProgram* program = genReadInterleavedProgram(cmd.seed0, cmd.seed1);
1281
1282                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1283                                         m_testCtx.getLog() << *program;
1284
1285                                         if (!program->isOk())
1286                                                 throw tcu::TestError("could not build program");
1287
1288                                         m_operationPrograms[step] = program;
1289                                         m_operationResultStorages[step] = genResultStorage();
1290                                 }
1291                                 break;
1292                         }
1293
1294                         case InterCallOperations::Command::TYPE_READ_ZERO:
1295                         {
1296                                 const op::ReadZeroData& cmd = m_cmds[step].u_cmd.readZero;
1297
1298                                 // new storage handle?
1299                                 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1300                                         m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1301
1302                                 // program
1303                                 {
1304                                         glu::ShaderProgram* program = genReadZeroProgram();
1305
1306                                         m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1307                                         m_testCtx.getLog() << *program;
1308
1309                                         if (!program->isOk())
1310                                                 throw tcu::TestError("could not build program");
1311
1312                                         m_operationPrograms[step] = program;
1313                                         m_operationResultStorages[step] = genResultStorage();
1314                                 }
1315                                 break;
1316                         }
1317
1318                         default:
1319                                 DE_ASSERT(DE_FALSE);
1320                 }
1321         }
1322 }
1323
1324 void InterCallTestCase::deinit (void)
1325 {
1326         // programs
1327         for (int ndx = 0; ndx < (int)m_operationPrograms.size(); ++ndx)
1328                 delete m_operationPrograms[ndx];
1329         m_operationPrograms.clear();
1330
1331         // result storages
1332         for (int ndx = 0; ndx < (int)m_operationResultStorages.size(); ++ndx)
1333         {
1334                 if (m_operationResultStorages[ndx])
1335                         m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_operationResultStorages[ndx]);
1336         }
1337         m_operationResultStorages.clear();
1338
1339         // storage
1340         for (std::map<int, glw::GLuint>::const_iterator it = m_storageIDs.begin(); it != m_storageIDs.end(); ++it)
1341         {
1342                 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1343
1344                 if (m_storage == STORAGE_BUFFER)
1345                         gl.deleteBuffers(1, &it->second);
1346                 else if (m_storage == STORAGE_IMAGE)
1347                         gl.deleteTextures(1, &it->second);
1348                 else
1349                         DE_ASSERT(DE_FALSE);
1350         }
1351         m_storageIDs.clear();
1352 }
1353
1354 InterCallTestCase::IterateResult InterCallTestCase::iterate (void)
1355 {
1356         int programFriendlyName                 = 0;
1357         int resultStorageFriendlyName   = 0;
1358
1359         m_testCtx.getLog() << tcu::TestLog::Message << "Running operations:" << tcu::TestLog::EndMessage;
1360
1361         // run steps
1362
1363         for (int step = 0; step < (int)m_cmds.size(); ++step)
1364         {
1365                 switch (m_cmds[step].type)
1366                 {
1367                         case InterCallOperations::Command::TYPE_WRITE:                          runCommand(m_cmds[step].u_cmd.write,                    step,   programFriendlyName);                                                           break;
1368                         case InterCallOperations::Command::TYPE_READ:                           runCommand(m_cmds[step].u_cmd.read,                             step,   programFriendlyName, resultStorageFriendlyName);        break;
1369                         case InterCallOperations::Command::TYPE_BARRIER:                        runCommand(m_cmds[step].u_cmd.barrier);                                                                                                                                         break;
1370                         case InterCallOperations::Command::TYPE_READ_MULTIPLE:          runCommand(m_cmds[step].u_cmd.readMulti,                step,   programFriendlyName, resultStorageFriendlyName);        break;
1371                         case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:       runCommand(m_cmds[step].u_cmd.writeInterleave,  step,   programFriendlyName);                                                           break;
1372                         case InterCallOperations::Command::TYPE_READ_INTERLEAVE:        runCommand(m_cmds[step].u_cmd.readInterleave,   step,   programFriendlyName, resultStorageFriendlyName);        break;
1373                         case InterCallOperations::Command::TYPE_READ_ZERO:                      runCommand(m_cmds[step].u_cmd.readZero,                 step,   programFriendlyName, resultStorageFriendlyName);        break;
1374                         default:
1375                                 DE_ASSERT(DE_FALSE);
1376                 }
1377         }
1378
1379         // read results from result buffers
1380         if (verifyResults())
1381                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1382         else
1383                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
1384
1385         return STOP;
1386 }
1387
1388 bool InterCallTestCase::verifyResults (void)
1389 {
1390         int             resultBufferFriendlyName        = 0;
1391         bool    allResultsOk                            = true;
1392         bool    anyResult                                       = false;
1393
1394         m_testCtx.getLog() << tcu::TestLog::Message << "Reading verifier program results" << tcu::TestLog::EndMessage;
1395
1396         for (int step = 0; step < (int)m_cmds.size(); ++step)
1397         {
1398                 const int       errorFloodThreshold     = 5;
1399                 int                     numErrorsLogged         = 0;
1400
1401                 if (m_operationResultStorages[step])
1402                 {
1403                         const glw::Functions&   gl              = m_context.getRenderContext().getFunctions();
1404                         const void*                             mapped  = DE_NULL;
1405                         std::vector<deInt32>    results (m_invocationGridSize * m_invocationGridSize);
1406                         bool                                    error   = false;
1407
1408                         anyResult = true;
1409
1410                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_operationResultStorages[step]);
1411                         mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), GL_MAP_READ_BIT);
1412                         GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
1413
1414                         // copy to properly aligned array
1415                         deMemcpy(&results[0], mapped, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32));
1416
1417                         if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
1418                                 throw tcu::TestError("memory map store corrupted");
1419
1420                         // check the results
1421                         for (int ndx = 0; ndx < (int)results.size(); ++ndx)
1422                         {
1423                                 if (results[ndx] != 1)
1424                                 {
1425                                         error = true;
1426
1427                                         if (numErrorsLogged == 0)
1428                                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
1429                                         if (numErrorsLogged++ < errorFloodThreshold)
1430                                                 m_testCtx.getLog() << tcu::TestLog::Message << "        Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
1431                                         else
1432                                         {
1433                                                 // after N errors, no point continuing verification
1434                                                 m_testCtx.getLog() << tcu::TestLog::Message << "        -- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
1435                                                 break;
1436                                         }
1437                                 }
1438                         }
1439
1440                         if (error)
1441                         {
1442                                 allResultsOk = false;
1443                         }
1444                         else
1445                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " ok." << tcu::TestLog::EndMessage;
1446                 }
1447         }
1448
1449         DE_ASSERT(anyResult);
1450         DE_UNREF(anyResult);
1451
1452         return allResultsOk;
1453 }
1454
1455 void InterCallTestCase::runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName)
1456 {
1457         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1458
1459         m_testCtx.getLog()
1460                 << tcu::TestLog::Message
1461                 << "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1462                 << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1463                 << tcu::TestLog::EndMessage;
1464
1465         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1466
1467         // set destination
1468         if (m_storage == STORAGE_BUFFER)
1469         {
1470                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1471
1472                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1473                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1474         }
1475         else if (m_storage == STORAGE_IMAGE)
1476         {
1477                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1478
1479                 gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1480                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1481         }
1482         else
1483                 DE_ASSERT(DE_FALSE);
1484
1485         // calc
1486         gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1487         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1488 }
1489
1490 void InterCallTestCase::runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1491 {
1492         runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1493 }
1494
1495 void InterCallTestCase::runCommand (const op::Barrier& cmd)
1496 {
1497         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1498
1499         DE_UNREF(cmd);
1500
1501         if (m_storage == STORAGE_BUFFER)
1502         {
1503                 m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_STORAGE_BARRIER_BIT" << tcu::TestLog::EndMessage;
1504                 gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
1505         }
1506         else if (m_storage == STORAGE_IMAGE)
1507         {
1508                 m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_IMAGE_ACCESS_BARRIER_BIT" << tcu::TestLog::EndMessage;
1509                 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1510         }
1511         else
1512                 DE_ASSERT(DE_FALSE);
1513 }
1514
1515 void InterCallTestCase::runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1516 {
1517         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1518
1519         m_testCtx.getLog()
1520                 << tcu::TestLog::Message
1521                 << "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffers") : ("images")) << " #" << cmd.targetHandle0 << " and #" << cmd.targetHandle1 << ".\n"
1522                 << "    Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1523                 << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1524                 << tcu::TestLog::EndMessage;
1525
1526         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1527
1528         // set sources
1529         if (m_storage == STORAGE_BUFFER)
1530         {
1531                 DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1532                 DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1533
1534                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[cmd.targetHandle0]);
1535                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageIDs[cmd.targetHandle1]);
1536                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffers");
1537         }
1538         else if (m_storage == STORAGE_IMAGE)
1539         {
1540                 DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1541                 DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1542
1543                 gl.bindImageTexture(1, m_storageIDs[cmd.targetHandle0], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1544                 gl.bindImageTexture(2, m_storageIDs[cmd.targetHandle1], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1545                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
1546         }
1547         else
1548                 DE_ASSERT(DE_FALSE);
1549
1550         // set destination
1551         DE_ASSERT(m_operationResultStorages[stepNdx]);
1552         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1553         GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1554
1555         // calc
1556         gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1557         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read multi");
1558 }
1559
1560 void InterCallTestCase::runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName)
1561 {
1562         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1563
1564         m_testCtx.getLog()
1565                 << tcu::TestLog::Message
1566                 << "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1567                 << "    Writing to every " << ((cmd.evenOdd) ? ("even") : ("odd")) << " " << ((m_storage == STORAGE_BUFFER) ? ("element") : ("column")) << ".\n"
1568                 << "    Dispatch size: " << m_invocationGridSize / 2 << "x" << m_invocationGridSize << "."
1569                 << tcu::TestLog::EndMessage;
1570
1571         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1572
1573         // set destination
1574         if (m_storage == STORAGE_BUFFER)
1575         {
1576                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1577
1578                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1579                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1580         }
1581         else if (m_storage == STORAGE_IMAGE)
1582         {
1583                 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1584
1585                 gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1586                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1587         }
1588         else
1589                 DE_ASSERT(DE_FALSE);
1590
1591         // calc
1592         gl.dispatchCompute(m_invocationGridSize / 2, m_invocationGridSize, 1);
1593         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1594 }
1595
1596 void InterCallTestCase::runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1597 {
1598         runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1599 }
1600
1601 void InterCallTestCase::runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1602 {
1603         runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1604 }
1605
1606 void InterCallTestCase::runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1607 {
1608         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1609
1610         m_testCtx.getLog()
1611                 << tcu::TestLog::Message
1612                 << "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << targetHandle << ".\n"
1613                 << "    Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1614                 << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1615                 << tcu::TestLog::EndMessage;
1616
1617         gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1618
1619         // set source
1620         if (m_storage == STORAGE_BUFFER)
1621         {
1622                 DE_ASSERT(m_storageIDs[targetHandle]);
1623
1624                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[targetHandle]);
1625                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffer");
1626         }
1627         else if (m_storage == STORAGE_IMAGE)
1628         {
1629                 DE_ASSERT(m_storageIDs[targetHandle]);
1630
1631                 gl.bindImageTexture(1, m_storageIDs[targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1632                 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
1633         }
1634         else
1635                 DE_ASSERT(DE_FALSE);
1636
1637         // set destination
1638         DE_ASSERT(m_operationResultStorages[stepNdx]);
1639         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1640         GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1641
1642         // calc
1643         gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1644         GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read");
1645 }
1646
1647 glw::GLuint InterCallTestCase::genStorage (int friendlyName)
1648 {
1649         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1650
1651         if (m_storage == STORAGE_BUFFER)
1652         {
1653                 const int               numElements             = m_invocationGridSize * m_invocationGridSize * m_perInvocationSize;
1654                 const int               bufferSize              = numElements * (int)((m_formatInteger) ? (sizeof(deInt32)) : (sizeof(glw::GLfloat)));
1655                 glw::GLuint             retVal                  = 0;
1656
1657                 m_testCtx.getLog() << tcu::TestLog::Message << "Creating buffer #" << friendlyName << ", size " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
1658
1659                 gl.genBuffers(1, &retVal);
1660                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1661
1662                 if (m_formatInteger)
1663                 {
1664                         const std::vector<deUint32> zeroBuffer(numElements, 0);
1665                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1666                 }
1667                 else
1668                 {
1669                         const std::vector<float> zeroBuffer(numElements, 0.0f);
1670                         gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1671                 }
1672                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1673
1674                 return retVal;
1675         }
1676         else if (m_storage == STORAGE_IMAGE)
1677         {
1678                 const int       imageWidth      = m_invocationGridSize;
1679                 const int       imageHeight     = m_invocationGridSize * m_perInvocationSize;
1680                 glw::GLuint     retVal          = 0;
1681
1682                 m_testCtx.getLog()
1683                         << tcu::TestLog::Message
1684                         << "Creating image #" << friendlyName << ", size " << imageWidth << "x" << imageHeight
1685                         << ", internalformat = " << ((m_formatInteger) ? ("r32i") : ("r32f"))
1686                         << ", size = " << (imageWidth*imageHeight*sizeof(deUint32)) << " bytes."
1687                         << tcu::TestLog::EndMessage;
1688
1689                 gl.genTextures(1, &retVal);
1690                 gl.bindTexture(GL_TEXTURE_2D, retVal);
1691
1692                 if (m_formatInteger)
1693                         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, imageWidth, imageHeight);
1694                 else
1695                         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32F, imageWidth, imageHeight);
1696
1697                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1698                 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1699                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen image");
1700
1701                 m_testCtx.getLog()
1702                         << tcu::TestLog::Message
1703                         << "Filling image with 0"
1704                         << tcu::TestLog::EndMessage;
1705
1706                 if (m_formatInteger)
1707                 {
1708                         const std::vector<deInt32> zeroBuffer(imageWidth * imageHeight, 0);
1709                         gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
1710                 }
1711                 else
1712                 {
1713                         const std::vector<float> zeroBuffer(imageWidth * imageHeight, 0.0f);
1714                         gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED, GL_FLOAT, &zeroBuffer[0]);
1715                 }
1716
1717                 GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
1718
1719                 return retVal;
1720         }
1721         else
1722         {
1723                 DE_ASSERT(DE_FALSE);
1724                 return 0;
1725         }
1726 }
1727
1728 glw::GLuint InterCallTestCase::genResultStorage (void)
1729 {
1730         const glw::Functions&   gl              = m_context.getRenderContext().getFunctions();
1731         glw::GLuint                             retVal  = 0;
1732
1733         gl.genBuffers(1, &retVal);
1734         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1735         gl.bufferData(GL_SHADER_STORAGE_BUFFER, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), DE_NULL, GL_STATIC_DRAW);
1736         GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1737
1738         return retVal;
1739 }
1740
1741 glu::ShaderProgram* InterCallTestCase::genWriteProgram (int seed)
1742 {
1743         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1744         std::ostringstream      buf;
1745
1746         buf << "${GLSL_VERSION_DECL}\n"
1747                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1748                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1749
1750         if (m_storage == STORAGE_BUFFER)
1751                 buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1752                         << "{\n"
1753                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1754                         << "} sb_out;\n";
1755         else if (m_storage == STORAGE_IMAGE)
1756                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1757         else
1758                 DE_ASSERT(DE_FALSE);
1759
1760         buf << "\n"
1761                 << "void main (void)\n"
1762                 << "{\n"
1763                 << "    uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1764                 << "    int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1765                 << "\n";
1766
1767         // Write to buffer/image m_perInvocationSize elements
1768         if (m_storage == STORAGE_BUFFER)
1769         {
1770                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1771                 {
1772                         if (m_useAtomic)
1773                                 buf << "        atomicExchange(";
1774                         else
1775                                 buf << "        ";
1776
1777                         buf << "sb_out.values[(groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]";
1778
1779                         if (m_useAtomic)
1780                                 buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1781                         else
1782                                 buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1783                 }
1784         }
1785         else if (m_storage == STORAGE_IMAGE)
1786         {
1787                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1788                 {
1789                         if (m_useAtomic)
1790                                 buf << "        imageAtomicExchange";
1791                         else
1792                                 buf << "        imageStore";
1793
1794                         buf << "(u_imageOut, ivec2((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1795
1796                         if (m_useAtomic)
1797                                 buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1798                         else
1799                                 buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
1800                 }
1801         }
1802         else
1803                 DE_ASSERT(DE_FALSE);
1804
1805         buf << "}\n";
1806
1807         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1808 }
1809
1810 glu::ShaderProgram* InterCallTestCase::genReadProgram (int seed)
1811 {
1812         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1813         std::ostringstream      buf;
1814
1815         buf << "${GLSL_VERSION_DECL}\n"
1816                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1817                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1818
1819         if (m_storage == STORAGE_BUFFER)
1820                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1821                         << "{\n"
1822                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1823                         << "} sb_in;\n";
1824         else if (m_storage == STORAGE_IMAGE)
1825                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
1826         else
1827                 DE_ASSERT(DE_FALSE);
1828
1829         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1830                 << "{\n"
1831                 << "    highp int resultOk[];\n"
1832                 << "} sb_result;\n"
1833                 << "\n"
1834                 << "void main (void)\n"
1835                 << "{\n"
1836                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1837                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1838                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1839                 << "    bool allOk = true;\n"
1840                 << "\n";
1841
1842         // Verify data
1843
1844         if (m_storage == STORAGE_BUFFER)
1845         {
1846                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1847                 {
1848                         if (!m_useAtomic)
1849                                 buf << "        allOk = allOk && (sb_in.values[(groupNdx + "
1850                                         << seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "] == "
1851                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1852                         else
1853                                 buf << "        allOk = allOk && (atomicExchange(sb_in.values[(groupNdx + "
1854                                         << seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "], zero) == "
1855                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1856                 }
1857         }
1858         else if (m_storage == STORAGE_IMAGE)
1859         {
1860                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1861                 {
1862                         if (!m_useAtomic)
1863                                 buf     << "    allOk = allOk && (imageLoad(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1864                                         << (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)).x == "
1865                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1866                         else
1867                                 buf << "        allOk = allOk && (imageAtomicExchange(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1868                                         << (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u), zero) == "
1869                                         << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1870                 }
1871         }
1872         else
1873                 DE_ASSERT(DE_FALSE);
1874
1875         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1876                 << "}\n";
1877
1878         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1879 }
1880
1881 glu::ShaderProgram* InterCallTestCase::genReadMultipleProgram (int seed0, int seed1)
1882 {
1883         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1884         std::ostringstream      buf;
1885
1886         buf << "${GLSL_VERSION_DECL}\n"
1887                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1888                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1889
1890         if (m_storage == STORAGE_BUFFER)
1891                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer0\n"
1892                         << "{\n"
1893                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1894                         << "} sb_in0;\n"
1895                         << "layout(binding=2, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer1\n"
1896                         << "{\n"
1897                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1898                         << "} sb_in1;\n";
1899         else if (m_storage == STORAGE_IMAGE)
1900                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn0;\n"
1901                         << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=2) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn1;\n";
1902         else
1903                 DE_ASSERT(DE_FALSE);
1904
1905         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1906                 << "{\n"
1907                 << "    highp int resultOk[];\n"
1908                 << "} sb_result;\n"
1909                 << "\n"
1910                 << "void main (void)\n"
1911                 << "{\n"
1912                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1913                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1914                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1915                 << "    bool allOk = true;\n"
1916                 << "\n";
1917
1918         // Verify data
1919
1920         if (m_storage == STORAGE_BUFFER)
1921         {
1922                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1923                         buf << "        allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in0.values[(groupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1924                                 << "    allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in1.values[(groupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1925         }
1926         else if (m_storage == STORAGE_IMAGE)
1927         {
1928                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1929                         buf << "        allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn0, ivec2((gl_GlobalInvocationID.x + " << (seed0 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1930                                 << "    allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn1, ivec2((gl_GlobalInvocationID.x + " << (seed1 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1931         }
1932         else
1933                 DE_ASSERT(DE_FALSE);
1934
1935         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1936                 << "}\n";
1937
1938         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1939 }
1940
1941 glu::ShaderProgram* InterCallTestCase::genWriteInterleavedProgram (int seed, bool evenOdd)
1942 {
1943         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1944         std::ostringstream      buf;
1945
1946         buf << "${GLSL_VERSION_DECL}\n"
1947                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1948                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1949
1950         if (m_storage == STORAGE_BUFFER)
1951                 buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1952                         << "{\n"
1953                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1954                         << "} sb_out;\n";
1955         else if (m_storage == STORAGE_IMAGE)
1956                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1957         else
1958                 DE_ASSERT(DE_FALSE);
1959
1960         buf << "\n"
1961                 << "void main (void)\n"
1962                 << "{\n"
1963                 << "    uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1964                 << "    int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1965                 << "\n";
1966
1967         // Write to buffer/image m_perInvocationSize elements
1968         if (m_storage == STORAGE_BUFFER)
1969         {
1970                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1971                 {
1972                         if (m_useAtomic)
1973                                 buf << "        atomicExchange(";
1974                         else
1975                                 buf << "        ";
1976
1977                         buf << "sb_out.values[((groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize / 2 * m_perInvocationSize  << ") * 2 + " << ((evenOdd) ? (0) : (1)) << "]";
1978
1979                         if (m_useAtomic)
1980                                 buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1981                         else
1982                                 buf << "= " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1983                 }
1984         }
1985         else if (m_storage == STORAGE_IMAGE)
1986         {
1987                 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1988                 {
1989                         if (m_useAtomic)
1990                                 buf << "        imageAtomicExchange";
1991                         else
1992                                 buf << "        imageStore";
1993
1994                         buf << "(u_imageOut, ivec2(((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + " << ((evenOdd) ? (0) : (1)) << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1995
1996                         if (m_useAtomic)
1997                                 buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1998                         else
1999                                 buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
2000                 }
2001         }
2002         else
2003                 DE_ASSERT(DE_FALSE);
2004
2005         buf << "}\n";
2006
2007         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2008 }
2009
2010 glu::ShaderProgram* InterCallTestCase::genReadInterleavedProgram (int seed0, int seed1)
2011 {
2012         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2013         std::ostringstream      buf;
2014
2015         buf << "${GLSL_VERSION_DECL}\n"
2016                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
2017                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
2018
2019         if (m_storage == STORAGE_BUFFER)
2020                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2021                         << "{\n"
2022                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2023                         << "} sb_in;\n";
2024         else if (m_storage == STORAGE_IMAGE)
2025                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2026         else
2027                 DE_ASSERT(DE_FALSE);
2028
2029         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2030                 << "{\n"
2031                 << "    highp int resultOk[];\n"
2032                 << "} sb_result;\n"
2033                 << "\n"
2034                 << "void main (void)\n"
2035                 << "{\n"
2036                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2037                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2038                 << "    int interleavedGroupNdx = int((size.x >> 1U) * size.y * gl_GlobalInvocationID.z + (size.x >> 1U) * gl_GlobalInvocationID.y + (gl_GlobalInvocationID.x >> 1U));\n"
2039                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
2040                 << "    bool allOk = true;\n"
2041                 << "\n";
2042
2043         // Verify data
2044
2045         if (m_storage == STORAGE_BUFFER)
2046         {
2047                 buf << "        if (groupNdx % 2 == 0)\n"
2048                         << "    {\n";
2049                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2050                         buf << "                allOk = allOk && ("
2051                                 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 0]"
2052                                 << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2053                 buf << "        }\n"
2054                         << "    else\n"
2055                         << "    {\n";
2056                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2057                         buf << "                allOk = allOk && ("
2058                                 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 1]"
2059                                 << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2060                 buf << "        }\n";
2061         }
2062         else if (m_storage == STORAGE_IMAGE)
2063         {
2064                 buf << "        if (groupNdx % 2 == 0)\n"
2065                         << "    {\n";
2066                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2067                         buf << "                allOk = allOk && ("
2068                                 << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2069                                 << "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed0 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 0, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2070                                 << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2071                 buf << "        }\n"
2072                         << "    else\n"
2073                         << "    {\n";
2074                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2075                         buf << "                allOk = allOk && ("
2076                                 << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2077                                 << "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed1 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 1, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2078                                 << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2079                 buf << "        }\n";
2080         }
2081         else
2082                 DE_ASSERT(DE_FALSE);
2083
2084         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2085                 << "}\n";
2086
2087         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2088 }
2089
2090 glu::ShaderProgram*     InterCallTestCase::genReadZeroProgram (void)
2091 {
2092         const bool                      useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2093         std::ostringstream      buf;
2094
2095         buf << "${GLSL_VERSION_DECL}\n"
2096                 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
2097                 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
2098
2099         if (m_storage == STORAGE_BUFFER)
2100                 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2101                         << "{\n"
2102                         << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2103                         << "} sb_in;\n";
2104         else if (m_storage == STORAGE_IMAGE)
2105                 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2106         else
2107                 DE_ASSERT(DE_FALSE);
2108
2109         buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2110                 << "{\n"
2111                 << "    highp int resultOk[];\n"
2112                 << "} sb_result;\n"
2113                 << "\n"
2114                 << "void main (void)\n"
2115                 << "{\n"
2116                 << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2117                 << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2118                 << "    " << ((m_formatInteger) ? ("int") : ("float")) << " anything = " << ((m_formatInteger) ? ("5") : ("5.0")) << ";\n"
2119                 << "    bool allOk = true;\n"
2120                 << "\n";
2121
2122         // Verify data
2123
2124         if (m_storage == STORAGE_BUFFER)
2125         {
2126                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2127                         buf << "        allOk = allOk && ("
2128                                 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[groupNdx * " << m_perInvocationSize << " + " << readNdx << "]"
2129                                 << ((m_useAtomic) ? (", anything)") : ("")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2130         }
2131         else if (m_storage == STORAGE_IMAGE)
2132         {
2133                 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2134                         buf << "        allOk = allOk && ("
2135                         << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y + " << (readNdx*m_invocationGridSize) << "u)"
2136                         << ((m_useAtomic) ? (", anything)") : (").x")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2137         }
2138         else
2139                 DE_ASSERT(DE_FALSE);
2140
2141         buf << "        sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2142                 << "}\n";
2143
2144         return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2145 }
2146
2147 class SSBOConcurrentAtomicCase : public TestCase
2148 {
2149 public:
2150
2151                                                         SSBOConcurrentAtomicCase        (Context& context, const char* name, const char* description, int numCalls, int workSize);
2152                                                         ~SSBOConcurrentAtomicCase       (void);
2153
2154         void                                    init                                            (void);
2155         void                                    deinit                                          (void);
2156         IterateResult                   iterate                                         (void);
2157
2158 private:
2159         std::string                             genComputeSource                        (void) const;
2160
2161         const int                               m_numCalls;
2162         const int                               m_workSize;
2163         glu::ShaderProgram*             m_program;
2164         deUint32                                m_bufferID;
2165         std::vector<deUint32>   m_intermediateResultBuffers;
2166 };
2167
2168 SSBOConcurrentAtomicCase::SSBOConcurrentAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2169         : TestCase              (context, name, description)
2170         , m_numCalls    (numCalls)
2171         , m_workSize    (workSize)
2172         , m_program             (DE_NULL)
2173         , m_bufferID    (DE_NULL)
2174 {
2175 }
2176
2177 SSBOConcurrentAtomicCase::~SSBOConcurrentAtomicCase (void)
2178 {
2179         deinit();
2180 }
2181
2182 void SSBOConcurrentAtomicCase::init (void)
2183 {
2184         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
2185         std::vector<deUint32>   zeroData                        (m_workSize, 0);
2186
2187         // gen buffers
2188
2189         gl.genBuffers(1, &m_bufferID);
2190         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2191         gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2192
2193         for (int ndx = 0; ndx < m_numCalls; ++ndx)
2194         {
2195                 deUint32 buffer = 0;
2196
2197                 gl.genBuffers(1, &buffer);
2198                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2199                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2200
2201                 m_intermediateResultBuffers.push_back(buffer);
2202                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2203         }
2204
2205         // gen program
2206
2207         m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2208         m_testCtx.getLog() << *m_program;
2209         if (!m_program->isOk())
2210                 throw tcu::TestError("could not build program");
2211 }
2212
2213 void SSBOConcurrentAtomicCase::deinit (void)
2214 {
2215         if (m_bufferID)
2216         {
2217                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2218                 m_bufferID = 0;
2219         }
2220
2221         for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2222                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2223         m_intermediateResultBuffers.clear();
2224
2225         delete m_program;
2226         m_program = DE_NULL;
2227 }
2228
2229 TestCase::IterateResult SSBOConcurrentAtomicCase::iterate (void)
2230 {
2231         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
2232         const deUint32                  sumValue                = (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2233         std::vector<int>                deltas;
2234
2235         // generate unique deltas
2236         generateShuffledRamp(m_numCalls, deltas);
2237
2238         // invoke program N times, each with a different delta
2239         {
2240                 const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2241
2242                 m_testCtx.getLog()
2243                         << tcu::TestLog::Message
2244                         << "Running shader " << m_numCalls << " times.\n"
2245                         << "Num groups = (" << m_workSize << ", 1, 1)\n"
2246                         << "Setting u_atomicDelta to a unique value for each call.\n"
2247                         << tcu::TestLog::EndMessage;
2248
2249                 if (deltaLocation == -1)
2250                         throw tcu::TestError("u_atomicDelta location was -1");
2251
2252                 gl.useProgram(m_program->getProgram());
2253                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_bufferID);
2254
2255                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2256                 {
2257                         m_testCtx.getLog()
2258                                 << tcu::TestLog::Message
2259                                 << "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2260                                 << tcu::TestLog::EndMessage;
2261
2262                         gl.uniform1ui(deltaLocation, deltas[callNdx]);
2263                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2264                         gl.dispatchCompute(m_workSize, 1, 1);
2265                 }
2266
2267                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2268         }
2269
2270         // Verify result
2271         {
2272                 std::vector<deUint32> result;
2273
2274                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2275
2276                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2277                 readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, result);
2278
2279                 for (int ndx = 0; ndx < m_workSize; ++ndx)
2280                 {
2281                         if (result[ndx] != sumValue)
2282                         {
2283                                 m_testCtx.getLog()
2284                                         << tcu::TestLog::Message
2285                                         << "Work buffer error, at index " << ndx << " expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2286                                         << "Work buffer contains invalid values."
2287                                         << tcu::TestLog::EndMessage;
2288
2289                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2290                                 return STOP;
2291                         }
2292                 }
2293
2294                 m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer contents are valid." << tcu::TestLog::EndMessage;
2295         }
2296
2297         // verify steps
2298         {
2299                 std::vector<std::vector<deUint32> >     intermediateResults     (m_numCalls);
2300                 std::vector<deUint32>                           valueChain                      (m_numCalls);
2301
2302                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2303
2304                 // collect results
2305
2306                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2307                 {
2308                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2309                         readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, intermediateResults[callNdx]);
2310                 }
2311
2312                 // verify values
2313
2314                 for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2315                 {
2316                         int                     invalidOperationNdx;
2317                         deUint32        errorDelta;
2318                         deUint32        errorExpected;
2319
2320                         // collect result chain for each element
2321                         for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2322                                 valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2323
2324                         // check there exists a path from 0 to sumValue using each addition once
2325                         // decompose cumulative results to addition operations (all additions positive => this works)
2326
2327                         std::sort(valueChain.begin(), valueChain.end());
2328
2329                         // validate chain
2330                         if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2331                         {
2332                                 m_testCtx.getLog()
2333                                         << tcu::TestLog::Message
2334                                         << "Intermediate buffer error, at value index " << valueNdx << ", applied operation index " << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2335                                         << "Intermediate buffer contains invalid values. Values at index " << valueNdx << "\n"
2336                                         << tcu::TestLog::EndMessage;
2337
2338                                 for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2339                                         m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2340                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2341
2342                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2343                                 return STOP;
2344                         }
2345                 }
2346
2347                 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2348         }
2349
2350         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2351         return STOP;
2352 }
2353
2354 std::string SSBOConcurrentAtomicCase::genComputeSource (void) const
2355 {
2356         std::ostringstream buf;
2357
2358         buf     << "${GLSL_VERSION_DECL}\n"
2359                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2360                 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2361                 << "{\n"
2362                 << "    highp uint values[" << m_workSize << "];\n"
2363                 << "} sb_ires;\n"
2364                 << "\n"
2365                 << "layout (binding = 2, std430) volatile buffer WorkBuffer\n"
2366                 << "{\n"
2367                 << "    highp uint values[" << m_workSize << "];\n"
2368                 << "} sb_work;\n"
2369                 << "uniform highp uint u_atomicDelta;\n"
2370                 << "\n"
2371                 << "void main ()\n"
2372                 << "{\n"
2373                 << "    highp uint invocationIndex = gl_GlobalInvocationID.x;\n"
2374                 << "    sb_ires.values[invocationIndex] = atomicAdd(sb_work.values[invocationIndex], u_atomicDelta);\n"
2375                 << "}";
2376
2377         return specializeShader(m_context, buf.str().c_str());
2378 }
2379
2380 class ConcurrentAtomicCounterCase : public TestCase
2381 {
2382 public:
2383
2384                                                         ConcurrentAtomicCounterCase             (Context& context, const char* name, const char* description, int numCalls, int workSize);
2385                                                         ~ConcurrentAtomicCounterCase    (void);
2386
2387         void                                    init                                                    (void);
2388         void                                    deinit                                                  (void);
2389         IterateResult                   iterate                                                 (void);
2390
2391 private:
2392         std::string                             genComputeSource                                (bool evenOdd) const;
2393
2394         const int                               m_numCalls;
2395         const int                               m_workSize;
2396         glu::ShaderProgram*             m_evenProgram;
2397         glu::ShaderProgram*             m_oddProgram;
2398         deUint32                                m_counterBuffer;
2399         deUint32                                m_intermediateResultBuffer;
2400 };
2401
2402 ConcurrentAtomicCounterCase::ConcurrentAtomicCounterCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2403         : TestCase                                      (context, name, description)
2404         , m_numCalls                            (numCalls)
2405         , m_workSize                            (workSize)
2406         , m_evenProgram                         (DE_NULL)
2407         , m_oddProgram                          (DE_NULL)
2408         , m_counterBuffer                       (DE_NULL)
2409         , m_intermediateResultBuffer(DE_NULL)
2410 {
2411 }
2412
2413 ConcurrentAtomicCounterCase::~ConcurrentAtomicCounterCase (void)
2414 {
2415         deinit();
2416 }
2417
2418 void ConcurrentAtomicCounterCase::init (void)
2419 {
2420         const glw::Functions&           gl                      = m_context.getRenderContext().getFunctions();
2421         const std::vector<deUint32>     zeroData        (m_numCalls * m_workSize, 0);
2422
2423         // gen buffer
2424
2425         gl.genBuffers(1, &m_counterBuffer);
2426         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_counterBuffer);
2427         gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32), &zeroData[0], GL_DYNAMIC_COPY);
2428
2429         gl.genBuffers(1, &m_intermediateResultBuffer);
2430         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2431         gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_numCalls * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2432
2433         GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2434
2435         // gen programs
2436
2437         {
2438                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "EvenProgram", "Even program");
2439
2440                 m_evenProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(true)));
2441                 m_testCtx.getLog() << *m_evenProgram;
2442                 if (!m_evenProgram->isOk())
2443                         throw tcu::TestError("could not build program");
2444         }
2445         {
2446                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "OddProgram", "Odd program");
2447
2448                 m_oddProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(false)));
2449                 m_testCtx.getLog() << *m_oddProgram;
2450                 if (!m_oddProgram->isOk())
2451                         throw tcu::TestError("could not build program");
2452         }
2453 }
2454
2455 void ConcurrentAtomicCounterCase::deinit (void)
2456 {
2457         if (m_counterBuffer)
2458         {
2459                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_counterBuffer);
2460                 m_counterBuffer = 0;
2461         }
2462         if (m_intermediateResultBuffer)
2463         {
2464                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffer);
2465                 m_intermediateResultBuffer = 0;
2466         }
2467
2468         delete m_evenProgram;
2469         m_evenProgram = DE_NULL;
2470
2471         delete m_oddProgram;
2472         m_oddProgram = DE_NULL;
2473 }
2474
2475 TestCase::IterateResult ConcurrentAtomicCounterCase::iterate (void)
2476 {
2477         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2478
2479         // invoke program N times, each with a different delta
2480         {
2481                 const int evenCallNdxLocation   = gl.getUniformLocation(m_evenProgram->getProgram(), "u_callNdx");
2482                 const int oddCallNdxLocation    = gl.getUniformLocation(m_oddProgram->getProgram(), "u_callNdx");
2483
2484                 m_testCtx.getLog()
2485                         << tcu::TestLog::Message
2486                         << "Running shader pair (even & odd) " << m_numCalls << " times.\n"
2487                         << "Num groups = (" << m_workSize << ", 1, 1)\n"
2488                         << tcu::TestLog::EndMessage;
2489
2490                 if (evenCallNdxLocation == -1)
2491                         throw tcu::TestError("u_callNdx location was -1");
2492                 if (oddCallNdxLocation == -1)
2493                         throw tcu::TestError("u_callNdx location was -1");
2494
2495                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffer);
2496                 gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_counterBuffer);
2497
2498                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2499                 {
2500                         gl.useProgram(m_evenProgram->getProgram());
2501                         gl.uniform1ui(evenCallNdxLocation, (deUint32)callNdx);
2502                         gl.dispatchCompute(m_workSize, 1, 1);
2503
2504                         gl.useProgram(m_oddProgram->getProgram());
2505                         gl.uniform1ui(oddCallNdxLocation, (deUint32)callNdx);
2506                         gl.dispatchCompute(m_workSize, 1, 1);
2507                 }
2508
2509                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2510         }
2511
2512         // Verify result
2513         {
2514                 deUint32 result;
2515
2516                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
2517
2518                 gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counterBuffer);
2519                 result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
2520
2521                 if ((int)result != m_numCalls*m_workSize)
2522                 {
2523                         m_testCtx.getLog()
2524                                 << tcu::TestLog::Message
2525                                 << "Counter buffer error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
2526                                 << tcu::TestLog::EndMessage;
2527
2528                         m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2529                         return STOP;
2530                 }
2531
2532                 m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer is valid." << tcu::TestLog::EndMessage;
2533         }
2534
2535         // verify steps
2536         {
2537                 std::vector<deUint32> intermediateResults;
2538
2539                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2540
2541                 // collect results
2542
2543                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2544                 readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_numCalls * m_workSize, intermediateResults);
2545
2546                 // verify values
2547
2548                 std::sort(intermediateResults.begin(), intermediateResults.end());
2549
2550                 for (int valueNdx = 0; valueNdx < m_workSize * m_numCalls; ++valueNdx)
2551                 {
2552                         if ((int)intermediateResults[valueNdx] != valueNdx)
2553                         {
2554                                 m_testCtx.getLog()
2555                                         << tcu::TestLog::Message
2556                                         << "Intermediate buffer error, at value index " << valueNdx << ", expected " << valueNdx << ", got " << intermediateResults[valueNdx] << ".\n"
2557                                         << "Intermediate buffer contains invalid values. Intermediate results:\n"
2558                                         << tcu::TestLog::EndMessage;
2559
2560                                 for (int logCallNdx = 0; logCallNdx < m_workSize * m_numCalls; ++logCallNdx)
2561                                         m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx] << tcu::TestLog::EndMessage;
2562
2563                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2564                                 return STOP;
2565                         }
2566                 }
2567
2568                 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2569         }
2570
2571         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2572         return STOP;
2573 }
2574
2575 std::string ConcurrentAtomicCounterCase::genComputeSource (bool evenOdd) const
2576 {
2577         std::ostringstream buf;
2578
2579         buf     << "${GLSL_VERSION_DECL}\n"
2580                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2581                 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2582                 << "{\n"
2583                 << "    highp uint values[" << m_workSize * m_numCalls << "];\n"
2584                 << "} sb_ires;\n"
2585                 << "\n"
2586                 << "layout (binding = 0, offset = 0) uniform atomic_uint u_counter;\n"
2587                 << "uniform highp uint u_callNdx;\n"
2588                 << "\n"
2589                 << "void main ()\n"
2590                 << "{\n"
2591                 << "    highp uint dataNdx = u_callNdx * " << m_workSize << "u + gl_GlobalInvocationID.x;\n"
2592                 << "    if ((dataNdx % 2u) == " << ((evenOdd) ? (0) : (1)) << "u)\n"
2593                 << "            sb_ires.values[dataNdx] = atomicCounterIncrement(u_counter);\n"
2594                 << "}";
2595
2596         return specializeShader(m_context, buf.str().c_str());
2597 }
2598
2599 class ConcurrentImageAtomicCase : public TestCase
2600 {
2601 public:
2602
2603                                                         ConcurrentImageAtomicCase       (Context& context, const char* name, const char* description, int numCalls, int workSize);
2604                                                         ~ConcurrentImageAtomicCase      (void);
2605
2606         void                                    init                                            (void);
2607         void                                    deinit                                          (void);
2608         IterateResult                   iterate                                         (void);
2609
2610 private:
2611         void                                    readWorkImage                           (std::vector<deUint32>& result);
2612
2613         std::string                             genComputeSource                        (void) const;
2614         std::string                             genImageReadSource                      (void) const;
2615         std::string                             genImageClearSource                     (void) const;
2616
2617         const int                               m_numCalls;
2618         const int                               m_workSize;
2619         glu::ShaderProgram*             m_program;
2620         glu::ShaderProgram*             m_imageReadProgram;
2621         glu::ShaderProgram*             m_imageClearProgram;
2622         deUint32                                m_imageID;
2623         std::vector<deUint32>   m_intermediateResultBuffers;
2624 };
2625
2626 ConcurrentImageAtomicCase::ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2627         : TestCase                              (context, name, description)
2628         , m_numCalls                    (numCalls)
2629         , m_workSize                    (workSize)
2630         , m_program                             (DE_NULL)
2631         , m_imageReadProgram    (DE_NULL)
2632         , m_imageClearProgram   (DE_NULL)
2633         , m_imageID                             (DE_NULL)
2634 {
2635 }
2636
2637 ConcurrentImageAtomicCase::~ConcurrentImageAtomicCase (void)
2638 {
2639         deinit();
2640 }
2641
2642 void ConcurrentImageAtomicCase::init (void)
2643 {
2644         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
2645         std::vector<deUint32>   zeroData                        (m_workSize * m_workSize, 0);
2646
2647         if (!checkSupport(m_context))
2648                 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic");
2649
2650         // gen image
2651
2652         gl.genTextures(1, &m_imageID);
2653         gl.bindTexture(GL_TEXTURE_2D, m_imageID);
2654         gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize, m_workSize);
2655         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2656         gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2657         GLU_EXPECT_NO_ERROR(gl.getError(), "gen tex");
2658
2659         // gen buffers
2660
2661         for (int ndx = 0; ndx < m_numCalls; ++ndx)
2662         {
2663                 deUint32 buffer = 0;
2664
2665                 gl.genBuffers(1, &buffer);
2666                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2667                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2668
2669                 m_intermediateResultBuffers.push_back(buffer);
2670                 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2671         }
2672
2673         // gen programs
2674
2675         m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2676         m_testCtx.getLog() << *m_program;
2677         if (!m_program->isOk())
2678                 throw tcu::TestError("could not build program");
2679
2680         m_imageReadProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageReadSource()));
2681         if (!m_imageReadProgram->isOk())
2682         {
2683                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageReadProgram", "Image read program");
2684
2685                 m_testCtx.getLog() << *m_imageReadProgram;
2686                 throw tcu::TestError("could not build program");
2687         }
2688
2689         m_imageClearProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageClearSource()));
2690         if (!m_imageClearProgram->isOk())
2691         {
2692                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageClearProgram", "Image read program");
2693
2694                 m_testCtx.getLog() << *m_imageClearProgram;
2695                 throw tcu::TestError("could not build program");
2696         }
2697 }
2698
2699 void ConcurrentImageAtomicCase::deinit (void)
2700 {
2701         if (m_imageID)
2702         {
2703                 m_context.getRenderContext().getFunctions().deleteTextures(1, &m_imageID);
2704                 m_imageID = 0;
2705         }
2706
2707         for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2708                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2709         m_intermediateResultBuffers.clear();
2710
2711         delete m_program;
2712         m_program = DE_NULL;
2713
2714         delete m_imageReadProgram;
2715         m_imageReadProgram = DE_NULL;
2716
2717         delete m_imageClearProgram;
2718         m_imageClearProgram = DE_NULL;
2719 }
2720
2721 TestCase::IterateResult ConcurrentImageAtomicCase::iterate (void)
2722 {
2723         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
2724         const deUint32                  sumValue                = (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2725         std::vector<int>                deltas;
2726
2727         // generate unique deltas
2728         generateShuffledRamp(m_numCalls, deltas);
2729
2730         // clear image
2731         {
2732                 m_testCtx.getLog() << tcu::TestLog::Message << "Clearing image contents" << tcu::TestLog::EndMessage;
2733
2734                 gl.useProgram(m_imageClearProgram->getProgram());
2735                 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
2736                 gl.dispatchCompute(m_workSize, m_workSize, 1);
2737                 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2738
2739                 GLU_EXPECT_NO_ERROR(gl.getError(), "clear");
2740         }
2741
2742         // invoke program N times, each with a different delta
2743         {
2744                 const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2745
2746                 m_testCtx.getLog()
2747                         << tcu::TestLog::Message
2748                         << "Running shader " << m_numCalls << " times.\n"
2749                         << "Num groups = (" << m_workSize << ", " << m_workSize << ", 1)\n"
2750                         << "Setting u_atomicDelta to a unique value for each call.\n"
2751                         << tcu::TestLog::EndMessage;
2752
2753                 if (deltaLocation == -1)
2754                         throw tcu::TestError("u_atomicDelta location was -1");
2755
2756                 gl.useProgram(m_program->getProgram());
2757                 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
2758
2759                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2760                 {
2761                         m_testCtx.getLog()
2762                                 << tcu::TestLog::Message
2763                                 << "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2764                                 << tcu::TestLog::EndMessage;
2765
2766                         gl.uniform1ui(deltaLocation, deltas[callNdx]);
2767                         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2768                         gl.dispatchCompute(m_workSize, m_workSize, 1);
2769                 }
2770
2771                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2772         }
2773
2774         // Verify result
2775         {
2776                 std::vector<deUint32> result;
2777
2778                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work image, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2779
2780                 readWorkImage(result);
2781
2782                 for (int ndx = 0; ndx < m_workSize * m_workSize; ++ndx)
2783                 {
2784                         if (result[ndx] != sumValue)
2785                         {
2786                                 m_testCtx.getLog()
2787                                         << tcu::TestLog::Message
2788                                         << "Work image error, at index (" << ndx % m_workSize << ", " << ndx / m_workSize << ") expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2789                                         << "Work image contains invalid values."
2790                                         << tcu::TestLog::EndMessage;
2791
2792                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Image contents invalid");
2793                                 return STOP;
2794                         }
2795                 }
2796
2797                 m_testCtx.getLog() << tcu::TestLog::Message << "Work image contents are valid." << tcu::TestLog::EndMessage;
2798         }
2799
2800         // verify steps
2801         {
2802                 std::vector<std::vector<deUint32> >     intermediateResults     (m_numCalls);
2803                 std::vector<deUint32>                           valueChain                      (m_numCalls);
2804                 std::vector<deUint32>                           chainDelta                      (m_numCalls);
2805
2806                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2807
2808                 // collect results
2809
2810                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2811                 {
2812                         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2813                         readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize * m_workSize, intermediateResults[callNdx]);
2814                 }
2815
2816                 // verify values
2817
2818                 for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2819                 {
2820                         int                     invalidOperationNdx;
2821                         deUint32        errorDelta;
2822                         deUint32        errorExpected;
2823
2824                         // collect result chain for each element
2825                         for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2826                                 valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2827
2828                         // check there exists a path from 0 to sumValue using each addition once
2829                         // decompose cumulative results to addition operations (all additions positive => this works)
2830
2831                         std::sort(valueChain.begin(), valueChain.end());
2832
2833                         for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2834                                 chainDelta[callNdx] = ((callNdx + 1 == m_numCalls) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
2835
2836                         // chainDelta contains now the actual additions applied to the value
2837                         std::sort(chainDelta.begin(), chainDelta.end());
2838
2839                         // validate chain
2840                         if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2841                         {
2842                                 m_testCtx.getLog()
2843                                         << tcu::TestLog::Message
2844                                         << "Intermediate buffer error, at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << "), applied operation index "
2845                                         << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2846                                         << "Intermediate buffer contains invalid values. Values at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << ")\n"
2847                                         << tcu::TestLog::EndMessage;
2848
2849                                 for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2850                                         m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2851                                 m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2852
2853                                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2854                                 return STOP;
2855                         }
2856                 }
2857
2858                 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2859         }
2860
2861         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2862         return STOP;
2863 }
2864
2865 void ConcurrentImageAtomicCase::readWorkImage (std::vector<deUint32>& result)
2866 {
2867         const glw::Functions&   gl                              = m_context.getRenderContext().getFunctions();
2868         glu::Buffer                             resultBuffer    (m_context.getRenderContext());
2869
2870         // Read image to an ssbo
2871
2872         {
2873                 const std::vector<deUint32> zeroData(m_workSize*m_workSize, 0);
2874
2875                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *resultBuffer);
2876                 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * m_workSize * m_workSize), &zeroData[0], GL_DYNAMIC_COPY);
2877
2878                 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2879                 gl.useProgram(m_imageReadProgram->getProgram());
2880
2881                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *resultBuffer);
2882                 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
2883                 gl.dispatchCompute(m_workSize, m_workSize, 1);
2884
2885                 GLU_EXPECT_NO_ERROR(gl.getError(), "read");
2886         }
2887
2888         // Read ssbo
2889         {
2890                 const void* ptr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, (int)(sizeof(deUint32) * m_workSize * m_workSize), GL_MAP_READ_BIT);
2891                 GLU_EXPECT_NO_ERROR(gl.getError(), "map");
2892
2893                 if (!ptr)
2894                         throw tcu::TestError("mapBufferRange returned NULL");
2895
2896                 result.resize(m_workSize * m_workSize);
2897                 memcpy(&result[0], ptr, sizeof(deUint32) * m_workSize * m_workSize);
2898
2899                 if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) == GL_FALSE)
2900                         throw tcu::TestError("unmapBuffer returned false");
2901         }
2902 }
2903
2904 std::string ConcurrentImageAtomicCase::genComputeSource (void) const
2905 {
2906         std::ostringstream buf;
2907
2908         buf     << "${GLSL_VERSION_DECL}\n"
2909                 << "${SHADER_IMAGE_ATOMIC_REQUIRE}\n"
2910                 << "\n"
2911                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2912                 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2913                 << "{\n"
2914                 << "    highp uint values[" << m_workSize * m_workSize << "];\n"
2915                 << "} sb_ires;\n"
2916                 << "\n"
2917                 << "layout (binding = 2, r32ui) volatile uniform highp uimage2D u_workImage;\n"
2918                 << "uniform highp uint u_atomicDelta;\n"
2919                 << "\n"
2920                 << "void main ()\n"
2921                 << "{\n"
2922                 << "    highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2923                 << "    sb_ires.values[invocationIndex] = imageAtomicAdd(u_workImage, ivec2(gl_GlobalInvocationID.xy), u_atomicDelta);\n"
2924                 << "}";
2925
2926         return specializeShader(m_context, buf.str().c_str());
2927 }
2928
2929 std::string ConcurrentImageAtomicCase::genImageReadSource (void) const
2930 {
2931         std::ostringstream buf;
2932
2933         buf     << "${GLSL_VERSION_DECL}\n"
2934                 << "\n"
2935                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2936                 << "layout (binding = 1, std430) writeonly buffer ImageValues\n"
2937                 << "{\n"
2938                 << "    highp uint values[" << m_workSize * m_workSize << "];\n"
2939                 << "} sb_res;\n"
2940                 << "\n"
2941                 << "layout (binding = 2, r32ui) readonly uniform highp uimage2D u_workImage;\n"
2942                 << "\n"
2943                 << "void main ()\n"
2944                 << "{\n"
2945                 << "    highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2946                 << "    sb_res.values[invocationIndex] = imageLoad(u_workImage, ivec2(gl_GlobalInvocationID.xy)).x;\n"
2947                 << "}";
2948
2949         return specializeShader(m_context, buf.str().c_str());
2950 }
2951
2952 std::string ConcurrentImageAtomicCase::genImageClearSource (void) const
2953 {
2954         std::ostringstream buf;
2955
2956         buf     << "${GLSL_VERSION_DECL}\n"
2957                 << "\n"
2958                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2959                 << "layout (binding = 2, r32ui) writeonly uniform highp uimage2D u_workImage;\n"
2960                 << "\n"
2961                 << "void main ()\n"
2962                 << "{\n"
2963                 << "    imageStore(u_workImage, ivec2(gl_GlobalInvocationID.xy), uvec4(0, 0, 0, 0));\n"
2964                 << "}";
2965
2966         return specializeShader(m_context, buf.str().c_str());
2967 }
2968
2969 class ConcurrentSSBOAtomicCounterMixedCase : public TestCase
2970 {
2971 public:
2972                                                         ConcurrentSSBOAtomicCounterMixedCase    (Context& context, const char* name, const char* description, int numCalls, int workSize);
2973                                                         ~ConcurrentSSBOAtomicCounterMixedCase   (void);
2974
2975         void                                    init                                                                    (void);
2976         void                                    deinit                                                                  (void);
2977         IterateResult                   iterate                                                                 (void);
2978
2979 private:
2980         std::string                             genSSBOComputeSource                                    (void) const;
2981         std::string                             genAtomicCounterComputeSource                   (void) const;
2982
2983         const int                               m_numCalls;
2984         const int                               m_workSize;
2985         deUint32                                m_bufferID;
2986         glu::ShaderProgram*             m_ssboAtomicProgram;
2987         glu::ShaderProgram*             m_atomicCounterProgram;
2988 };
2989
2990 ConcurrentSSBOAtomicCounterMixedCase::ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2991         : TestCase                                      (context, name, description)
2992         , m_numCalls                            (numCalls)
2993         , m_workSize                            (workSize)
2994         , m_bufferID                            (DE_NULL)
2995         , m_ssboAtomicProgram           (DE_NULL)
2996         , m_atomicCounterProgram        (DE_NULL)
2997 {
2998         // SSBO atomic XORs cancel out
2999         DE_ASSERT((workSize * numCalls) % (16 * 2) == 0);
3000 }
3001
3002 ConcurrentSSBOAtomicCounterMixedCase::~ConcurrentSSBOAtomicCounterMixedCase (void)
3003 {
3004         deinit();
3005 }
3006
3007 void ConcurrentSSBOAtomicCounterMixedCase::init (void)
3008 {
3009         const glw::Functions&           gl                      = m_context.getRenderContext().getFunctions();
3010         const deUint32                          zeroBuf[2]      = { 0, 0 };
3011
3012         // gen buffer
3013
3014         gl.genBuffers(1, &m_bufferID);
3015         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
3016         gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * 2), zeroBuf, GL_DYNAMIC_COPY);
3017
3018         GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
3019
3020         // gen programs
3021
3022         {
3023                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "SSBOProgram", "SSBO atomic program");
3024
3025                 m_ssboAtomicProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genSSBOComputeSource()));
3026                 m_testCtx.getLog() << *m_ssboAtomicProgram;
3027                 if (!m_ssboAtomicProgram->isOk())
3028                         throw tcu::TestError("could not build program");
3029         }
3030         {
3031                 const tcu::ScopedLogSection section(m_testCtx.getLog(), "AtomicCounterProgram", "Atomic counter program");
3032
3033                 m_atomicCounterProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genAtomicCounterComputeSource()));
3034                 m_testCtx.getLog() << *m_atomicCounterProgram;
3035                 if (!m_atomicCounterProgram->isOk())
3036                         throw tcu::TestError("could not build program");
3037         }
3038 }
3039
3040 void ConcurrentSSBOAtomicCounterMixedCase::deinit (void)
3041 {
3042         if (m_bufferID)
3043         {
3044                 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
3045                 m_bufferID = 0;
3046         }
3047
3048         delete m_ssboAtomicProgram;
3049         m_ssboAtomicProgram = DE_NULL;
3050
3051         delete m_atomicCounterProgram;
3052         m_atomicCounterProgram = DE_NULL;
3053 }
3054
3055 TestCase::IterateResult ConcurrentSSBOAtomicCounterMixedCase::iterate (void)
3056 {
3057         const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3058
3059         m_testCtx.getLog() << tcu::TestLog::Message << "Testing atomic counters and SSBO atomic operations with both backed by the same buffer." << tcu::TestLog::EndMessage;
3060
3061         // invoke programs N times
3062         {
3063                 m_testCtx.getLog()
3064                         << tcu::TestLog::Message
3065                         << "Running SSBO atomic program and atomic counter program " << m_numCalls << " times. (interleaved)\n"
3066                         << "Num groups = (" << m_workSize << ", 1, 1)\n"
3067                         << tcu::TestLog::EndMessage;
3068
3069                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_bufferID);
3070                 gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_bufferID);
3071
3072                 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
3073                 {
3074                         gl.useProgram(m_atomicCounterProgram->getProgram());
3075                         gl.dispatchCompute(m_workSize, 1, 1);
3076
3077                         gl.useProgram(m_ssboAtomicProgram->getProgram());
3078                         gl.dispatchCompute(m_workSize, 1, 1);
3079                 }
3080
3081                 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
3082         }
3083
3084         // Verify result
3085         {
3086                 deUint32 result;
3087
3088                 // XORs cancel out, only addition is left
3089                 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
3090
3091                 gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_bufferID);
3092                 result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
3093
3094                 if ((int)result != m_numCalls*m_workSize)
3095                 {
3096                         m_testCtx.getLog()
3097                                 << tcu::TestLog::Message
3098                                 << "Buffer value error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
3099                                 << tcu::TestLog::EndMessage;
3100
3101                         m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
3102                         return STOP;
3103                 }
3104
3105                 m_testCtx.getLog() << tcu::TestLog::Message << "Buffer is valid." << tcu::TestLog::EndMessage;
3106         }
3107
3108         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
3109         return STOP;
3110 }
3111
3112 std::string ConcurrentSSBOAtomicCounterMixedCase::genSSBOComputeSource (void) const
3113 {
3114         std::ostringstream buf;
3115
3116         buf     << "${GLSL_VERSION_DECL}\n"
3117                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3118                 << "layout (binding = 1, std430) volatile buffer WorkBuffer\n"
3119                 << "{\n"
3120                 << "    highp uint targetValue;\n"
3121                 << "    highp uint unused;\n"
3122                 << "} sb_work;\n"
3123                 << "\n"
3124                 << "void main ()\n"
3125                 << "{\n"
3126                 << "    // flip high bits\n"
3127                 << "    highp uint mask = uint(1) << (24u + (gl_GlobalInvocationID.x % 8u));\n"
3128                 << "    sb_work.unused = atomicXor(sb_work.targetValue, mask);\n"
3129                 << "}";
3130
3131         return specializeShader(m_context, buf.str().c_str());
3132 }
3133
3134 std::string ConcurrentSSBOAtomicCounterMixedCase::genAtomicCounterComputeSource (void) const
3135 {
3136         std::ostringstream buf;
3137
3138         buf     << "${GLSL_VERSION_DECL}\n"
3139                 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3140                 << "\n"
3141                 << "layout (binding = 0, offset = 0) uniform atomic_uint u_counter;\n"
3142                 << "\n"
3143                 << "void main ()\n"
3144                 << "{\n"
3145                 << "    atomicCounterIncrement(u_counter);\n"
3146                 << "}";
3147
3148         return specializeShader(m_context, buf.str().c_str());
3149 }
3150
3151 } // anonymous
3152
3153 SynchronizationTests::SynchronizationTests (Context& context)
3154         : TestCaseGroup(context, "synchronization", "Synchronization tests")
3155 {
3156 }
3157
3158 SynchronizationTests::~SynchronizationTests (void)
3159 {
3160 }
3161
3162 void SynchronizationTests::init (void)
3163 {
3164         tcu::TestCaseGroup* const inInvocationGroup             = new tcu::TestCaseGroup(m_testCtx, "in_invocation",    "Test intra-invocation synchronization");
3165         tcu::TestCaseGroup* const interInvocationGroup  = new tcu::TestCaseGroup(m_testCtx, "inter_invocation", "Test inter-invocation synchronization");
3166         tcu::TestCaseGroup* const interCallGroup                = new tcu::TestCaseGroup(m_testCtx, "inter_call",       "Test inter-call synchronization");
3167
3168         addChild(inInvocationGroup);
3169         addChild(interInvocationGroup);
3170         addChild(interCallGroup);
3171
3172         // .in_invocation & .inter_invocation
3173         {
3174                 static const struct CaseConfig
3175                 {
3176                         const char*                                                                     namePrefix;
3177                         const InterInvocationTestCase::StorageType      storage;
3178                         const int                                                                       flags;
3179                 } configs[] =
3180                 {
3181                         { "image",                      InterInvocationTestCase::STORAGE_IMAGE,         0                                                                               },
3182                         { "image_atomic",       InterInvocationTestCase::STORAGE_IMAGE,         InterInvocationTestCase::FLAG_ATOMIC    },
3183                         { "ssbo",                       InterInvocationTestCase::STORAGE_BUFFER,        0                                                                               },
3184                         { "ssbo_atomic",        InterInvocationTestCase::STORAGE_BUFFER,        InterInvocationTestCase::FLAG_ATOMIC    },
3185                 };
3186
3187                 for (int groupNdx = 0; groupNdx < 2; ++groupNdx)
3188                 {
3189                         tcu::TestCaseGroup* const       targetGroup     = (groupNdx == 0) ? (inInvocationGroup) : (interInvocationGroup);
3190                         const int                                       extraFlags      = (groupNdx == 0) ? (0) : (InterInvocationTestCase::FLAG_IN_GROUP);
3191
3192                         for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3193                         {
3194                                 const char* const target = (configs[configNdx].storage == InterInvocationTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3195
3196                                 targetGroup->addChild(new InvocationWriteReadCase(m_context,
3197                                                                                                                                   (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3198                                                                                                                                   (std::string("Write to ") + target + " and read it").c_str(),
3199                                                                                                                                   configs[configNdx].storage,
3200                                                                                                                                   configs[configNdx].flags | extraFlags));
3201
3202                                 targetGroup->addChild(new InvocationReadWriteCase(m_context,
3203                                                                                                                                   (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3204                                                                                                                                   (std::string("Read form ") + target + " and then write to it").c_str(),
3205                                                                                                                                   configs[configNdx].storage,
3206                                                                                                                                   configs[configNdx].flags | extraFlags));
3207
3208                                 targetGroup->addChild(new InvocationOverWriteCase(m_context,
3209                                                                                                                                   (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3210                                                                                                                                   (std::string("Write to ") + target + " twice and read it").c_str(),
3211                                                                                                                                   configs[configNdx].storage,
3212                                                                                                                                   configs[configNdx].flags | extraFlags));
3213
3214                                 targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3215                                                                                                                                    (std::string(configs[configNdx].namePrefix) + "_alias_write").c_str(),
3216                                                                                                                                    (std::string("Write to aliasing ") + target + " and read it").c_str(),
3217                                                                                                                                    InvocationAliasWriteCase::TYPE_WRITE,
3218                                                                                                                                    configs[configNdx].storage,
3219                                                                                                                                    configs[configNdx].flags | extraFlags));
3220
3221                                 targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3222                                                                                                                                    (std::string(configs[configNdx].namePrefix) + "_alias_overwrite").c_str(),
3223                                                                                                                                    (std::string("Write to aliasing ") + target + "s and read it").c_str(),
3224                                                                                                                                    InvocationAliasWriteCase::TYPE_OVERWRITE,
3225                                                                                                                                    configs[configNdx].storage,
3226                                                                                                                                    configs[configNdx].flags | extraFlags));
3227                         }
3228                 }
3229         }
3230
3231         // .inter_call
3232         {
3233                 tcu::TestCaseGroup* const withBarrierGroup              = new tcu::TestCaseGroup(m_testCtx, "with_memory_barrier", "Synchronize with memory barrier");
3234                 tcu::TestCaseGroup* const withoutBarrierGroup   = new tcu::TestCaseGroup(m_testCtx, "without_memory_barrier", "Synchronize without memory barrier");
3235
3236                 interCallGroup->addChild(withBarrierGroup);
3237                 interCallGroup->addChild(withoutBarrierGroup);
3238
3239                 // .with_memory_barrier
3240                 {
3241                         static const struct CaseConfig
3242                         {
3243                                 const char*                                                             namePrefix;
3244                                 const InterCallTestCase::StorageType    storage;
3245                                 const int                                                               flags;
3246                         } configs[] =
3247                         {
3248                                 { "image",                      InterCallTestCase::STORAGE_IMAGE,       0                                                                                                                                               },
3249                                 { "image_atomic",       InterCallTestCase::STORAGE_IMAGE,       InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT    },
3250                                 { "ssbo",                       InterCallTestCase::STORAGE_BUFFER,      0                                                                                                                                               },
3251                                 { "ssbo_atomic",        InterCallTestCase::STORAGE_BUFFER,      InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT    },
3252                         };
3253
3254                         const int seed0 = 123;
3255                         const int seed1 = 457;
3256
3257                         for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3258                         {
3259                                 const char* const target = (configs[configNdx].storage == InterCallTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3260
3261                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3262                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3263                                                                                                                                  (std::string("Write to ") + target + " and read it").c_str(),
3264                                                                                                                                  configs[configNdx].storage,
3265                                                                                                                                  configs[configNdx].flags,
3266                                                                                                                                  InterCallOperations()
3267                                                                                                                                         << op::WriteData::Generate(1, seed0)
3268                                                                                                                                         << op::Barrier()
3269                                                                                                                                         << op::ReadData::Generate(1, seed0)));
3270
3271                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3272                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3273                                                                                                                                  (std::string("Read from ") + target + " and then write to it").c_str(),
3274                                                                                                                                  configs[configNdx].storage,
3275                                                                                                                                  configs[configNdx].flags,
3276                                                                                                                                  InterCallOperations()
3277                                                                                                                                         << op::ReadZeroData::Generate(1)
3278                                                                                                                                         << op::Barrier()
3279                                                                                                                                         << op::WriteData::Generate(1, seed0)));
3280
3281                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3282                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3283                                                                                                                                  (std::string("Write to ") + target + " twice and read it").c_str(),
3284                                                                                                                                  configs[configNdx].storage,
3285                                                                                                                                  configs[configNdx].flags,
3286                                                                                                                                  InterCallOperations()
3287                                                                                                                                         << op::WriteData::Generate(1, seed0)
3288                                                                                                                                         << op::Barrier()
3289                                                                                                                                         << op::WriteData::Generate(1, seed1)
3290                                                                                                                                         << op::Barrier()
3291                                                                                                                                         << op::ReadData::Generate(1, seed1)));
3292
3293                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3294                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_write_read").c_str(),
3295                                                                                                                                  (std::string("Write to multiple ") + target + "s and read them").c_str(),
3296                                                                                                                                  configs[configNdx].storage,
3297                                                                                                                                  configs[configNdx].flags,
3298                                                                                                                                  InterCallOperations()
3299                                                                                                                                         << op::WriteData::Generate(1, seed0)
3300                                                                                                                                         << op::WriteData::Generate(2, seed1)
3301                                                                                                                                         << op::Barrier()
3302                                                                                                                                         << op::ReadMultipleData::Generate(1, seed0, 2, seed1)));
3303
3304                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3305                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_interleaved_write_read").c_str(),
3306                                                                                                                                  (std::string("Write to same ") + target + " in multiple calls and read it").c_str(),
3307                                                                                                                                  configs[configNdx].storage,
3308                                                                                                                                  configs[configNdx].flags,
3309                                                                                                                                  InterCallOperations()
3310                                                                                                                                         << op::WriteDataInterleaved::Generate(1, seed0, true)
3311                                                                                                                                         << op::WriteDataInterleaved::Generate(1, seed1, false)
3312                                                                                                                                         << op::Barrier()
3313                                                                                                                                         << op::ReadDataInterleaved::Generate(1, seed0, seed1)));
3314
3315                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3316                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_ordered").c_str(),
3317                                                                                                                                  (std::string("Two unrelated ") + target + " write-reads").c_str(),
3318                                                                                                                                  configs[configNdx].storage,
3319                                                                                                                                  configs[configNdx].flags,
3320                                                                                                                                  InterCallOperations()
3321                                                                                                                                         << op::WriteData::Generate(1, seed0)
3322                                                                                                                                         << op::WriteData::Generate(2, seed1)
3323                                                                                                                                         << op::Barrier()
3324                                                                                                                                         << op::ReadData::Generate(1, seed0)
3325                                                                                                                                         << op::ReadData::Generate(2, seed1)));
3326
3327                                 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3328                                                                                                                                  (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_non_ordered").c_str(),
3329                                                                                                                                  (std::string("Two unrelated ") + target + " write-reads").c_str(),
3330                                                                                                                                  configs[configNdx].storage,
3331                                                                                                                                  configs[configNdx].flags,
3332                                                                                                                                  InterCallOperations()
3333                                                                                                                                         << op::WriteData::Generate(1, seed0)
3334                                                                                                                                         << op::WriteData::Generate(2, seed1)
3335                                                                                                                                         << op::Barrier()
3336                                                                                                                                         << op::ReadData::Generate(2, seed1)
3337                                                                                                                                         << op::ReadData::Generate(1, seed0)));
3338                         }
3339
3340                         // .without_memory_barrier
3341                         {
3342                                 struct InvocationConfig
3343                                 {
3344                                         const char*     name;
3345                                         int                     count;
3346                                 };
3347
3348                                 static const InvocationConfig ssboInvocations[] =
3349                                 {
3350                                         { "1k",         1024    },
3351                                         { "4k",         4096    },
3352                                         { "32k",        32768   },
3353                                 };
3354                                 static const InvocationConfig imageInvocations[] =
3355                                 {
3356                                         { "8x8",                8       },
3357                                         { "32x32",              32      },
3358                                         { "128x128",    128     },
3359                                 };
3360                                 static const InvocationConfig counterInvocations[] =
3361                                 {
3362                                         { "32",         32              },
3363                                         { "128",        128             },
3364                                         { "1k",         1024    },
3365                                 };
3366                                 static const int callCounts[] = { 2, 5, 100 };
3367
3368                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(ssboInvocations); ++invocationNdx)
3369                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3370                                                 withoutBarrierGroup->addChild(new SSBOConcurrentAtomicCase(m_context, (std::string("ssbo_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + ssboInvocations[invocationNdx].name + "_invocations").c_str(),       "", callCounts[callCountNdx], ssboInvocations[invocationNdx].count));
3371
3372                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(imageInvocations); ++invocationNdx)
3373                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3374                                                 withoutBarrierGroup->addChild(new ConcurrentImageAtomicCase(m_context, (std::string("image_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + imageInvocations[invocationNdx].name + "_invocations").c_str(),    "", callCounts[callCountNdx], imageInvocations[invocationNdx].count));
3375
3376                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3377                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3378                                                 withoutBarrierGroup->addChild(new ConcurrentAtomicCounterCase(m_context, (std::string("atomic_counter_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),      "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3379
3380                                 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3381                                         for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3382                                                 withoutBarrierGroup->addChild(new ConcurrentSSBOAtomicCounterMixedCase(m_context, (std::string("ssbo_atomic_counter_mixed_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),  "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3383                         }
3384                 }
3385         }
3386 }
3387
3388 } // Functional
3389 } // gles31
3390 } // deqp