1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.1 Module
3 * -------------------------------------------------
5 * Copyright 2014 The Android Open Source Project
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
21 * \brief Synchronization Tests
22 *//*--------------------------------------------------------------------*/
24 #include "es31fSynchronizationTests.hpp"
25 #include "tcuTestLog.hpp"
26 #include "tcuStringTemplate.hpp"
27 #include "tcuSurface.hpp"
28 #include "tcuRenderTarget.hpp"
29 #include "gluRenderContext.hpp"
30 #include "gluShaderProgram.hpp"
31 #include "gluObjectWrapper.hpp"
32 #include "gluPixelTransfer.hpp"
33 #include "gluContextInfo.hpp"
34 #include "glwFunctions.hpp"
35 #include "glwEnums.hpp"
36 #include "deStringUtil.hpp"
37 #include "deSharedPtr.hpp"
39 #include "deRandom.hpp"
//! Check that a chain of observed values was produced by the additions 1, 2, ..., N (in any order).
//!
//! The addition applied after element i is computed as valueChain[i+1] - valueChain[i]; for the
//! last element it is sumValue - valueChain[i]. The additions are then sorted and the k-th
//! smallest one (0-based) must equal k+1. The chain itself is not sorted here, so the caller
//! is presumably expected to pass it already ordered (see the function name).
//!
//! \param valueChain			observed values, one per call
//! \param sumValue				final value after all additions
//! \param invalidOperationNdx	[out] index (in the sorted additions) of the first mismatch; written only on failure
//! \param errorDelta			[out] the addition found at that index; written only on failure
//! \param errorExpected		[out] the addition expected at that index; written only on failure
//! \return true if the sorted additions form the ramp 1..N (trivially true for an empty chain)
static bool validateSortedAtomicRampAdditionValueChain (const std::vector<deUint32>& valueChain, deUint32 sumValue, int& invalidOperationNdx, deUint32& errorDelta, deUint32& errorExpected)
{
	std::vector<deUint32> chainDelta(valueChain.size());

	for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
		chainDelta[callNdx] = ((callNdx + 1 == (int)valueChain.size()) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];

	// chainDelta now contains the actual additions applied to the value;
	// check that there exists an addition ramp from 1 to N
	std::sort(chainDelta.begin(), chainDelta.end());

	for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
	{
		if ((int)chainDelta[callNdx] != callNdx+1)
		{
			invalidOperationNdx	= callNdx;
			errorDelta			= chainDelta[callNdx];
			errorExpected		= callNdx+1;

			return false;
		}
	}

	return true;
}
78 static void readBuffer (const glw::Functions& gl, deUint32 target, int numElements, std::vector<deUint32>& result)
80 const void* ptr = gl.mapBufferRange(target, 0, (int)(sizeof(deUint32) * numElements), GL_MAP_READ_BIT);
81 GLU_EXPECT_NO_ERROR(gl.getError(), "map");
84 throw tcu::TestError("mapBufferRange returned NULL");
86 result.resize(numElements);
87 memcpy(&result[0], ptr, sizeof(deUint32) * numElements);
89 if (gl.unmapBuffer(target) == GL_FALSE)
90 throw tcu::TestError("unmapBuffer returned false");
93 static deUint32 readBufferUint32 (const glw::Functions& gl, deUint32 target)
95 std::vector<deUint32> vec;
97 readBuffer(gl, target, 1, vec);
102 //! Generate a ramp of values from 1 to numElements, and shuffle it
103 void generateShuffledRamp (int numElements, std::vector<int>& ramp)
105 de::Random rng(0xabcd);
107 // some positive (non-zero) unique values
108 ramp.resize(numElements);
109 for (int callNdx = 0; callNdx < numElements; ++callNdx)
110 ramp[callNdx] = callNdx + 1;
112 rng.shuffle(ramp.begin(), ramp.end());
115 static std::string specializeShader(Context& context, const char* code)
117 const glu::GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(context.getRenderContext().getType());
118 std::map<std::string, std::string> specializationMap;
120 specializationMap["GLSL_VERSION_DECL"] = glu::getGLSLVersionDeclaration(glslVersion);
122 if (glu::contextSupports(context.getRenderContext().getType(), glu::ApiType::es(3, 2)))
123 specializationMap["SHADER_IMAGE_ATOMIC_REQUIRE"] = "";
125 specializationMap["SHADER_IMAGE_ATOMIC_REQUIRE"] = "#extension GL_OES_shader_image_atomic : require";
127 return tcu::StringTemplate(code).specialize(specializationMap);
// Common base of the inter-invocation synchronization cases. It holds the storage
// buffer/texture, the result buffer and the compute program, and declares the dispatch
// (runCompute) and verification (verifyResults) steps. Derived classes provide the
// compute shader through genShaderSource().
130 class InterInvocationTestCase : public TestCase
// Flag bit decoded by the constructor into m_aliasingStorages.
143 FLAG_ALIASING_STORAGES = 0x2,
// flags is a bitwise OR of FLAG_* values; by default no flag is set.
147 InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags = 0);
148 ~InterInvocationTestCase (void);
// Reports the pass/fail result on the test context.
153 IterateResult iterate (void);
// Binds the program and resources and dispatches the compute work.
155 void runCompute (void);
// Reads back the result buffer and checks that every element is 1.
156 bool verifyResults (void);
// Returns the complete compute shader source of the case.
157 virtual std::string genShaderSource (void) const = 0;
// Returns the GLSL synchronization snippet inserted between shader access phases.
160 std::string genBarrierSource (void) const;
// Storage kind accessed by the shader (buffer or image).
162 const StorageType m_storage;
// True when FLAG_ATOMIC is set: storage is accessed with atomic operations.
163 const bool m_useAtomic;
// True when FLAG_ALIASING_STORAGES is set: the same object is bound to two binding points.
164 const bool m_aliasingStorages;
// True when FLAG_IN_GROUP is set: invocations access data of other invocations in the work group.
165 const bool m_syncWithGroup;
166 const int m_workWidth; // !< total work width
167 const int m_workHeight; // !< ... height
168 const int m_localWidth; // !< group width
169 const int m_localHeight; // !< group height
170 const int m_elementsPerInvocation; // !< elements accessed by a single invocation
// GL object names generated in init() and deleted in deinit().
173 glw::GLuint m_storageBuf;
174 glw::GLuint m_storageTex;
175 glw::GLuint m_resultBuf;
// Compute program, allocated in init().
176 glu::ShaderProgram* m_program;
// Stores the storage kind and decodes the flag bits into the boolean members.
179 InterInvocationTestCase::InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
180 : TestCase (context, name, desc)
181 , m_storage (storage)
182 , m_useAtomic ((flags & FLAG_ATOMIC) != 0)
183 , m_aliasingStorages ((flags & FLAG_ALIASING_STORAGES) != 0)
184 , m_syncWithGroup ((flags & FLAG_IN_GROUP) != 0)
// Every invocation accesses 8 storage elements.
189 , m_elementsPerInvocation (8)
// The program is created later, in init().
193 , m_program (DE_NULL)
// Sanity checks on the configuration.
195 DE_ASSERT(m_storage < STORAGE_LAST);
196 DE_ASSERT(m_localWidth*m_localHeight <= 128); // minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
// Destructor.
// NOTE(review): the body is not visible in this view — confirm that it releases the GL objects (e.g. via deinit()).
199 InterInvocationTestCase::~InterInvocationTestCase (void)
// Checks the extension requirement, builds the compute program and allocates the GL
// resources: a zero-filled storage buffer or image, and a result buffer filled with -1.
// Throws tcu::NotSupportedError when image atomics are unavailable and tcu::TestError
// when the program does not build.
204 void InterInvocationTestCase::init (void)
206 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
207 const bool supportsES32 = glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2));
// Atomic image access needs either an ES 3.2 context or GL_OES_shader_image_atomic.
211 if (m_useAtomic && m_storage == STORAGE_IMAGE && !supportsES32 && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
212 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
// Build the case-specific compute program and write it to the log.
216 m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genShaderSource()));
217 m_testCtx.getLog() << *m_program;
218 if (!m_program->isOk())
219 throw tcu::TestError("could not build program");
// Buffer storage: m_elementsPerInvocation 32-bit elements per invocation, all zero.
223 if (m_storage == STORAGE_BUFFER)
225 const int bufferElements = m_workWidth * m_workHeight * m_elementsPerInvocation;
226 const int bufferSize = bufferElements * (int)sizeof(deUint32);
227 std::vector<deUint32> zeroBuffer (bufferElements, 0);
229 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
231 gl.genBuffers(1, &m_storageBuf);
232 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
233 gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
234 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
// Image storage: R32I texture of m_workWidth x (m_workHeight * m_elementsPerInvocation) texels.
236 else if (m_storage == STORAGE_IMAGE)
238 const int bufferElements = m_workWidth * m_workHeight * m_elementsPerInvocation;
239 const int bufferSize = bufferElements * (int)sizeof(deUint32);
241 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x" << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
243 gl.genTextures(1, &m_storageTex);
244 gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
245 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
246 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
247 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
248 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");
// The texture contents are explicitly cleared to zero.
251 m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;
254 const std::vector<deInt32> zeroBuffer(m_workWidth * m_workHeight * m_elementsPerInvocation, 0);
255 gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
256 GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
// Result buffer: one element per invocation, pre-filled with -1 so that an element the
// shader never writes differs from the value 1 expected by verifyResults().
265 const int bufferElements = m_workWidth * m_workHeight;
266 const int bufferSize = bufferElements * (int)sizeof(deUint32);
267 std::vector<deInt32> negativeBuffer (bufferElements, -1);
269 m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
271 gl.genBuffers(1, &m_resultBuf);
272 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
273 gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
// NOTE(review): the error label says "gen storage buf" although this is the result buffer.
274 GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
// Deletes the storage buffer, the storage texture and the result buffer and resets
// their handles.
// NOTE(review): DE_NULL is assigned to GLuint handles here, while elsewhere in this file
// it initializes pointers; a plain 0 would state the intent more directly — confirm.
278 void InterInvocationTestCase::deinit (void)
282 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
283 m_storageBuf = DE_NULL;
288 m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
289 m_storageTex = DE_NULL;
294 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
295 m_resultBuf = DE_NULL;
// Reports the test outcome: either "Pass" or "<buffer|image> content verification failed",
// where the word is chosen by m_storage.
// NOTE(review): the condition that selects between the two results is not visible in this view.
302 InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate (void)
307 // Verify buffer contents
309 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
311 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
// Binds the program, the storage object(s) and the result buffer, then dispatches
// (m_workWidth / m_localWidth) x (m_workHeight / m_localHeight) work groups.
316 void InterInvocationTestCase::runCompute (void)
318 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
319 const int groupsX = m_workWidth / m_localWidth;
320 const int groupsY = m_workHeight / m_localHeight;
// The total work size must be an exact multiple of the group size.
322 DE_ASSERT((m_workWidth % m_localWidth) == 0);
323 DE_ASSERT((m_workHeight % m_localHeight) == 0);
326 << tcu::TestLog::Message
327 << "Dispatching compute.\n"
328 << " group size: " << m_localWidth << "x" << m_localHeight << "\n"
329 << " dispatch size: " << groupsX << "x" << groupsY << "\n"
330 << " total work size: " << m_workWidth << "x" << m_workHeight << "\n"
331 << tcu::TestLog::EndMessage;
333 gl.useProgram(m_program->getProgram());
// The storage object goes to binding 1; aliasing cases bind the same object to binding 2 as well.
336 if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
338 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
339 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
341 else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
343 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
344 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
345 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
347 m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages." << tcu::TestLog::EndMessage;
// NOTE(review): the image paths below use the "bind result buf" label for the storage image binding.
349 else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
351 gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
352 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
354 else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
356 gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
357 gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
359 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
361 m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages." << tcu::TestLog::EndMessage;
// The result buffer goes to binding 0.
367 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
368 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
// A single dispatch covers the whole work area.
371 gl.dispatchCompute(groupsX, groupsY, 1);
372 GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
// Maps the result buffer, copies it to host memory and checks that every element is 1.
// The first errorFloodThreshold mismatches are logged individually; after that a
// "too many errors" message is logged instead.
// Throws tcu::TestError when unmapBuffer does not return GL_TRUE.
375 bool InterInvocationTestCase::verifyResults (void)
377 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
378 const int errorFloodThreshold = 5;
379 int numErrorsLogged = 0;
380 const void* mapped = DE_NULL;
381 std::vector<deInt32> results (m_workWidth * m_workHeight);
// Map one 32-bit element per invocation for reading.
384 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
385 mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(deInt32), GL_MAP_READ_BIT);
386 GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
// NOTE(review): no NULL check on `mapped` is visible before the copy below; the sibling
// helper readBuffer() throws on a NULL mapping — confirm whether one is needed here.
388 // copy to properly aligned array
389 deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(deUint32));
391 if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
392 throw tcu::TestError("memory map store corrupted");
// Every invocation is expected to have written 1 (allOk) to its result slot.
395 for (int ndx = 0; ndx < (int)results.size(); ++ndx)
397 if (results[ndx] != 1)
// The header line is logged once, before the first individual error.
401 if (numErrorsLogged == 0)
402 m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
403 if (numErrorsLogged++ < errorFloodThreshold)
404 m_testCtx.getLog() << tcu::TestLog::Message << " Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
407 // after N errors, no point continuing verification
408 m_testCtx.getLog() << tcu::TestLog::Message << " -- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
415 m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
// Builds the GLSL snippet placed between two access phases of the shader. The visible
// branches emit groupMemoryBarrier() in the first case, nothing for buffer storage
// without group sync, and memoryBarrierImage() for image storage without group sync.
// NOTE(review): the condition of the first branch is not visible in this view; the asserts
// in the other branches suggest it is m_syncWithGroup — confirm.
419 std::string InterInvocationTestCase::genBarrierSource (void) const
421 std::ostringstream buf;
425 // Wait until all invocations in this work group have their texture/buffer read/write operations complete
426 // \note We could also use memoryBarrierBuffer() or memoryBarrierImage() in place of groupMemoryBarrier() but
427 // we only require intra-workgroup synchronization.
429 << " groupMemoryBarrier();\n"
433 else if (m_storage == STORAGE_BUFFER)
435 DE_ASSERT(!m_syncWithGroup);
437 // Waiting only for data written by this invocation. Since all buffer reads and writes are
438 // processed in order (within a single invocation), we don't have to do anything.
441 else if (m_storage == STORAGE_IMAGE)
443 DE_ASSERT(!m_syncWithGroup);
445 // Waiting only for data written by this invocation. But since operations complete in undefined
446 // order, we have to wait for them to complete.
448 << " memoryBarrierImage();\n"
// Intermediate base class that generates the shader boilerplate (declarations, index
// helpers, main() prologue and result write); subclasses only provide the statements of
// main() through genShaderMainBlock().
457 class InvocationBasicCase : public InterInvocationTestCase
460 InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags);
// Full compute shader source, built around genShaderMainBlock().
462 std::string genShaderSource (void) const;
// Case-specific statements inserted into main().
463 virtual std::string genShaderMainBlock (void) const = 0;
// Forwards all arguments unchanged to InterInvocationTestCase.
466 InvocationBasicCase::InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
467 : InterInvocationTestCase(context, name, desc, storage, flags)
// Assembles the compute shader: version/extension header, local size, the result buffer
// at binding 0, the storage declaration at binding 1 with its addressing helper
// (getIndex for buffers, getCoord for images), and a main() that wraps
// genShaderMainBlock() and writes 1 or 0 to the invocation's result slot.
// The template placeholders are resolved by specializeShader().
471 std::string InvocationBasicCase::genShaderSource (void) const
// The image-atomic extension directive is emitted only when image atomics are used.
473 const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
474 std::ostringstream buf;
476 buf << "${GLSL_VERSION_DECL}\n"
477 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
478 << "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
479 << "layout(binding=0, std430) buffer Output\n"
481 << " highp int values[];\n"
// Buffer storage: getIndex() maps (local ID, element) to an index into the storage array.
484 if (m_storage == STORAGE_BUFFER)
485 buf << "layout(binding=1, std430) coherent buffer Storage\n"
487 << " highp int values[];\n"
490 << "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
492 << " highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
493 << " return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
// Image storage: getCoord() maps (local ID, element) to a texel; each element index is
// offset by m_workHeight rows.
495 else if (m_storage == STORAGE_IMAGE)
496 buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
498 << "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
500 << " return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
// main(): resultNdx is the linear global invocation index, groupNdx the linear work group index.
506 << "void main (void)\n"
508 << " int resultNdx = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
509 << " int groupNdx = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
510 << " bool allOk = true;\n"
512 << genShaderMainBlock()
514 << " sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
517 return specializeShader(m_context, buf.str().c_str());
// Case whose shader writes to storage first and reads the values back after a barrier.
520 class InvocationWriteReadCase : public InvocationBasicCase
523 InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags);
// Generates the write / barrier / read-and-compare statements of main().
525 std::string genShaderMainBlock (void) const;
// Forwards all arguments unchanged to InvocationBasicCase.
528 InvocationWriteReadCase::InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
529 : InvocationBasicCase(context, name, desc, storage, flags)
// Emits the statements of main(): each invocation stores groupNdx into its own
// m_elementsPerInvocation elements, passes the barrier from genBarrierSource(), then
// reads the elements back and clears allOk if any value differs from groupNdx.
// With m_syncWithGroup the read-back addresses another invocation of the same work group.
533 std::string InvocationWriteReadCase::genShaderMainBlock (void) const
535 std::ostringstream buf;
// Write phase; the atomic variants add groupNdx instead of storing it.
539 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
541 if (m_storage == STORAGE_BUFFER && m_useAtomic)
542 buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
543 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
544 buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
545 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
546 buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
547 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
548 buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
// Synchronize between the write and read phases.
555 buf << genBarrierSource();
// Read phase; the atomic variants read through an exchange with 0.
559 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
// In-group sync reads the slot of the invocation at local offset (ndx+1, 2*ndx), wrapped to the group size.
561 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
563 if (m_storage == STORAGE_BUFFER && m_useAtomic)
564 buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 0) == groupNdx);\n";
565 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
566 buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
567 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
568 buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 0) == groupNdx);\n";
569 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
570 buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
// Case whose shader reads from storage first and writes to it after a barrier.
578 class InvocationReadWriteCase : public InvocationBasicCase
581 InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags);
// Generates the read-and-compare / barrier / write statements of main().
583 std::string genShaderMainBlock (void) const;
// Forwards all arguments unchanged to InvocationBasicCase.
586 InvocationReadWriteCase::InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
587 : InvocationBasicCase(context, name, desc, storage, flags)
// Emits the statements of main(): each element is first read and compared against 0
// (the storage is zero-filled in init()), then, after the barrier from genBarrierSource(),
// the invocation writes groupNdx into its own elements. With m_syncWithGroup the read
// addresses another invocation of the same work group.
591 std::string InvocationReadWriteCase::genShaderMainBlock (void) const
593 std::ostringstream buf;
// Read phase; the atomic variants read through an exchange that stores 123.
597 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
// In-group sync reads the slot of the invocation at local offset (ndx+1, 2*ndx), wrapped to the group size.
599 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
601 if (m_storage == STORAGE_BUFFER && m_useAtomic)
602 buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == 0);\n";
603 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
604 buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
605 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
606 buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == 0);\n";
607 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
608 buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
// Synchronize between the read and write phases.
615 buf << genBarrierSource();
// Write phase: every invocation writes to its own slots.
619 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
621 if (m_storage == STORAGE_BUFFER && m_useAtomic)
622 buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
623 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
624 buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
625 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
626 buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
627 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
628 buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
// Case whose shader writes a value, overwrites it after a barrier and verifies the
// overwritten value after a second barrier.
636 class InvocationOverWriteCase : public InvocationBasicCase
639 InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags);
// Generates the write / barrier / overwrite / barrier / verify statements of main().
641 std::string genShaderMainBlock (void) const;
// Forwards all arguments unchanged to InvocationBasicCase.
644 InvocationOverWriteCase::InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
645 : InvocationBasicCase(context, name, desc, storage, flags)
// Emits the statements of main() in three phases separated by genBarrierSource():
// 1) write 456 into the invocation's own elements, 2) overwrite elements with groupNdx,
// 3) read elements back and clear allOk if a value differs from groupNdx.
// With m_syncWithGroup, phases 2 and 3 address slots of other invocations in the work group.
649 std::string InvocationOverWriteCase::genShaderMainBlock (void) const
651 std::ostringstream buf;
// Phase 1: initial write to own slots.
655 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
657 if (m_storage == STORAGE_BUFFER && m_useAtomic)
658 buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
659 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
660 buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
661 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
662 buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
663 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
664 buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
671 buf << genBarrierSource();
// Phase 2: overwrite; in-group sync targets local offset (ndx+4, 3*ndx), wrapped to the group size.
675 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
677 // write another invocation's value or our own value depending on test type
678 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+4) + ", " + de::toString(3*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
680 if (m_storage == STORAGE_BUFFER && m_useAtomic)
681 buf << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
682 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
683 buf << "\tsb_store.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
684 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
685 buf << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
686 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
687 buf << "\timageStore(u_image, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
694 buf << genBarrierSource();
// Phase 3: verify; in-group sync targets local offset (ndx+1, 2*ndx), wrapped to the group size.
698 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
700 // check another invocation's value or our own value depending on test type
701 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
703 if (m_storage == STORAGE_BUFFER && m_useAtomic)
704 buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == groupNdx);\n";
705 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
706 buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
707 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
708 buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == groupNdx);\n";
709 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
710 buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
// Case that accesses one storage object through two aliasing bindings: it writes through
// the second binding and verifies through the first.
718 class InvocationAliasWriteCase : public InterInvocationTestCase
729 InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags);
// Full compute shader source of the case.
731 std::string genShaderSource (void) const;
// Selects the shader variant (genShaderSource() checks TYPE_OVERWRITE and TYPE_WRITE).
733 const TestType m_type;
// Always adds FLAG_ALIASING_STORAGES to the flags passed to the base class, so the
// storage object gets bound to both binding points.
736 InvocationAliasWriteCase::InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags)
737 : InterInvocationTestCase (context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
740 DE_ASSERT(type < TYPE_LAST);
// Assembles the aliasing compute shader: header, result buffer at binding 0, two storage
// declarations at bindings 1 and 2 (bound to the same object by runCompute()), the
// addressing helper, and a main() that optionally pre-writes 456 through the first
// binding (TYPE_OVERWRITE), writes groupNdx through the second binding, and, after a
// barrier, verifies through the first binding that its own elements equal groupNdx.
// The template placeholders are resolved by specializeShader().
743 std::string InvocationAliasWriteCase::genShaderSource (void) const
// The image-atomic extension directive is emitted only when image atomics are used.
745 const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
746 std::ostringstream buf;
748 buf << "${GLSL_VERSION_DECL}\n"
749 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
750 << "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
751 << "layout(binding=0, std430) buffer Output\n"
753 << " highp int values[];\n"
// Buffer storage: two blocks at bindings 1 and 2, plus getIndex() for addressing.
756 if (m_storage == STORAGE_BUFFER)
757 buf << "layout(binding=1, std430) coherent buffer Storage0\n"
759 << " highp int values[];\n"
761 << "layout(binding=2, std430) coherent buffer Storage1\n"
763 << " highp int values[];\n"
766 << "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
768 << " highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
769 << " return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
// Image storage: two image uniforms at bindings 1 and 2, plus getCoord() for addressing.
771 else if (m_storage == STORAGE_IMAGE)
772 buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
773 << "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
775 << "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
777 << " return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
// main(): resultNdx is the linear global invocation index, groupNdx the linear work group index.
783 << "void main (void)\n"
785 << " int resultNdx = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
786 << " int groupNdx = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
787 << " bool allOk = true;\n"
// Overwrite variant: first write 456 through the first binding, then synchronize.
790 if (m_type == TYPE_OVERWRITE)
794 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
796 if (m_storage == STORAGE_BUFFER && m_useAtomic)
797 buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
798 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
799 buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
800 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
801 buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
802 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
803 buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
810 buf << genBarrierSource();
813 DE_ASSERT(m_type == TYPE_WRITE);
// Write groupNdx through the second binding; in-group sync targets local offset
// (ndx+2, 2*ndx), wrapped to the group size.
817 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
819 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+2) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
821 if (m_storage == STORAGE_BUFFER && m_useAtomic)
822 buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
823 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
824 buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
825 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
826 buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
827 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
828 buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
835 buf << genBarrierSource();
// Verify through the first binding that the invocation's own elements hold groupNdx.
839 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
841 if (m_storage == STORAGE_BUFFER && m_useAtomic)
842 buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 123) == groupNdx);\n";
843 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
844 buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] == groupNdx);\n";
845 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
846 buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 123) == groupNdx);\n";
847 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
848 buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << ")).x == groupNdx);\n";
// Publish the per-invocation verdict: 1 on success, 0 on failure.
856 << " sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
859 return specializeShader(m_context, buf.str().c_str());
// Factory for a WriteData command; records targetHandle in the returned value.
// NOTE(review): how `seed` is stored is not visible in this view.
870 static WriteData Generate(int targetHandle, int seed)
874 retVal.targetHandle = targetHandle;
// Factory for a ReadData command; records targetHandle in the returned value.
// NOTE(review): how `seed` is stored is not visible in this view.
886 static ReadData Generate(int targetHandle, int seed)
890 retVal.targetHandle = targetHandle;
// Command describing an interleaved write to a storage object; evenOdd is stored
// as passed.
// NOTE(review): how `seed` is stored is not visible in this view.
901 struct WriteDataInterleaved
// Factory: fills in the fields from the arguments.
907 static WriteDataInterleaved Generate(int targetHandle, int seed, bool evenOdd)
909 WriteDataInterleaved retVal;
911 retVal.targetHandle = targetHandle;
913 retVal.evenOdd = evenOdd;
// Command describing a read of interleaved data from one storage object, identified
// by a target handle and two seeds.
919 struct ReadDataInterleaved
// Factory: copies the arguments into the fields of the same name.
925 static ReadDataInterleaved Generate(int targetHandle, int seed0, int seed1)
927 ReadDataInterleaved retVal;
929 retVal.targetHandle = targetHandle;
930 retVal.seed0 = seed0;
931 retVal.seed1 = seed1;
// Command describing a read from two storage objects, each identified by a target
// handle and a seed.
937 struct ReadMultipleData
// Factory: copies the arguments into the fields of the same name.
944 static ReadMultipleData Generate(int targetHandle0, int seed0, int targetHandle1, int seed1)
946 ReadMultipleData retVal;
948 retVal.targetHandle0 = targetHandle0;
949 retVal.seed0 = seed0;
950 retVal.targetHandle1 = targetHandle1;
951 retVal.seed1 = seed1;
// Factory for a ReadZeroData command; records targetHandle in the returned value.
961 static ReadZeroData Generate(int targetHandle)
965 retVal.targetHandle = targetHandle;
973 class InterCallTestCase;
// Ordered list of commands for an inter-call test. Commands are appended with
// operator<<, one overload per command type; each overload tags the stored Command
// with the matching type and copies the payload into the corresponding member.
975 class InterCallOperations
978 InterCallOperations& operator<< (const op::WriteData&);
979 InterCallOperations& operator<< (const op::ReadData&);
980 InterCallOperations& operator<< (const op::Barrier&);
981 InterCallOperations& operator<< (const op::ReadMultipleData&);
982 InterCallOperations& operator<< (const op::WriteDataInterleaved&);
983 InterCallOperations& operator<< (const op::ReadDataInterleaved&);
984 InterCallOperations& operator<< (const op::ReadZeroData&);
// Command type tags.
995 TYPE_WRITE_INTERLEAVE,
996 TYPE_READ_INTERLEAVE,
// Per-type command payloads (accessed as u_cmd.<member> by the operator<< overloads).
1006 op::WriteData write;
1008 op::Barrier barrier;
1009 op::ReadMultipleData readMulti;
1010 op::WriteDataInterleaved writeInterleave;
1011 op::ReadDataInterleaved readInterleave;
1012 op::ReadZeroData readZero;
// The test case reads m_cmds directly.
1016 friend class InterCallTestCase;
// Commands in the order they were appended.
1018 std::vector<Command> m_cmds;
1021 InterCallOperations& InterCallOperations::operator<< (const op::WriteData& cmd)
1023 m_cmds.push_back(Command());
1024 m_cmds.back().type = Command::TYPE_WRITE;
1025 m_cmds.back().u_cmd.write = cmd;
1030 InterCallOperations& InterCallOperations::operator<< (const op::ReadData& cmd)
1032 m_cmds.push_back(Command());
1033 m_cmds.back().type = Command::TYPE_READ;
1034 m_cmds.back().u_cmd.read = cmd;
1039 InterCallOperations& InterCallOperations::operator<< (const op::Barrier& cmd)
1041 m_cmds.push_back(Command());
1042 m_cmds.back().type = Command::TYPE_BARRIER;
1043 m_cmds.back().u_cmd.barrier = cmd;
1048 InterCallOperations& InterCallOperations::operator<< (const op::ReadMultipleData& cmd)
1050 m_cmds.push_back(Command());
1051 m_cmds.back().type = Command::TYPE_READ_MULTIPLE;
1052 m_cmds.back().u_cmd.readMulti = cmd;
1057 InterCallOperations& InterCallOperations::operator<< (const op::WriteDataInterleaved& cmd)
1059 m_cmds.push_back(Command());
1060 m_cmds.back().type = Command::TYPE_WRITE_INTERLEAVE;
1061 m_cmds.back().u_cmd.writeInterleave = cmd;
// Appends an interleaved read command: pushes a default-constructed Command,
// tags it TYPE_READ_INTERLEAVE and stores a copy of cmd in the readInterleave
// payload.
1066 InterCallOperations& InterCallOperations::operator<< (const op::ReadDataInterleaved& cmd)
1068 m_cmds.push_back(Command());
1069 m_cmds.back().type = Command::TYPE_READ_INTERLEAVE;
1070 m_cmds.back().u_cmd.readInterleave = cmd;
// Appends a read-zero command: pushes a default-constructed Command, tags it
// TYPE_READ_ZERO and stores a copy of cmd in the readZero payload.
1075 InterCallOperations& InterCallOperations::operator<< (const op::ReadZeroData& cmd)
1077 m_cmds.push_back(Command());
1078 m_cmds.back().type = Command::TYPE_READ_ZERO;
1079 m_cmds.back().u_cmd.readZero = cmd;
// Test case that executes a recorded InterCallOperations command list with
// compute dispatches, using either buffer or image storage (m_storage), and
// then checks the result buffers written by the read-type commands.
1084 class InterCallTestCase : public TestCase
// Flag bit: when set in the constructor's flags argument, m_useAtomic is true.
1096 FLAG_USE_ATOMIC = 1,
1099 InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops);
1100 ~InterCallTestCase (void);
1105 IterateResult iterate (void);
1106 bool verifyResults (void);
// One runCommand overload per command payload type. The friendly-name
// counters are taken by reference and incremented for log output.
1108 void runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName);
1109 void runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1110 void runCommand (const op::Barrier&);
1111 void runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1112 void runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName);
1113 void runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1114 void runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
// Shared implementation for the single-source read commands.
1115 void runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
// Resource and shader-program factories used by init().
1117 glw::GLuint genStorage (int friendlyName);
1118 glw::GLuint genResultStorage (void);
1119 glu::ShaderProgram* genWriteProgram (int seed);
1120 glu::ShaderProgram* genReadProgram (int seed);
1121 glu::ShaderProgram* genReadMultipleProgram (int seed0, int seed1);
1122 glu::ShaderProgram* genWriteInterleavedProgram (int seed, bool evenOdd);
1123 glu::ShaderProgram* genReadInterleavedProgram (int seed0, int seed1);
1124 glu::ShaderProgram* genReadZeroProgram (void);
1126 const StorageType m_storage;
1127 const int m_invocationGridSize; // !< width and height of the two dimensional work dispatch
1128 const int m_perInvocationSize; // !< number of elements accessed in single invocation
1129 const std::vector<InterCallOperations::Command> m_cmds;
1130 const bool m_useAtomic;
1131 const bool m_formatInteger;
// Indexed by command step; init() sizes both to m_cmds.size(). Entries stay
// DE_NULL / 0 for steps that have no program or no result buffer.
1133 std::vector<glu::ShaderProgram*> m_operationPrograms;
1134 std::vector<glw::GLuint> m_operationResultStorages;
// Maps a command's target handle to the GL buffer/texture name created for it.
1135 std::map<int, glw::GLuint> m_storageIDs;
// Copies the command list out of ops and decodes flags into m_useAtomic and
// m_formatInteger. The dispatch grid size is fixed at 512 and each invocation
// accesses 2 elements.
1138 InterCallTestCase::InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops)
1139 : TestCase (context, name, desc)
1140 , m_storage (storage)
1141 , m_invocationGridSize (512)
1142 , m_perInvocationSize (2)
1143 , m_cmds (ops.m_cmds)
1144 , m_useAtomic ((flags & FLAG_USE_ATOMIC) != 0)
1145 , m_formatInteger ((flags & FLAG_USE_INT) != 0)
// Destructor. The programs and GL objects created by init() are released by
// deinit().
1149 InterCallTestCase::~InterCallTestCase (void)
// Prepares all resources for the command list.
//
// Throws tcu::NotSupportedError when image atomics are requested but the
// context is neither ES 3.2 nor exposes GL_OES_shader_image_atomic, and
// tcu::TestError when any generated program fails to build.
//
// For every step a shader program is generated and logged; read-type steps
// additionally get a result buffer. Storage is created the first time a
// write, interleaved-write or read-zero command names a new target handle.
//
// NOTE(review): each gen*Program() result is heap-allocated and is stored in
// m_operationPrograms only after the isOk() check, so the TestError throw
// path appears to leak the program (deinit() only deletes stored entries) --
// confirm and consider storing before the check.
1154 void InterCallTestCase::init (void)
1156 int programFriendlyName = 0;
1157 const bool supportsES32 = glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2));
1161 if (m_useAtomic && m_storage == STORAGE_IMAGE && !supportsES32 && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1162 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
1164 // generate resources and validate command list
// One slot per step; slots remain DE_NULL / 0 unless the step fills them.
1166 m_operationPrograms.resize(m_cmds.size(), DE_NULL);
1167 m_operationResultStorages.resize(m_cmds.size(), 0);
1169 for (int step = 0; step < (int)m_cmds.size(); ++step)
1171 switch (m_cmds[step].type)
// Write: create target storage on first use, then build the write program.
1173 case InterCallOperations::Command::TYPE_WRITE:
1175 const op::WriteData& cmd = m_cmds[step].u_cmd.write;
1177 // new storage handle?
1178 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1179 m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1183 glu::ShaderProgram* program = genWriteProgram(cmd.seed);
1185 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1186 m_testCtx.getLog() << *program;
1188 if (!program->isOk())
1189 throw tcu::TestError("could not build program");
1191 m_operationPrograms[step] = program;
// Read: the target must already exist (asserted); needs a result buffer.
1196 case InterCallOperations::Command::TYPE_READ:
1198 const op::ReadData& cmd = m_cmds[step].u_cmd.read;
1199 DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1201 // program and result storage
1203 glu::ShaderProgram* program = genReadProgram(cmd.seed);
1205 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1206 m_testCtx.getLog() << *program;
1208 if (!program->isOk())
1209 throw tcu::TestError("could not build program");
1211 m_operationPrograms[step] = program;
1212 m_operationResultStorages[step] = genResultStorage();
1217 case InterCallOperations::Command::TYPE_BARRIER:
// Read-multiple: both targets must already exist (asserted).
1222 case InterCallOperations::Command::TYPE_READ_MULTIPLE:
1224 const op::ReadMultipleData& cmd = m_cmds[step].u_cmd.readMulti;
1225 DE_ASSERT(m_storageIDs.find(cmd.targetHandle0) != m_storageIDs.end());
1226 DE_ASSERT(m_storageIDs.find(cmd.targetHandle1) != m_storageIDs.end());
1230 glu::ShaderProgram* program = genReadMultipleProgram(cmd.seed0, cmd.seed1);
1232 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1233 m_testCtx.getLog() << *program;
1235 if (!program->isOk())
1236 throw tcu::TestError("could not build program");
1238 m_operationPrograms[step] = program;
1239 m_operationResultStorages[step] = genResultStorage();
// Interleaved write: create target storage on first use.
1244 case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
1246 const op::WriteDataInterleaved& cmd = m_cmds[step].u_cmd.writeInterleave;
1248 // new storage handle?
1249 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1250 m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1254 glu::ShaderProgram* program = genWriteInterleavedProgram(cmd.seed, cmd.evenOdd);
1256 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1257 m_testCtx.getLog() << *program;
1259 if (!program->isOk())
1260 throw tcu::TestError("could not build program");
1262 m_operationPrograms[step] = program;
// Interleaved read: the target must already exist (asserted).
1267 case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
1269 const op::ReadDataInterleaved& cmd = m_cmds[step].u_cmd.readInterleave;
1270 DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1274 glu::ShaderProgram* program = genReadInterleavedProgram(cmd.seed0, cmd.seed1);
1276 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1277 m_testCtx.getLog() << *program;
1279 if (!program->isOk())
1280 throw tcu::TestError("could not build program");
1282 m_operationPrograms[step] = program;
1283 m_operationResultStorages[step] = genResultStorage();
// Read-zero: unlike the other reads, this may create the (zero-filled)
// storage itself when the handle is new.
1288 case InterCallOperations::Command::TYPE_READ_ZERO:
1290 const op::ReadZeroData& cmd = m_cmds[step].u_cmd.readZero;
1292 // new storage handle?
1293 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1294 m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1298 glu::ShaderProgram* program = genReadZeroProgram();
1300 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1301 m_testCtx.getLog() << *program;
1303 if (!program->isOk())
1304 throw tcu::TestError("could not build program");
1306 m_operationPrograms[step] = program;
1307 m_operationResultStorages[step] = genResultStorage();
1313 DE_ASSERT(DE_FALSE);
// Releases everything held in the three resource containers and clears them:
// shader programs, per-step result buffers, and the per-handle storage
// objects.
1318 void InterCallTestCase::deinit (void)
// Programs (entries may be DE_NULL; delete on a null pointer is a no-op).
1321 for (int ndx = 0; ndx < (int)m_operationPrograms.size(); ++ndx)
1322 delete m_operationPrograms[ndx];
1323 m_operationPrograms.clear();
// Result buffers; a 0 entry means the step had no result buffer.
1326 for (int ndx = 0; ndx < (int)m_operationResultStorages.size(); ++ndx)
1328 if (m_operationResultStorages[ndx])
1329 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_operationResultStorages[ndx]);
1331 m_operationResultStorages.clear();
// Storage objects are buffers or textures depending on m_storage.
1334 for (std::map<int, glw::GLuint>::const_iterator it = m_storageIDs.begin(); it != m_storageIDs.end(); ++it)
1336 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1338 if (m_storage == STORAGE_BUFFER)
1339 gl.deleteBuffers(1, &it->second);
1340 else if (m_storage == STORAGE_IMAGE)
1341 gl.deleteTextures(1, &it->second);
1343 DE_ASSERT(DE_FALSE);
1345 m_storageIDs.clear();
// Executes every recorded command in order by dispatching on its type tag to
// the matching runCommand() overload, then calls verifyResults() and sets
// the test result to pass or fail accordingly.
1348 InterCallTestCase::IterateResult InterCallTestCase::iterate (void)
// Counters passed by reference to runCommand(), which increments them when
// logging program / result storage numbers.
1350 int programFriendlyName = 0;
1351 int resultStorageFriendlyName = 0;
1353 m_testCtx.getLog() << tcu::TestLog::Message << "Running operations:" << tcu::TestLog::EndMessage;
1357 for (int step = 0; step < (int)m_cmds.size(); ++step)
1359 switch (m_cmds[step].type)
1361 case InterCallOperations::Command::TYPE_WRITE: runCommand(m_cmds[step].u_cmd.write, step, programFriendlyName); break;
1362 case InterCallOperations::Command::TYPE_READ: runCommand(m_cmds[step].u_cmd.read, step, programFriendlyName, resultStorageFriendlyName); break;
1363 case InterCallOperations::Command::TYPE_BARRIER: runCommand(m_cmds[step].u_cmd.barrier); break;
1364 case InterCallOperations::Command::TYPE_READ_MULTIPLE: runCommand(m_cmds[step].u_cmd.readMulti, step, programFriendlyName, resultStorageFriendlyName); break;
1365 case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE: runCommand(m_cmds[step].u_cmd.writeInterleave, step, programFriendlyName); break;
1366 case InterCallOperations::Command::TYPE_READ_INTERLEAVE: runCommand(m_cmds[step].u_cmd.readInterleave, step, programFriendlyName, resultStorageFriendlyName); break;
1367 case InterCallOperations::Command::TYPE_READ_ZERO: runCommand(m_cmds[step].u_cmd.readZero, step, programFriendlyName, resultStorageFriendlyName); break;
1369 DE_ASSERT(DE_FALSE);
1373 // read results from result buffers
1374 if (verifyResults())
1375 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
// Failure message names the storage kind under test ("buffer" or "image").
1377 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
// Reads back every per-step result buffer and checks that each of its
// m_invocationGridSize^2 32-bit entries equals 1 (the value the verifier
// shaders store when all their comparisons succeeded). Mismatches are logged
// and clear allResultsOk, which is the value returned.
// Throws tcu::TestError if unmapping a result buffer does not return GL_TRUE.
1382 bool InterCallTestCase::verifyResults (void)
1384 int resultBufferFriendlyName = 0;
1385 bool allResultsOk = true;
1386 bool anyResult = false;
1388 m_testCtx.getLog() << tcu::TestLog::Message << "Reading verifier program results" << tcu::TestLog::EndMessage;
1390 for (int step = 0; step < (int)m_cmds.size(); ++step)
// Individual error lines are logged only while fewer than this many have
// been logged for the current buffer.
1392 const int errorFloodThreshold = 5;
1393 int numErrorsLogged = 0;
// Steps without a result buffer have a 0 entry and are skipped.
1395 if (m_operationResultStorages[step])
1397 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1398 const void* mapped = DE_NULL;
1399 std::vector<deInt32> results (m_invocationGridSize * m_invocationGridSize);
1404 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_operationResultStorages[step]);
1405 mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), GL_MAP_READ_BIT);
1406 GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
1408 // copy to properly aligned array
1409 deMemcpy(&results[0], mapped, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32));
1411 if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
1412 throw tcu::TestError("memory map store corrupted");
1414 // check the results
1415 for (int ndx = 0; ndx < (int)results.size(); ++ndx)
1417 if (results[ndx] != 1)
// The first mismatch in a buffer logs a header line before the error line.
1421 if (numErrorsLogged == 0)
1422 m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
1423 if (numErrorsLogged++ < errorFloodThreshold)
1424 m_testCtx.getLog() << tcu::TestLog::Message << " Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
1427 // after N errors, no point continuing verification
1428 m_testCtx.getLog() << tcu::TestLog::Message << " -- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
1436 allResultsOk = false;
1439 m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " ok." << tcu::TestLog::EndMessage;
1443 DE_ASSERT(anyResult);
1444 DE_UNREF(anyResult);
1446 return allResultsOk;
// Runs the write program of step stepNdx: logs the operation, binds the
// target storage at binding/unit 0 and dispatches an
// m_invocationGridSize x m_invocationGridSize x 1 compute grid.
1449 void InterCallTestCase::runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName)
1451 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1454 << tcu::TestLog::Message
1455 << "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1456 << " Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1457 << tcu::TestLog::EndMessage;
1459 gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1462 if (m_storage == STORAGE_BUFFER)
1464 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1466 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1467 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1469 else if (m_storage == STORAGE_IMAGE)
1471 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
// Image access is read-write when atomics are used, write-only otherwise;
// the format follows m_formatInteger.
1473 gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1474 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1477 DE_ASSERT(DE_FALSE);
1480 gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1481 GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
// Runs a read command by forwarding its target handle to runSingleRead().
1484 void InterCallTestCase::runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1486 runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
// Logs and issues the memory barrier matching the storage type under test:
// GL_SHADER_STORAGE_BARRIER_BIT for buffers,
// GL_SHADER_IMAGE_ACCESS_BARRIER_BIT for images.
1489 void InterCallTestCase::runCommand (const op::Barrier& cmd)
1491 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1495 if (m_storage == STORAGE_BUFFER)
1497 m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_STORAGE_BARRIER_BIT" << tcu::TestLog::EndMessage;
1498 gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
1500 else if (m_storage == STORAGE_IMAGE)
1502 m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_IMAGE_ACCESS_BARRIER_BIT" << tcu::TestLog::EndMessage;
1503 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1506 DE_ASSERT(DE_FALSE);
// Runs the two-source verifier program of step stepNdx: the two sources are
// bound at bindings/units 1 and 2, the step's result buffer at SSBO
// binding 0, and an m_invocationGridSize x m_invocationGridSize x 1 grid is
// dispatched.
1509 void InterCallTestCase::runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1511 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1514 << tcu::TestLog::Message
1515 << "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffers") : ("images")) << " #" << cmd.targetHandle0 << " and #" << cmd.targetHandle1 << ".\n"
1516 << " Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1517 << " Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1518 << tcu::TestLog::EndMessage;
1520 gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1523 if (m_storage == STORAGE_BUFFER)
1525 DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1526 DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1528 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[cmd.targetHandle0]);
1529 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageIDs[cmd.targetHandle1]);
1530 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffers");
1532 else if (m_storage == STORAGE_IMAGE)
1534 DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1535 DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
// Image access is read-write when atomics are used, read-only otherwise.
1537 gl.bindImageTexture(1, m_storageIDs[cmd.targetHandle0], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1538 gl.bindImageTexture(2, m_storageIDs[cmd.targetHandle1], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1539 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
1542 DE_ASSERT(DE_FALSE);
1545 DE_ASSERT(m_operationResultStorages[stepNdx]);
1546 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1547 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1550 gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1551 GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read multi");
// Runs the interleaved write program of step stepNdx. Same binding scheme as
// the plain write (target at binding/unit 0), but the dispatch is only
// m_invocationGridSize / 2 wide, matching the logged dispatch size.
1554 void InterCallTestCase::runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName)
1556 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1559 << tcu::TestLog::Message
1560 << "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1561 << " Writing to every " << ((cmd.evenOdd) ? ("even") : ("odd")) << " " << ((m_storage == STORAGE_BUFFER) ? ("element") : ("column")) << ".\n"
1562 << " Dispatch size: " << m_invocationGridSize / 2 << "x" << m_invocationGridSize << "."
1563 << tcu::TestLog::EndMessage;
1565 gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1568 if (m_storage == STORAGE_BUFFER)
1570 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1572 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1573 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1575 else if (m_storage == STORAGE_IMAGE)
1577 DE_ASSERT(m_storageIDs[cmd.targetHandle]);
// Image access is read-write when atomics are used, write-only otherwise.
1579 gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1580 GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1583 DE_ASSERT(DE_FALSE);
1586 gl.dispatchCompute(m_invocationGridSize / 2, m_invocationGridSize, 1);
1587 GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
// Runs an interleaved read command by forwarding its target handle to
// runSingleRead().
1590 void InterCallTestCase::runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1592 runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
// Runs a read-zero command by forwarding its target handle to
// runSingleRead().
1595 void InterCallTestCase::runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1597 runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
// Shared implementation of the single-source read commands: logs the
// operation, binds the source storage at binding/unit 1 and the step's
// result buffer at SSBO binding 0, then dispatches an
// m_invocationGridSize x m_invocationGridSize x 1 grid.
1600 void InterCallTestCase::runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1602 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1605 << tcu::TestLog::Message
1606 << "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << targetHandle << ".\n"
1607 << " Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1608 << " Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1609 << tcu::TestLog::EndMessage;
1611 gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1614 if (m_storage == STORAGE_BUFFER)
1616 DE_ASSERT(m_storageIDs[targetHandle]);
1618 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[targetHandle]);
1619 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffer");
1621 else if (m_storage == STORAGE_IMAGE)
1623 DE_ASSERT(m_storageIDs[targetHandle]);
// Image access is read-write when atomics are used, read-only otherwise.
1625 gl.bindImageTexture(1, m_storageIDs[targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1626 GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
1629 DE_ASSERT(DE_FALSE);
1632 DE_ASSERT(m_operationResultStorages[stepNdx]);
1633 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1634 GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1637 gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1638 GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read");
// Creates one zero-initialized storage object of the kind selected by
// m_storage; friendlyName is used only for log output.
//  - Buffer: an SSBO holding gridSize * gridSize * perInvocationSize
//    32-bit elements (int or float depending on m_formatInteger).
//  - Image: a single-level 2D texture, gridSize wide and
//    gridSize * perInvocationSize high, in R32I or R32F.
1641 glw::GLuint InterCallTestCase::genStorage (int friendlyName)
1643 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1645 if (m_storage == STORAGE_BUFFER)
1647 const int numElements = m_invocationGridSize * m_invocationGridSize * m_perInvocationSize;
1648 const int bufferSize = numElements * (int)((m_formatInteger) ? (sizeof(deInt32)) : (sizeof(glw::GLfloat)));
1649 glw::GLuint retVal = 0;
1651 m_testCtx.getLog() << tcu::TestLog::Message << "Creating buffer #" << friendlyName << ", size " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
1653 gl.genBuffers(1, &retVal);
1654 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
// Upload zeros of the element type matching the format.
1656 if (m_formatInteger)
1658 const std::vector<deUint32> zeroBuffer(numElements, 0);
1659 gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1663 const std::vector<float> zeroBuffer(numElements, 0.0f);
1664 gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1666 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1670 else if (m_storage == STORAGE_IMAGE)
1672 const int imageWidth = m_invocationGridSize;
1673 const int imageHeight = m_invocationGridSize * m_perInvocationSize;
1674 glw::GLuint retVal = 0;
1677 << tcu::TestLog::Message
1678 << "Creating image #" << friendlyName << ", size " << imageWidth << "x" << imageHeight
1679 << ", internalformat = " << ((m_formatInteger) ? ("r32i") : ("r32f"))
1680 << ", size = " << (imageWidth*imageHeight*sizeof(deUint32)) << " bytes."
1681 << tcu::TestLog::EndMessage;
1683 gl.genTextures(1, &retVal);
1684 gl.bindTexture(GL_TEXTURE_2D, retVal);
1686 if (m_formatInteger)
1687 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, imageWidth, imageHeight);
1689 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32F, imageWidth, imageHeight);
1691 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1692 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1693 GLU_EXPECT_NO_ERROR(gl.getError(), "gen image");
1696 << tcu::TestLog::Message
1697 << "Filling image with 0"
1698 << tcu::TestLog::EndMessage;
// Upload zeros with the pixel transfer format/type matching the internal format.
1700 if (m_formatInteger)
1702 const std::vector<deInt32> zeroBuffer(imageWidth * imageHeight, 0);
1703 gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
1707 const std::vector<float> zeroBuffer(imageWidth * imageHeight, 0.0f);
1708 gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED, GL_FLOAT, &zeroBuffer[0]);
1711 GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
1717 DE_ASSERT(DE_FALSE);
// Creates the SSBO a verifier program writes its per-invocation verdicts
// into: one 32-bit value per invocation of the gridSize x gridSize dispatch.
// The data store is allocated without initial contents (DE_NULL).
1722 glw::GLuint InterCallTestCase::genResultStorage (void)
1724 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1725 glw::GLuint retVal = 0;
1727 gl.genBuffers(1, &retVal);
1728 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1729 gl.bufferData(GL_SHADER_STORAGE_BUFFER, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), DE_NULL, GL_STATIC_DRAW);
1730 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
// Builds the compute program for a write command and returns it as a
// heap-allocated glu::ShaderProgram (allocated with new).
//
// Each invocation computes its linear index groupNdx and writes groupNdx
// (converted to int or float per m_formatInteger) to m_perInvocationSize
// locations of the storage bound at binding 0. seed offsets the locations:
//  - buffer: index (groupNdx + seed + writeNdx * gridSize^2) modulo the
//    total element count;
//  - image: column (x + seed + writeNdx * 100) % gridSize, row
//    y + writeNdx * gridSize.
// NOTE(review): the atomicExchange / imageAtomicExchange forms are presumably
// selected by m_useAtomic -- confirm against the branch conditions.
1735 glu::ShaderProgram* InterCallTestCase::genWriteProgram (int seed)
// The image-atomic extension directive is only needed for atomics on images.
1737 const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1738 std::ostringstream buf;
1740 buf << "${GLSL_VERSION_DECL}\n"
1741 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1742 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1744 if (m_storage == STORAGE_BUFFER)
1745 buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1747 << " highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1749 else if (m_storage == STORAGE_IMAGE)
1750 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1752 DE_ASSERT(DE_FALSE);
1755 << "void main (void)\n"
1757 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1758 << " int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1761 // Write to buffer/image m_perInvocationSize elements
1762 if (m_storage == STORAGE_BUFFER)
1764 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1767 buf << " atomicExchange(";
1771 buf << "sb_out.values[(groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]";
1774 buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1776 buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1779 else if (m_storage == STORAGE_IMAGE)
1781 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1784 buf << " imageAtomicExchange";
1786 buf << " imageStore";
1788 buf << "(u_imageOut, ivec2((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1791 buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1793 buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
1797 DE_ASSERT(DE_FALSE);
// The ${...} placeholders are resolved by specializeShader() before compiling.
1801 return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
// Builds the verifier compute program for a read command and returns it as a
// heap-allocated glu::ShaderProgram (allocated with new).
//
// Each invocation reads m_perInvocationSize locations from the storage bound
// at binding 1, using the same seed-based addressing as the write program,
// and compares every value against its own linear index groupNdx. It stores
// 1 in resultOk[groupNdx] of the result buffer (binding 0) when all
// comparisons matched and 0 otherwise.
// NOTE(review): two read forms are emitted per storage type (a plain
// load/compare and an atomic exchange with zero); presumably chosen by
// m_useAtomic -- confirm against the branch conditions.
1804 glu::ShaderProgram* InterCallTestCase::genReadProgram (int seed)
// The image-atomic extension directive is only needed for atomics on images.
1806 const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1807 std::ostringstream buf;
1809 buf << "${GLSL_VERSION_DECL}\n"
1810 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1811 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1813 if (m_storage == STORAGE_BUFFER)
1814 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1816 << " highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1818 else if (m_storage == STORAGE_IMAGE)
1819 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
1821 DE_ASSERT(DE_FALSE);
1823 buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1825 << " highp int resultOk[];\n"
1828 << "void main (void)\n"
1830 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1831 << " int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1832 << " " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1833 << " bool allOk = true;\n"
1838 if (m_storage == STORAGE_BUFFER)
1840 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1843 buf << " allOk = allOk && (sb_in.values[(groupNdx + "
1844 << seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "] == "
1845 << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1847 buf << " allOk = allOk && (atomicExchange(sb_in.values[(groupNdx + "
1848 << seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "], zero) == "
1849 << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1852 else if (m_storage == STORAGE_IMAGE)
1854 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1857 buf << " allOk = allOk && (imageLoad(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1858 << (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)).x == "
1859 << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1861 buf << " allOk = allOk && (imageAtomicExchange(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1862 << (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u), zero) == "
1863 << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1867 DE_ASSERT(DE_FALSE);
1869 buf << " sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
// The ${...} placeholders are resolved by specializeShader() before compiling.
1872 return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
// Builds the verifier compute program for a two-source read command and
// returns it as a heap-allocated glu::ShaderProgram (allocated with new).
//
// Same scheme as the single-source verifier, applied to two sources: the
// storage at binding 1 is checked with seed0 and the storage at binding 2
// with seed1. Every read value must equal the invocation's linear index
// groupNdx; the verdict (1 = all matched, 0 = mismatch) is stored in
// resultOk[groupNdx] of the result buffer at binding 0. With m_useAtomic the
// reads are done as atomic exchanges with zero.
1875 glu::ShaderProgram* InterCallTestCase::genReadMultipleProgram (int seed0, int seed1)
// The image-atomic extension directive is only needed for atomics on images.
1877 const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1878 std::ostringstream buf;
1880 buf << "${GLSL_VERSION_DECL}\n"
1881 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1882 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
1884 if (m_storage == STORAGE_BUFFER)
1885 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer0\n"
1887 << " highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1889 << "layout(binding=2, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer1\n"
1891 << " highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1893 else if (m_storage == STORAGE_IMAGE)
1894 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn0;\n"
1895 << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=2) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn1;\n";
1897 DE_ASSERT(DE_FALSE);
1899 buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1901 << " highp int resultOk[];\n"
1904 << "void main (void)\n"
1906 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1907 << " int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1908 << " " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1909 << " bool allOk = true;\n"
1914 if (m_storage == STORAGE_BUFFER)
1916 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1917 buf << " allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in0.values[(groupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1918 << " allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in1.values[(groupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1920 else if (m_storage == STORAGE_IMAGE)
1922 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1923 buf << " allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn0, ivec2((gl_GlobalInvocationID.x + " << (seed0 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1924 << " allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn1, ivec2((gl_GlobalInvocationID.x + " << (seed1 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1927 DE_ASSERT(DE_FALSE);
1929 buf << " sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
// The ${...} placeholders are resolved by specializeShader() before compiling.
1932 return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
// Generates a compute program in which every invocation stores its linear index (groupNdx)
// into m_perInvocationSize interleaved slots of the output buffer or image.
//   seed    - offset added to the element index / x coordinate before wrapping.
//   evenOdd - true selects the "* 2 + 0" (even) slots, false the "* 2 + 1" (odd) slots.
// Returns a glu::ShaderProgram allocated with new; this function does not free it.
// NOTE(review): the embedded line numbering has gaps, so some lines of this definition
// (brace-only lines, branch keywords, short string fragments) are not shown in this listing.
1935 glu::ShaderProgram* InterCallTestCase::genWriteInterleavedProgram (int seed, bool evenOdd)
// Image atomics additionally emit the ${SHADER_IMAGE_ATOMIC_REQUIRE} line in the preamble.
1937 const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1938 std::ostringstream buf;
1940 buf << "${GLSL_VERSION_DECL}\n"
1941 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1942 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
// Declare the output resource at binding 0: an std430 buffer or an r32i/r32f image.
// It is qualified coherent when atomics are used.
1944 if (m_storage == STORAGE_BUFFER)
1945 buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1947 << " highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1949 else if (m_storage == STORAGE_IMAGE)
1950 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1952 DE_ASSERT(DE_FALSE);
// Shader entry point: groupNdx is the linearised global invocation index.
1955 << "void main (void)\n"
1957 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1958 << " int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1961 // Write to buffer/image m_perInvocationSize elements
1962 if (m_storage == STORAGE_BUFFER)
1964 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1967 buf << " atomicExchange(";
// The index wraps within half of the element count; "* 2 + {0,1}" then picks the even or odd slot.
1971 buf << "sb_out.values[((groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize / 2 * m_perInvocationSize << ") * 2 + " << ((evenOdd) ? (0) : (1)) << "]";
// Statement tail: second argument of atomicExchange, or a plain assignment.
1974 buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1976 buf << "= " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1979 else if (m_storage == STORAGE_IMAGE)
1981 for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1984 buf << " imageAtomicExchange";
1986 buf << " imageStore";
// The x coordinate is interleaved the same way; y advances by m_invocationGridSize per written element.
1988 buf << "(u_imageOut, ivec2(((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + " << ((evenOdd) ? (0) : (1)) << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
// Value argument: a scalar for imageAtomicExchange, a 4-component vector for imageStore.
1991 buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1993 buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
1997 DE_ASSERT(DE_FALSE);
// Specialize the ${...} placeholders and build the program object.
2001 return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
// Generates a compute program that reads interleaved data back and verifies it.
// Every invocation compares m_perInvocationSize elements against interleavedGroupNdx and
// stores 1 (all matched) or 0 into sb_result.resultOk[groupNdx].
//   seed0 - offset used when reading the "* 2 + 0" (even) slots.
//   seed1 - offset used when reading the "* 2 + 1" (odd) slots.
// With m_useAtomic the reads are done with atomicExchange / imageAtomicExchange using zero.
// Returns a glu::ShaderProgram allocated with new; this function does not free it.
// NOTE(review): the embedded line numbering has gaps, so some lines of this definition
// (brace-only lines, branch keywords, short string fragments) are not shown in this listing.
2004 glu::ShaderProgram* InterCallTestCase::genReadInterleavedProgram (int seed0, int seed1)
// Image atomics additionally emit the ${SHADER_IMAGE_ATOMIC_REQUIRE} line in the preamble.
2006 const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2007 std::ostringstream buf;
2009 buf << "${GLSL_VERSION_DECL}\n"
2010 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
2011 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
// Declare the input resource at binding 1: an std430 buffer or an r32i/r32f image.
2013 if (m_storage == STORAGE_BUFFER)
2014 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2016 << " highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2018 else if (m_storage == STORAGE_IMAGE)
2019 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2021 DE_ASSERT(DE_FALSE);
// The per-invocation verdict buffer is at binding 0.
2023 buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2025 << " highp int resultOk[];\n"
2028 << "void main (void)\n"
2030 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2031 << " int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
// interleavedGroupNdx is the same linear index computed with the x extent and x id halved.
2032 << " int interleavedGroupNdx = int((size.x >> 1U) * size.y * gl_GlobalInvocationID.z + (size.x >> 1U) * gl_GlobalInvocationID.y + (gl_GlobalInvocationID.x >> 1U));\n"
2033 << " " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
2034 << " bool allOk = true;\n"
2039 if (m_storage == STORAGE_BUFFER)
// Invocations with an even groupNdx check the even slots using seed0.
2041 buf << " if (groupNdx % 2 == 0)\n"
2043 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2044 buf << " allOk = allOk && ("
2045 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 0]"
2046 << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
// Odd slots are checked using seed1 (presumably the shader's else branch; those lines are not shown here).
2050 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2051 buf << " allOk = allOk && ("
2052 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 1]"
2053 << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2056 else if (m_storage == STORAGE_IMAGE)
// Same even/odd split for images: x is interleaved, y advances by m_invocationGridSize per element.
2058 buf << " if (groupNdx % 2 == 0)\n"
2060 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2061 buf << " allOk = allOk && ("
2062 << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2063 << "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed0 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 0, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2064 << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2068 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2069 buf << " allOk = allOk && ("
2070 << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2071 << "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed1 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 1, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2072 << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2076 DE_ASSERT(DE_FALSE);
// Publish the verdict for this invocation.
2078 buf << " sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
// Specialize the ${...} placeholders and build the program object.
2081 return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
// Generates a compute program that checks that the input buffer or image holds only zeros.
// Every invocation tests m_perInvocationSize elements against 0 / 0.0 and stores 1 (all zero)
// or 0 into sb_result.resultOk[groupNdx].
// With m_useAtomic the elements are read with atomicExchange / imageAtomicExchange, passing
// the shader variable "anything" (5 / 5.0) as the exchange value.
// Returns a glu::ShaderProgram allocated with new; this function does not free it.
// NOTE(review): the embedded line numbering has gaps, so some lines of this definition
// (brace-only lines, branch keywords, short string fragments) are not shown in this listing.
2084 glu::ShaderProgram* InterCallTestCase::genReadZeroProgram (void)
// Image atomics additionally emit the ${SHADER_IMAGE_ATOMIC_REQUIRE} line in the preamble.
2086 const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2087 std::ostringstream buf;
2089 buf << "${GLSL_VERSION_DECL}\n"
2090 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
2091 << "layout (local_size_x = 1, local_size_y = 1) in;\n";
// Declare the input resource at binding 1: an std430 buffer or an r32i/r32f image.
2093 if (m_storage == STORAGE_BUFFER)
2094 buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2096 << " highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2098 else if (m_storage == STORAGE_IMAGE)
2099 buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2101 DE_ASSERT(DE_FALSE);
// The per-invocation verdict buffer is at binding 0.
2103 buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2105 << " highp int resultOk[];\n"
2108 << "void main (void)\n"
2110 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2111 << " int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2112 << " " << ((m_formatInteger) ? ("int") : ("float")) << " anything = " << ((m_formatInteger) ? ("5") : ("5.0")) << ";\n"
2113 << " bool allOk = true;\n"
2118 if (m_storage == STORAGE_BUFFER)
// Buffer layout: each invocation checks its own contiguous run of m_perInvocationSize elements.
2120 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2121 buf << " allOk = allOk && ("
2122 << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[groupNdx * " << m_perInvocationSize << " + " << readNdx << "]"
2123 << ((m_useAtomic) ? (", anything)") : ("")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2125 else if (m_storage == STORAGE_IMAGE)
// Image layout: element readNdx is at the invocation's x and at y offset by readNdx * m_invocationGridSize.
2127 for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2128 buf << " allOk = allOk && ("
2129 << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y + " << (readNdx*m_invocationGridSize) << "u)"
2130 << ((m_useAtomic) ? (", anything)") : (").x")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2133 DE_ASSERT(DE_FALSE);
// Publish the verdict for this invocation.
2135 buf << " sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
// Specialize the ${...} placeholders and build the program object.
2138 return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
// Test case that dispatches a compute shader m_numCalls times; every invocation does an
// atomicAdd with a per-call delta on its own element of a shared work buffer, and the
// value returned by each atomicAdd is recorded for later validation.
// NOTE(review): the embedded line numbering has gaps; access specifiers and some member
// declarations of this class are not shown in this listing.
2141 class SSBOConcurrentAtomicCase : public TestCase
2145 SSBOConcurrentAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize);
2146 ~SSBOConcurrentAtomicCase (void);
// Executes the dispatches and verifies the work buffer and the intermediate results.
2150 IterateResult iterate (void);
// Returns the specialized compute shader source.
2153 std::string genComputeSource (void) const;
// Number of dispatches; also the number of intermediate result buffers.
2155 const int m_numCalls;
// Number of work groups per dispatch and number of elements in each buffer.
2156 const int m_workSize;
2157 glu::ShaderProgram* m_program;
// GL name of the work buffer (bound to SSBO binding 2 in iterate()).
2158 deUint32 m_bufferID;
// GL names of the per-call result buffers (bound to SSBO binding 1, one per dispatch).
2159 std::vector<deUint32> m_intermediateResultBuffers;
2162 SSBOConcurrentAtomicCase::SSBOConcurrentAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2163 : TestCase (context, name, description)
2164 , m_numCalls (numCalls)
2165 , m_workSize (workSize)
2166 , m_program (DE_NULL)
2167 , m_bufferID (DE_NULL)
// Destructor. NOTE(review): the body is not shown in this listing (numbering gap after this line).
2171 SSBOConcurrentAtomicCase::~SSBOConcurrentAtomicCase (void)
2176 void SSBOConcurrentAtomicCase::init (void)
2178 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2179 std::vector<deUint32> zeroData (m_workSize, 0);
2183 gl.genBuffers(1, &m_bufferID);
2184 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2185 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2187 for (int ndx = 0; ndx < m_numCalls; ++ndx)
2189 deUint32 buffer = 0;
2191 gl.genBuffers(1, &buffer);
2192 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2193 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2195 m_intermediateResultBuffers.push_back(buffer);
2196 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2201 m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2202 m_testCtx.getLog() << *m_program;
2203 if (!m_program->isOk())
2204 throw tcu::TestError("could not build program");
// Releases the GL buffers created by init() and resets m_program.
// NOTE(review): the embedded line numbering has gaps; guard conditions and other
// statements of this function may be missing from this listing.
2207 void SSBOConcurrentAtomicCase::deinit (void)
// Delete the work buffer.
2211 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
// Delete every per-call result buffer, then forget the names.
2215 for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2216 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2217 m_intermediateResultBuffers.clear();
2220 m_program = DE_NULL;
// Runs the test: dispatches the program m_numCalls times with a different u_atomicDelta per
// call, then verifies (1) that every work buffer element equals sumValue and (2) that, for
// every element, the values recorded by the individual calls form a valid addition chain.
// NOTE(review): the embedded line numbering has gaps; brace-only lines, the heads of some
// log statements and the return statements are not shown in this listing.
2223 TestCase::IterateResult SSBOConcurrentAtomicCase::iterate (void)
2225 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
// n*(n+1)/2 for n = m_numCalls; presumably the sum of all deltas (generateShuffledRamp is defined outside this listing).
2226 const deUint32 sumValue = (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2227 std::vector<int> deltas;
2229 // generate unique deltas
2230 generateShuffledRamp(m_numCalls, deltas);
2232 // invoke program N times, each with a different delta
2234 const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2237 << tcu::TestLog::Message
2238 << "Running shader " << m_numCalls << " times.\n"
2239 << "Num groups = (" << m_workSize << ", 1, 1)\n"
2240 << "Setting u_atomicDelta to a unique value for each call.\n"
2241 << tcu::TestLog::EndMessage;
2243 if (deltaLocation == -1)
2244 throw tcu::TestError("u_atomicDelta location was -1");
// The work buffer stays bound to binding 2 for all dispatches.
2246 gl.useProgram(m_program->getProgram());
2247 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_bufferID);
2249 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2252 << tcu::TestLog::Message
2253 << "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2254 << tcu::TestLog::EndMessage;
// Each call records into its own result buffer at binding 1.
2256 gl.uniform1ui(deltaLocation, deltas[callNdx]);
2257 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2258 gl.dispatchCompute(m_workSize, 1, 1);
2261 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
// Check 1: final contents of the work buffer.
2266 std::vector<deUint32> result;
2268 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2270 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2271 readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, result);
2273 for (int ndx = 0; ndx < m_workSize; ++ndx)
2275 if (result[ndx] != sumValue)
2278 << tcu::TestLog::Message
2279 << "Work buffer error, at index " << ndx << " expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2280 << "Work buffer contains invalid values."
2281 << tcu::TestLog::EndMessage;
2283 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2288 m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer contents are valid." << tcu::TestLog::EndMessage;
// Check 2: per-call intermediate values. intermediateResults[call][element]; valueChain is reused per element.
2293 std::vector<std::vector<deUint32> > intermediateResults (m_numCalls);
2294 std::vector<deUint32> valueChain (m_numCalls);
2296 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
// Read back every per-call result buffer.
2300 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2302 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2303 readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, intermediateResults[callNdx]);
2308 for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
// Output parameters filled in by the validator when it reports a failure.
2310 int invalidOperationNdx;
2311 deUint32 errorDelta;
2312 deUint32 errorExpected;
2314 // collect result chain for each element
2315 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2316 valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2318 // check there exists a path from 0 to sumValue using each addition once
2319 // decompose cumulative results to addition operations (all additions positive => this works)
2321 std::sort(valueChain.begin(), valueChain.end());
2324 if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2327 << tcu::TestLog::Message
2328 << "Intermediate buffer error, at value index " << valueNdx << ", applied operation index " << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2329 << "Intermediate buffer contains invalid values. Values at index " << valueNdx << "\n"
2330 << tcu::TestLog::EndMessage;
// Dump the unsorted per-call values for the failing element.
2332 for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2333 m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2334 m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2336 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2341 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2344 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
// Builds the compute shader source. Each invocation atomically adds u_atomicDelta to its own
// element of the work buffer (binding 2) and stores the value returned by atomicAdd into the
// intermediate result buffer (binding 1) at the same index.
// Returns the source after specializeShader() has processed the ${...} placeholders.
// NOTE(review): the embedded line numbering has gaps; several short string fragments
// (block braces, instance names, main() header) are not shown in this listing.
2348 std::string SSBOConcurrentAtomicCase::genComputeSource (void) const
2350 std::ostringstream buf;
2352 buf << "${GLSL_VERSION_DECL}\n"
2353 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2354 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
// Both buffer arrays are sized to m_workSize, one element per work group.
2356 << " highp uint values[" << m_workSize << "];\n"
2359 << "layout (binding = 2, std430) volatile buffer WorkBuffer\n"
2361 << " highp uint values[" << m_workSize << "];\n"
2363 << "uniform highp uint u_atomicDelta;\n"
2367 << " highp uint invocationIndex = gl_GlobalInvocationID.x;\n"
2368 << " sb_ires.values[invocationIndex] = atomicAdd(sb_work.values[invocationIndex], u_atomicDelta);\n"
2371 return specializeShader(m_context, buf.str().c_str());
// Test case that increments a single atomic counter from two alternating compute programs
// ("even" and "odd") and records the value returned by every increment.
// NOTE(review): the embedded line numbering has gaps; access specifiers and some member
// declarations of this class are not shown in this listing.
2374 class ConcurrentAtomicCounterCase : public TestCase
2378 ConcurrentAtomicCounterCase (Context& context, const char* name, const char* description, int numCalls, int workSize);
2379 ~ConcurrentAtomicCounterCase (void);
// Executes the dispatches and verifies the counter and the recorded values.
2383 IterateResult iterate (void);
// Returns the specialized shader source; evenOdd selects which data indices (even or odd) the program handles.
2386 std::string genComputeSource (bool evenOdd) const;
// Number of even/odd dispatch pairs.
2388 const int m_numCalls;
// Number of work groups per dispatch.
2389 const int m_workSize;
2390 glu::ShaderProgram* m_evenProgram;
2391 glu::ShaderProgram* m_oddProgram;
// GL name of the buffer backing the atomic counter (atomic counter binding 2).
2392 deUint32 m_counterBuffer;
// GL name of the buffer receiving the increment results (SSBO binding 1, m_numCalls * m_workSize values).
2393 deUint32 m_intermediateResultBuffer;
2396 ConcurrentAtomicCounterCase::ConcurrentAtomicCounterCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2397 : TestCase (context, name, description)
2398 , m_numCalls (numCalls)
2399 , m_workSize (workSize)
2400 , m_evenProgram (DE_NULL)
2401 , m_oddProgram (DE_NULL)
2402 , m_counterBuffer (DE_NULL)
2403 , m_intermediateResultBuffer(DE_NULL)
// Destructor. NOTE(review): the body is not shown in this listing (numbering gap after this line).
2407 ConcurrentAtomicCounterCase::~ConcurrentAtomicCounterCase (void)
2412 void ConcurrentAtomicCounterCase::init (void)
2414 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2415 const std::vector<deUint32> zeroData (m_numCalls * m_workSize, 0);
2419 gl.genBuffers(1, &m_counterBuffer);
2420 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_counterBuffer);
2421 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32), &zeroData[0], GL_DYNAMIC_COPY);
2423 gl.genBuffers(1, &m_intermediateResultBuffer);
2424 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2425 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_numCalls * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2427 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2432 const tcu::ScopedLogSection section(m_testCtx.getLog(), "EvenProgram", "Even program");
2434 m_evenProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(true)));
2435 m_testCtx.getLog() << *m_evenProgram;
2436 if (!m_evenProgram->isOk())
2437 throw tcu::TestError("could not build program");
2440 const tcu::ScopedLogSection section(m_testCtx.getLog(), "OddProgram", "Odd program");
2442 m_oddProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(false)));
2443 m_testCtx.getLog() << *m_oddProgram;
2444 if (!m_oddProgram->isOk())
2445 throw tcu::TestError("could not build program");
2449 void ConcurrentAtomicCounterCase::deinit (void)
2451 if (m_counterBuffer)
2453 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_counterBuffer);
2454 m_counterBuffer = 0;
2456 if (m_intermediateResultBuffer)
2458 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffer);
2459 m_intermediateResultBuffer = 0;
2462 delete m_evenProgram;
2463 m_evenProgram = DE_NULL;
2465 delete m_oddProgram;
2466 m_oddProgram = DE_NULL;
// Runs the test: for every call index dispatches the even program and then the odd program,
// then verifies (1) that the counter equals m_numCalls * m_workSize and (2) that the sorted
// recorded values are exactly 0, 1, 2, ... (every counter value was returned exactly once).
// NOTE(review): the embedded line numbering has gaps; brace-only lines, the heads of some
// log statements, a local declaration and the return statements are not shown in this listing.
2469 TestCase::IterateResult ConcurrentAtomicCounterCase::iterate (void)
2471 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2473 // invoke the even/odd program pair N times, each with a different call index
2475 const int evenCallNdxLocation = gl.getUniformLocation(m_evenProgram->getProgram(), "u_callNdx");
2476 const int oddCallNdxLocation = gl.getUniformLocation(m_oddProgram->getProgram(), "u_callNdx");
2479 << tcu::TestLog::Message
2480 << "Running shader pair (even & odd) " << m_numCalls << " times.\n"
2481 << "Num groups = (" << m_workSize << ", 1, 1)\n"
2482 << tcu::TestLog::EndMessage;
2484 if (evenCallNdxLocation == -1)
2485 throw tcu::TestError("u_callNdx location was -1");
2486 if (oddCallNdxLocation == -1)
2487 throw tcu::TestError("u_callNdx location was -1");
// Result buffer at SSBO binding 1, counter buffer at atomic counter binding 2.
2489 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffer);
2490 gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_counterBuffer);
2492 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
// Both programs receive the same call index; no barrier is issued between the two dispatches.
2494 gl.useProgram(m_evenProgram->getProgram());
2495 gl.uniform1ui(evenCallNdxLocation, (deUint32)callNdx);
2496 gl.dispatchCompute(m_workSize, 1, 1);
2498 gl.useProgram(m_oddProgram->getProgram());
2499 gl.uniform1ui(oddCallNdxLocation, (deUint32)callNdx);
2500 gl.dispatchCompute(m_workSize, 1, 1);
2503 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
// Check 1: final counter value.
2510 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
2512 gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counterBuffer);
2513 result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
2515 if ((int)result != m_numCalls*m_workSize)
2518 << tcu::TestLog::Message
2519 << "Counter buffer error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
2520 << tcu::TestLog::EndMessage;
2522 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2526 m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer is valid." << tcu::TestLog::EndMessage;
// Check 2: values returned by the individual increments.
2531 std::vector<deUint32> intermediateResults;
2533 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2537 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2538 readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_numCalls * m_workSize, intermediateResults);
// After sorting, position i must hold the value i.
2542 std::sort(intermediateResults.begin(), intermediateResults.end());
2544 for (int valueNdx = 0; valueNdx < m_workSize * m_numCalls; ++valueNdx)
2546 if ((int)intermediateResults[valueNdx] != valueNdx)
2549 << tcu::TestLog::Message
2550 << "Intermediate buffer error, at value index " << valueNdx << ", expected " << valueNdx << ", got " << intermediateResults[valueNdx] << ".\n"
2551 << "Intermediate buffer contains invalid values. Intermediate results:\n"
2552 << tcu::TestLog::EndMessage;
// Dump the whole (sorted) result array to the log.
2554 for (int logCallNdx = 0; logCallNdx < m_workSize * m_numCalls; ++logCallNdx)
2555 m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx] << tcu::TestLog::EndMessage;
2557 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2562 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2565 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
// Builds the compute shader source for one of the two programs. Each invocation computes
// dataNdx = u_callNdx * m_workSize + gl_GlobalInvocationID.x; when dataNdx has the selected
// parity it increments the atomic counter and stores the returned value at values[dataNdx].
//   evenOdd - true: handle even dataNdx; false: handle odd dataNdx.
// Returns the source after specializeShader() has processed the ${...} placeholders.
// NOTE(review): the embedded line numbering has gaps; several short string fragments
// (block braces, instance name, main() header) are not shown in this listing.
2569 std::string ConcurrentAtomicCounterCase::genComputeSource (bool evenOdd) const
2571 std::ostringstream buf;
2573 buf << "${GLSL_VERSION_DECL}\n"
2574 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2575 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
// One result slot for every (call, work group) pair.
2577 << " highp uint values[" << m_workSize * m_numCalls << "];\n"
2580 << "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
2581 << "uniform highp uint u_callNdx;\n"
2585 << " highp uint dataNdx = u_callNdx * " << m_workSize << "u + gl_GlobalInvocationID.x;\n"
2586 << " if ((dataNdx % 2u) == " << ((evenOdd) ? (0) : (1)) << "u)\n"
2587 << " sb_ires.values[dataNdx] = atomicCounterIncrement(u_counter);\n"
2590 return specializeShader(m_context, buf.str().c_str());
// Test case that applies per-call atomic additions to an m_workSize x m_workSize R32UI image
// from a compute shader and records intermediate values for later validation.
// NOTE(review): the embedded line numbering has gaps; access specifiers and some member
// declarations (for example the m_imageID member used by the methods) are not shown in this listing.
2593 class ConcurrentImageAtomicCase : public TestCase
2597 ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize);
2598 ~ConcurrentImageAtomicCase (void);
// Executes the dispatches and verifies the work image and the intermediate results.
2602 IterateResult iterate (void);
// Copies the work image into result (m_workSize * m_workSize values) via a compute shader and an SSBO.
2605 void readWorkImage (std::vector<deUint32>& result);
// Shader source generators for the main, image read-back and image clear programs.
2607 std::string genComputeSource (void) const;
2608 std::string genImageReadSource (void) const;
2609 std::string genImageClearSource (void) const;
// Number of dispatches of the main program; also the number of intermediate result buffers.
2611 const int m_numCalls;
// Edge length of the work image and of the dispatch grid.
2612 const int m_workSize;
2613 glu::ShaderProgram* m_program;
2614 glu::ShaderProgram* m_imageReadProgram;
2615 glu::ShaderProgram* m_imageClearProgram;
// GL names of the per-call result buffers (SSBO binding 1, one per dispatch).
2617 std::vector<deUint32> m_intermediateResultBuffers;
2620 ConcurrentImageAtomicCase::ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2621 : TestCase (context, name, description)
2622 , m_numCalls (numCalls)
2623 , m_workSize (workSize)
2624 , m_program (DE_NULL)
2625 , m_imageReadProgram (DE_NULL)
2626 , m_imageClearProgram (DE_NULL)
2627 , m_imageID (DE_NULL)
// Destructor. NOTE(review): the body is not shown in this listing (numbering gap after this line).
2631 ConcurrentImageAtomicCase::~ConcurrentImageAtomicCase (void)
2636 void ConcurrentImageAtomicCase::init (void)
2638 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2639 std::vector<deUint32> zeroData (m_workSize * m_workSize, 0);
2640 const bool supportsES32 = glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2));
2642 if (!supportsES32 && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
2643 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic");
2647 gl.genTextures(1, &m_imageID);
2648 gl.bindTexture(GL_TEXTURE_2D, m_imageID);
2649 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize, m_workSize);
2650 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2651 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2652 GLU_EXPECT_NO_ERROR(gl.getError(), "gen tex");
2656 for (int ndx = 0; ndx < m_numCalls; ++ndx)
2658 deUint32 buffer = 0;
2660 gl.genBuffers(1, &buffer);
2661 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2662 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2664 m_intermediateResultBuffers.push_back(buffer);
2665 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2670 m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2671 m_testCtx.getLog() << *m_program;
2672 if (!m_program->isOk())
2673 throw tcu::TestError("could not build program");
2675 m_imageReadProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageReadSource()));
2676 if (!m_imageReadProgram->isOk())
2678 const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageReadProgram", "Image read program");
2680 m_testCtx.getLog() << *m_imageReadProgram;
2681 throw tcu::TestError("could not build program");
2684 m_imageClearProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageClearSource()));
2685 if (!m_imageClearProgram->isOk())
2687 const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageClearProgram", "Image read program");
2689 m_testCtx.getLog() << *m_imageClearProgram;
2690 throw tcu::TestError("could not build program");
// Releases the GL objects created by init() and the helper programs.
// NOTE(review): the embedded line numbering has gaps; guard conditions and other
// statements of this function may be missing from this listing.
2694 void ConcurrentImageAtomicCase::deinit (void)
// Delete the work image texture.
2698 m_context.getRenderContext().getFunctions().deleteTextures(1, &m_imageID);
// Delete every per-call result buffer, then forget the names.
2702 for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2703 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2704 m_intermediateResultBuffers.clear();
2707 m_program = DE_NULL;
2709 delete m_imageReadProgram;
2710 m_imageReadProgram = DE_NULL;
2712 delete m_imageClearProgram;
2713 m_imageClearProgram = DE_NULL;
2716 TestCase::IterateResult ConcurrentImageAtomicCase::iterate (void)
2718 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2719 const deUint32 sumValue = (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2720 std::vector<int> deltas;
2722 // generate unique deltas
2723 generateShuffledRamp(m_numCalls, deltas);
2727 m_testCtx.getLog() << tcu::TestLog::Message << "Clearing image contents" << tcu::TestLog::EndMessage;
2729 gl.useProgram(m_imageClearProgram->getProgram());
2730 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
2731 gl.dispatchCompute(m_workSize, m_workSize, 1);
2732 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2734 GLU_EXPECT_NO_ERROR(gl.getError(), "clear");
2737 // invoke program N times, each with a different delta
2739 const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2742 << tcu::TestLog::Message
2743 << "Running shader " << m_numCalls << " times.\n"
2744 << "Num groups = (" << m_workSize << ", " << m_workSize << ", 1)\n"
2745 << "Setting u_atomicDelta to a unique value for each call.\n"
2746 << tcu::TestLog::EndMessage;
2748 if (deltaLocation == -1)
2749 throw tcu::TestError("u_atomicDelta location was -1");
2751 gl.useProgram(m_program->getProgram());
2752 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
2754 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2757 << tcu::TestLog::Message
2758 << "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2759 << tcu::TestLog::EndMessage;
2761 gl.uniform1ui(deltaLocation, deltas[callNdx]);
2762 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2763 gl.dispatchCompute(m_workSize, m_workSize, 1);
2766 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2771 std::vector<deUint32> result;
2773 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work image, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2775 readWorkImage(result);
2777 for (int ndx = 0; ndx < m_workSize * m_workSize; ++ndx)
2779 if (result[ndx] != sumValue)
2782 << tcu::TestLog::Message
2783 << "Work image error, at index (" << ndx % m_workSize << ", " << ndx / m_workSize << ") expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2784 << "Work image contains invalid values."
2785 << tcu::TestLog::EndMessage;
2787 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Image contents invalid");
2792 m_testCtx.getLog() << tcu::TestLog::Message << "Work image contents are valid." << tcu::TestLog::EndMessage;
2797 std::vector<std::vector<deUint32> > intermediateResults (m_numCalls);
2798 std::vector<deUint32> valueChain (m_numCalls);
2799 std::vector<deUint32> chainDelta (m_numCalls);
2801 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2805 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2807 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2808 readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize * m_workSize, intermediateResults[callNdx]);
2813 for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2815 int invalidOperationNdx;
2816 deUint32 errorDelta;
2817 deUint32 errorExpected;
2819 // collect result chain for each element
2820 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2821 valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2823 // check there exists a path from 0 to sumValue using each addition once
2824 // decompose cumulative results to addition operations (all additions positive => this works)
2826 std::sort(valueChain.begin(), valueChain.end());
2828 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2829 chainDelta[callNdx] = ((callNdx + 1 == m_numCalls) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
2831 // chainDelta contains now the actual additions applied to the value
2832 std::sort(chainDelta.begin(), chainDelta.end());
2835 if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2838 << tcu::TestLog::Message
2839 << "Intermediate buffer error, at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << "), applied operation index "
2840 << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2841 << "Intermediate buffer contains invalid values. Values at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << ")\n"
2842 << tcu::TestLog::EndMessage;
2844 for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2845 m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2846 m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2848 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2853 m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2856 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
// Reads the current contents of the work image (m_imageID) into 'result'.
//
// The image cannot be mapped directly, so it is first copied into a temporary
// shader storage buffer by dispatching m_imageReadProgram over an
// m_workSize x m_workSize grid. The buffer is then mapped for reading and its
// m_workSize * m_workSize uint32 values are copied into 'result' (which is
// resized to that element count).
//
// Raises tcu::TestError ("mapBufferRange returned NULL") on the map path and
// when unmapBuffer() returns GL_FALSE; GL errors are checked with
// GLU_EXPECT_NO_ERROR.
2860 void ConcurrentImageAtomicCase::readWorkImage (std::vector<deUint32>& result)
2862 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2863 glu::Buffer resultBuffer (m_context.getRenderContext());
2865 // Read image to an ssbo
// zero-filled initial contents for the temporary result buffer
2868 const std::vector<deUint32> zeroData(m_workSize*m_workSize, 0);
2870 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *resultBuffer);
2871 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * m_workSize * m_workSize), &zeroData[0], GL_DYNAMIC_COPY);
// order the image loads of the read program after previously issued shader image accesses
2873 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2874 gl.useProgram(m_imageReadProgram->getProgram());
// SSBO binding 1 and image unit 2 are the bindings declared by genImageReadSource()
2876 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *resultBuffer);
2877 gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
// one work group per texel
2878 gl.dispatchCompute(m_workSize, m_workSize, 1);
2880 GLU_EXPECT_NO_ERROR(gl.getError(), "read");
// NOTE(review): no memoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT) is visible between the
// dispatch above and the map below -- confirm whether one is required here.
2885 const void* ptr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, (int)(sizeof(deUint32) * m_workSize * m_workSize), GL_MAP_READ_BIT);
2886 GLU_EXPECT_NO_ERROR(gl.getError(), "map");
2889 throw tcu::TestError("mapBufferRange returned NULL");
// copy the mapped data out before the buffer is unmapped
2891 result.resize(m_workSize * m_workSize);
2892 memcpy(&result[0], ptr, sizeof(deUint32) * m_workSize * m_workSize);
2894 if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) == GL_FALSE)
2895 throw tcu::TestError("unmapBuffer returned false");
// Generates the GLSL source of the atomic-add compute shader.
//
// Each invocation (local size 1x1x1) performs imageAtomicAdd() with
// u_atomicDelta on its own texel of u_workImage (image binding 2) and stores
// the value returned by the atomic to the IntermediateResults buffer
// (SSBO binding 1) at index x + y * m_workSize. The buffer array length is
// baked into the source as m_workSize * m_workSize.
//
// Returns the source after the ${...} template tokens have been resolved by
// specializeShader().
2899 std::string ConcurrentImageAtomicCase::genComputeSource (void) const
2901 std::ostringstream buf;
2903 buf << "${GLSL_VERSION_DECL}\n"
2904 << "${SHADER_IMAGE_ATOMIC_REQUIRE}\n"
2906 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2907 << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2909 << " highp uint values[" << m_workSize * m_workSize << "];\n"
2912 << "layout (binding = 2, r32ui) volatile uniform highp uimage2D u_workImage;\n"
2913 << "uniform highp uint u_atomicDelta;\n"
// row-major linear index of this invocation within the m_workSize-wide grid
2917 << " highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2918 << " sb_ires.values[invocationIndex] = imageAtomicAdd(u_workImage, ivec2(gl_GlobalInvocationID.xy), u_atomicDelta);\n"
2921 return specializeShader(m_context, buf.str().c_str());
// Generates the GLSL source of the compute shader that copies the work image
// into a buffer.
//
// Each invocation (local size 1x1x1) loads its own texel of u_workImage
// (read-only image, binding 2) and writes the x component to the ImageValues
// buffer (SSBO binding 1) at index x + y * m_workSize.
//
// Returns the source after template specialization by specializeShader().
2924 std::string ConcurrentImageAtomicCase::genImageReadSource (void) const
2926 std::ostringstream buf;
2928 buf << "${GLSL_VERSION_DECL}\n"
2930 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2931 << "layout (binding = 1, std430) writeonly buffer ImageValues\n"
2933 << " highp uint values[" << m_workSize * m_workSize << "];\n"
2936 << "layout (binding = 2, r32ui) readonly uniform highp uimage2D u_workImage;\n"
// row-major linear index of this invocation within the m_workSize-wide grid
2940 << " highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2941 << " sb_res.values[invocationIndex] = imageLoad(u_workImage, ivec2(gl_GlobalInvocationID.xy)).x;\n"
2944 return specializeShader(m_context, buf.str().c_str());
// Generates the GLSL source of the compute shader that clears the work image.
//
// Each invocation (local size 1x1x1) stores uvec4(0, 0, 0, 0) to its own texel
// of u_workImage (write-only image, binding 2).
//
// Returns the source after template specialization by specializeShader().
2947 std::string ConcurrentImageAtomicCase::genImageClearSource (void) const
2949 std::ostringstream buf;
2951 buf << "${GLSL_VERSION_DECL}\n"
2953 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2954 << "layout (binding = 2, r32ui) writeonly uniform highp uimage2D u_workImage;\n"
2958 << " imageStore(u_workImage, ivec2(gl_GlobalInvocationID.xy), uvec4(0, 0, 0, 0));\n"
2961 return specializeShader(m_context, buf.str().c_str());
// Test case that mixes atomic counter increments and SSBO atomic operations on
// one buffer object: the two compute programs are dispatched alternately and
// the final value of the shared word is verified in iterate().
2964 class ConcurrentSSBOAtomicCounterMixedCase : public TestCase
2967 ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize);
2968 ~ConcurrentSSBOAtomicCounterMixedCase (void);
2972 IterateResult iterate (void);
// GLSL source generators for the two compute programs
2975 std::string genSSBOComputeSource (void) const;
2976 std::string genAtomicCounterComputeSource (void) const;
// number of times the program pair is dispatched
2978 const int m_numCalls;
// number of work groups (x dimension) in each dispatch
2979 const int m_workSize;
// GL name of the buffer that backs both the SSBO and the atomic counter
2980 deUint32 m_bufferID;
// compute program doing atomicXor() through the SSBO binding
2981 glu::ShaderProgram* m_ssboAtomicProgram;
// compute program doing atomicCounterIncrement() through the counter binding
2982 glu::ShaderProgram* m_atomicCounterProgram;
// Constructs the mixed SSBO-atomic / atomic-counter test case.
//
// numCalls is the number of times the program pair is dispatched and workSize
// the number of work groups per dispatch. No GL objects are created here; the
// buffer name starts out as 0 (no buffer) and the program pointers as null.
// workSize * numCalls must be a multiple of 32 (asserted below) so that the
// SSBO atomic XORs cancel each other out.
2985 ConcurrentSSBOAtomicCounterMixedCase::ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2986 : TestCase (context, name, description)
2987 , m_numCalls (numCalls)
2988 , m_workSize (workSize)
// m_bufferID is an integer GL object name, not a pointer: use 0, not the null-pointer macro
2989 , m_bufferID (0)
2990 , m_ssboAtomicProgram (DE_NULL)
2991 , m_atomicCounterProgram (DE_NULL)
2993 // SSBO atomic XORs cancel out
2994 DE_ASSERT((workSize * numCalls) % (16 * 2) == 0);
// Destructor of the mixed SSBO-atomic / atomic-counter test case.
2997 ConcurrentSSBOAtomicCounterMixedCase::~ConcurrentSSBOAtomicCounterMixedCase (void)
// Creates the GL resources used by the test.
//
// Allocates the shared work buffer (two zero-initialized uint32 words) and
// builds the SSBO-atomic and the atomic-counter compute programs, writing each
// program's build log into its own log section.
//
// Throws tcu::TestError("could not build program") when either program fails
// to build; GL errors from the buffer setup are reported through
// GLU_EXPECT_NO_ERROR.
3002 void ConcurrentSSBOAtomicCounterMixedCase::init (void)
3004 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
// initial contents of the work buffer
3005 const deUint32 zeroBuf[2] = { 0, 0 };
3009 gl.genBuffers(1, &m_bufferID);
3010 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
3011 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * 2), zeroBuf, GL_DYNAMIC_COPY);
3013 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
// program performing atomicXor() on the buffer through an SSBO binding
3018 const tcu::ScopedLogSection section(m_testCtx.getLog(), "SSBOProgram", "SSBO atomic program");
3020 m_ssboAtomicProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genSSBOComputeSource()));
3021 m_testCtx.getLog() << *m_ssboAtomicProgram;
3022 if (!m_ssboAtomicProgram->isOk())
3023 throw tcu::TestError("could not build program");
// program performing atomicCounterIncrement() on the same buffer
3026 const tcu::ScopedLogSection section(m_testCtx.getLog(), "AtomicCounterProgram", "Atomic counter program");
3028 m_atomicCounterProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genAtomicCounterComputeSource()));
3029 m_testCtx.getLog() << *m_atomicCounterProgram;
3030 if (!m_atomicCounterProgram->isOk())
3031 throw tcu::TestError("could not build program");
// Releases the GL resources owned by the test: deletes the work buffer and
// both compute programs, resetting the program pointers to null.
3035 void ConcurrentSSBOAtomicCounterMixedCase::deinit (void)
3039 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
3043 delete m_ssboAtomicProgram;
3044 m_ssboAtomicProgram = DE_NULL;
3046 delete m_atomicCounterProgram;
3047 m_atomicCounterProgram = DE_NULL;
// Runs the test.
//
// The work buffer is bound both as a shader storage buffer (binding 1) and as
// an atomic counter buffer (binding 2). The atomic-counter program and the
// SSBO-atomic program are then dispatched alternately m_numCalls times with
// m_workSize work groups each. Afterwards the first word of the buffer is read
// back and compared against m_numCalls * m_workSize: every counter invocation
// adds one, while the XORs issued by the SSBO program are expected to cancel
// out. The outcome is reported through m_testCtx.setTestResult().
3050 TestCase::IterateResult ConcurrentSSBOAtomicCounterMixedCase::iterate (void)
3052 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3054 m_testCtx.getLog() << tcu::TestLog::Message << "Testing atomic counters and SSBO atomic operations with both backed by the same buffer." << tcu::TestLog::EndMessage;
3056 // invoke programs N times
3059 << tcu::TestLog::Message
3060 << "Running SSBO atomic program and atomic counter program " << m_numCalls << " times. (interleaved)\n"
3061 << "Num groups = (" << m_workSize << ", 1, 1)\n"
3062 << tcu::TestLog::EndMessage;
// same buffer object behind both binding points (indices match the shader layout qualifiers)
3064 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_bufferID);
3065 gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_bufferID);
// the two programs are dispatched back to back in each round
3067 for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
3069 gl.useProgram(m_atomicCounterProgram->getProgram());
3070 gl.dispatchCompute(m_workSize, 1, 1);
3072 gl.useProgram(m_ssboAtomicProgram->getProgram());
3073 gl.dispatchCompute(m_workSize, 1, 1);
3076 GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
3083 // XORs cancel out, only addition is left
3084 m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
// read the value back through the atomic counter buffer binding point
3086 gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_bufferID);
3087 result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
// one increment per work group per call is expected
3089 if ((int)result != m_numCalls*m_workSize)
3092 << tcu::TestLog::Message
3093 << "Buffer value error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
3094 << tcu::TestLog::EndMessage;
3096 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
3100 m_testCtx.getLog() << tcu::TestLog::Message << "Buffer is valid." << tcu::TestLog::EndMessage;
3103 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
// Generates the GLSL source of the SSBO-atomic compute shader.
//
// Each invocation (local size 1x1x1) XORs sb_work.targetValue (first word of
// the WorkBuffer SSBO, binding 1) with a single-bit mask; the bit is
// 24 + (gl_GlobalInvocationID.x % 8), i.e. one of the eight highest bits of
// the word. The value returned by atomicXor() is written to sb_work.dummy.
//
// Returns the source after template specialization by specializeShader().
3107 std::string ConcurrentSSBOAtomicCounterMixedCase::genSSBOComputeSource (void) const
3109 std::ostringstream buf;
3111 buf << "${GLSL_VERSION_DECL}\n"
3112 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3113 << "layout (binding = 1, std430) volatile buffer WorkBuffer\n"
3115 << " highp uint targetValue;\n"
3116 << " highp uint dummy;\n"
3121 << " // flip high bits\n"
3122 << " highp uint mask = uint(1) << (24u + (gl_GlobalInvocationID.x % 8u));\n"
3123 << " sb_work.dummy = atomicXor(sb_work.targetValue, mask);\n"
3126 return specializeShader(m_context, buf.str().c_str());
// Generates the GLSL source of the atomic-counter compute shader.
//
// Each invocation (local size 1x1x1) increments u_counter, an atomic counter
// at offset 0 of atomic counter buffer binding 2, exactly once.
//
// Returns the source after template specialization by specializeShader().
3129 std::string ConcurrentSSBOAtomicCounterMixedCase::genAtomicCounterComputeSource (void) const
3131 std::ostringstream buf;
3133 buf << "${GLSL_VERSION_DECL}\n"
3134 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3136 << "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
3140 << " atomicCounterIncrement(u_counter);\n"
3143 return specializeShader(m_context, buf.str().c_str());
// Constructs the top-level test group, named "synchronization".
3148 SynchronizationTests::SynchronizationTests (Context& context)
3149 : TestCaseGroup(context, "synchronization", "Synchronization tests")
// Destructor of the "synchronization" test group.
3153 SynchronizationTests::~SynchronizationTests (void)
// Builds the test hierarchy of the "synchronization" group.
//
// Three child groups are created and registered:
//   in_invocation     - write/read ordering inside a single invocation
//   inter_invocation  - the same cases with FLAG_IN_GROUP added to the flags
//   inter_call        - ordering between separate calls, split into
//                       with_memory_barrier and without_memory_barrier
//
// The first two groups receive the same five cases for every storage
// configuration (image / image_atomic / ssbo / ssbo_atomic). The
// without_memory_barrier group receives the concurrent atomic cases for every
// combination of call count and invocation configuration.
3157 void SynchronizationTests::init (void)
3159 tcu::TestCaseGroup* const inInvocationGroup = new tcu::TestCaseGroup(m_testCtx, "in_invocation", "Test intra-invocation synchronization");
3160 tcu::TestCaseGroup* const interInvocationGroup = new tcu::TestCaseGroup(m_testCtx, "inter_invocation", "Test inter-invocation synchronization");
3161 tcu::TestCaseGroup* const interCallGroup = new tcu::TestCaseGroup(m_testCtx, "inter_call", "Test inter-call synchronization");
3163 addChild(inInvocationGroup);
3164 addChild(interInvocationGroup);
3165 addChild(interCallGroup);
3167 // .in_invocation & .inter_invocation
3169 static const struct CaseConfig
3171 const char* namePrefix;
3172 const InterInvocationTestCase::StorageType storage;
3176 { "image", InterInvocationTestCase::STORAGE_IMAGE, 0 },
3177 { "image_atomic", InterInvocationTestCase::STORAGE_IMAGE, InterInvocationTestCase::FLAG_ATOMIC },
3178 { "ssbo", InterInvocationTestCase::STORAGE_BUFFER, 0 },
3179 { "ssbo_atomic", InterInvocationTestCase::STORAGE_BUFFER, InterInvocationTestCase::FLAG_ATOMIC },
// groupNdx 0 fills in_invocation, groupNdx 1 fills inter_invocation (adds FLAG_IN_GROUP)
3182 for (int groupNdx = 0; groupNdx < 2; ++groupNdx)
3184 tcu::TestCaseGroup* const targetGroup = (groupNdx == 0) ? (inInvocationGroup) : (interInvocationGroup);
3185 const int extraFlags = (groupNdx == 0) ? (0) : (InterInvocationTestCase::FLAG_IN_GROUP);
3187 for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
// storage name used in the human-readable case descriptions
3189 const char* const target = (configs[configNdx].storage == InterInvocationTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3191 targetGroup->addChild(new InvocationWriteReadCase(m_context,
3192 (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3193 (std::string("Write to ") + target + " and read it").c_str(),
3194 configs[configNdx].storage,
3195 configs[configNdx].flags | extraFlags));
3197 targetGroup->addChild(new InvocationReadWriteCase(m_context,
3198 (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3199 (std::string("Read from ") + target + " and then write to it").c_str(),
3200 configs[configNdx].storage,
3201 configs[configNdx].flags | extraFlags));
3203 targetGroup->addChild(new InvocationOverWriteCase(m_context,
3204 (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3205 (std::string("Write to ") + target + " twice and read it").c_str(),
3206 configs[configNdx].storage,
3207 configs[configNdx].flags | extraFlags));
3209 targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3210 (std::string(configs[configNdx].namePrefix) + "_alias_write").c_str(),
3211 (std::string("Write to aliasing ") + target + " and read it").c_str(),
3212 InvocationAliasWriteCase::TYPE_WRITE,
3213 configs[configNdx].storage,
3214 configs[configNdx].flags | extraFlags));
3216 targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3217 (std::string(configs[configNdx].namePrefix) + "_alias_overwrite").c_str(),
3218 (std::string("Write to aliasing ") + target + "s and read it").c_str(),
3219 InvocationAliasWriteCase::TYPE_OVERWRITE,
3220 configs[configNdx].storage,
3221 configs[configNdx].flags | extraFlags));
// inter_call is split into a group with and a group without memory barriers
3228 tcu::TestCaseGroup* const withBarrierGroup = new tcu::TestCaseGroup(m_testCtx, "with_memory_barrier", "Synchronize with memory barrier");
3229 tcu::TestCaseGroup* const withoutBarrierGroup = new tcu::TestCaseGroup(m_testCtx, "without_memory_barrier", "Synchronize without memory barrier");
3231 interCallGroup->addChild(withBarrierGroup);
3232 interCallGroup->addChild(withoutBarrierGroup);
3234 // .with_memory_barrier
3236 static const struct CaseConfig
3238 const char* namePrefix;
3239 const InterCallTestCase::StorageType storage;
3243 { "image", InterCallTestCase::STORAGE_IMAGE, 0 },
3244 { "image_atomic", InterCallTestCase::STORAGE_IMAGE, InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT },
3245 { "ssbo", InterCallTestCase::STORAGE_BUFFER, 0 },
3246 { "ssbo_atomic", InterCallTestCase::STORAGE_BUFFER, InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT },
// two distinct seeds passed to the write/read operations below
3249 const int seed0 = 123;
3250 const int seed1 = 457;
3252 for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
// storage name used in the human-readable case descriptions
3254 const char* const target = (configs[configNdx].storage == InterCallTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3256 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3257 (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3258 (std::string("Write to ") + target + " and read it").c_str(),
3259 configs[configNdx].storage,
3260 configs[configNdx].flags,
3261 InterCallOperations()
3262 << op::WriteData::Generate(1, seed0)
3264 << op::ReadData::Generate(1, seed0)));
3266 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3267 (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3268 (std::string("Read from ") + target + " and then write to it").c_str(),
3269 configs[configNdx].storage,
3270 configs[configNdx].flags,
3271 InterCallOperations()
3272 << op::ReadZeroData::Generate(1)
3274 << op::WriteData::Generate(1, seed0)));
3276 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3277 (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3278 (std::string("Write to ") + target + " twice and read it").c_str(),
3279 configs[configNdx].storage,
3280 configs[configNdx].flags,
3281 InterCallOperations()
3282 << op::WriteData::Generate(1, seed0)
3284 << op::WriteData::Generate(1, seed1)
3286 << op::ReadData::Generate(1, seed1)));
3288 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3289 (std::string(configs[configNdx].namePrefix) + "_multiple_write_read").c_str(),
3290 (std::string("Write to multiple ") + target + "s and read them").c_str(),
3291 configs[configNdx].storage,
3292 configs[configNdx].flags,
3293 InterCallOperations()
3294 << op::WriteData::Generate(1, seed0)
3295 << op::WriteData::Generate(2, seed1)
3297 << op::ReadMultipleData::Generate(1, seed0, 2, seed1)));
3299 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3300 (std::string(configs[configNdx].namePrefix) + "_multiple_interleaved_write_read").c_str(),
3301 (std::string("Write to same ") + target + " in multiple calls and read it").c_str(),
3302 configs[configNdx].storage,
3303 configs[configNdx].flags,
3304 InterCallOperations()
3305 << op::WriteDataInterleaved::Generate(1, seed0, true)
3306 << op::WriteDataInterleaved::Generate(1, seed1, false)
3308 << op::ReadDataInterleaved::Generate(1, seed0, seed1)));
// the next two cases differ only in the order of the two reads
3310 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3311 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_ordered").c_str(),
3312 (std::string("Two unrelated ") + target + " write-reads").c_str(),
3313 configs[configNdx].storage,
3314 configs[configNdx].flags,
3315 InterCallOperations()
3316 << op::WriteData::Generate(1, seed0)
3317 << op::WriteData::Generate(2, seed1)
3319 << op::ReadData::Generate(1, seed0)
3320 << op::ReadData::Generate(2, seed1)));
3322 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3323 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_non_ordered").c_str(),
3324 (std::string("Two unrelated ") + target + " write-reads").c_str(),
3325 configs[configNdx].storage,
3326 configs[configNdx].flags,
3327 InterCallOperations()
3328 << op::WriteData::Generate(1, seed0)
3329 << op::WriteData::Generate(2, seed1)
3331 << op::ReadData::Generate(2, seed1)
3332 << op::ReadData::Generate(1, seed0)));
3335 // .without_memory_barrier
// per-storage invocation configurations; each entry supplies the name used in
// the case name and the count passed to the case constructor
3337 struct InvocationConfig
3343 static const InvocationConfig ssboInvocations[] =
3349 static const InvocationConfig imageInvocations[] =
3355 static const InvocationConfig counterInvocations[] =
// number of dispatch calls each concurrent case is instantiated with
3361 static const int callCounts[] = { 2, 5, 100 };
3363 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(ssboInvocations); ++invocationNdx)
3364 for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3365 withoutBarrierGroup->addChild(new SSBOConcurrentAtomicCase(m_context, (std::string("ssbo_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + ssboInvocations[invocationNdx].name + "_invocations").c_str(), "", callCounts[callCountNdx], ssboInvocations[invocationNdx].count));
3367 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(imageInvocations); ++invocationNdx)
3368 for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3369 withoutBarrierGroup->addChild(new ConcurrentImageAtomicCase(m_context, (std::string("image_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + imageInvocations[invocationNdx].name + "_invocations").c_str(), "", callCounts[callCountNdx], imageInvocations[invocationNdx].count));
3371 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3372 for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3373 withoutBarrierGroup->addChild(new ConcurrentAtomicCounterCase(m_context, (std::string("atomic_counter_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(), "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
// the mixed SSBO/atomic-counter case reuses the atomic counter invocation configurations
3375 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3376 for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3377 withoutBarrierGroup->addChild(new ConcurrentSSBOAtomicCounterMixedCase(m_context, (std::string("ssbo_atomic_counter_mixed_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(), "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));