Fix PIPELINE_STAGE_TOP_OF_PIPE_BIT usage in api tests
[platform/upstream/VK-GL-CTS.git] / modules / gles31 / functional / es31fIndirectComputeDispatchTests.cpp
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.1 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Indirect compute dispatch tests.
22  *//*--------------------------------------------------------------------*/
23
24 #include "es31fIndirectComputeDispatchTests.hpp"
25 #include "gluObjectWrapper.hpp"
26 #include "gluRenderContext.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "glwFunctions.hpp"
29 #include "glwEnums.hpp"
30 #include "tcuVector.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "tcuTestLog.hpp"
33 #include "deStringUtil.hpp"
34
35 #include <vector>
36 #include <string>
37 #include <map>
38
39 namespace deqp
40 {
41 namespace gles31
42 {
43 namespace Functional
44 {
45
46 using tcu::UVec3;
47 using tcu::TestLog;
48 using std::vector;
49 using std::string;
50 using std::map;
51
52 // \todo [2014-02-17 pyry] Should be extended with following:
53
54 // Negative:
55 //  - no active shader program
56 //  - indirect negative or not aligned
57 //  - indirect + size outside buffer bounds
58 //  - no buffer bound to DRAW_INDIRECT_BUFFER
59 //  - (implicit) buffer mapped
60
61 // Robustness:
62 //  - lot of small work group launches
63 //  - very large work group size
64 //  - no synchronization, touched by gpu
65 //  - compute program overwriting buffer
66
67 namespace
68 {
69
70 enum
71 {
72         RESULT_BLOCK_BASE_SIZE                          = (3+1)*(int)sizeof(deUint32),          // uvec3 + uint
73         RESULT_BLOCK_EXPECTED_COUNT_OFFSET      = 0,
74         RESULT_BLOCK_NUM_PASSED_OFFSET          = 3*(int)sizeof(deUint32),
75
76         INDIRECT_COMMAND_SIZE                           = 3*(int)sizeof(deUint32)
77 };
78
// Selects how the contents of the indirect command buffer are produced.
enum GenBuffer
{
	GEN_BUFFER_UPLOAD	= 0,	// Commands are written on the CPU and uploaded with glBufferData().
	GEN_BUFFER_COMPUTE	= 1,	// Commands are written into the buffer by a compute shader.

	GEN_BUFFER_LAST		= 2
};
86
87 glu::ProgramSources genVerifySources (const UVec3& workGroupSize)
88 {
89         static const char* s_verifyDispatchTmpl =
90                 "#version 310 es\n"
91                 "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
92                 "layout(binding = 0, std430) buffer Result\n"
93                 "{\n"
94                 "    uvec3           expectedGroupCount;\n"
95                 "    coherent uint   numPassed;\n"
96                 "} result;\n"
97                 "void main (void)\n"
98                 "{\n"
99                 "    if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
100                 "        atomicAdd(result.numPassed, 1u);\n"
101                 "}\n";
102
103         map<string, string> args;
104
105         args["LOCAL_SIZE_X"] = de::toString(workGroupSize.x());
106         args["LOCAL_SIZE_Y"] = de::toString(workGroupSize.y());
107         args["LOCAL_SIZE_Z"] = de::toString(workGroupSize.z());
108
109         return glu::ProgramSources() << glu::ComputeSource(tcu::StringTemplate(s_verifyDispatchTmpl).specialize(args));
110 }
111
112 class IndirectDispatchCase : public TestCase
113 {
114 public:
115                                                         IndirectDispatchCase    (Context& context, const char* name, const char* description, GenBuffer genBuffer);
116                                                         ~IndirectDispatchCase   (void);
117
118         IterateResult                   iterate                                 (void);
119
120 protected:
121         struct DispatchCommand
122         {
123                 deIntptr        offset;
124                 UVec3           numWorkGroups;
125
126                 DispatchCommand (void) : offset(0) {}
127                 DispatchCommand (deIntptr offset_, const UVec3& numWorkGroups_) : offset(offset_), numWorkGroups(numWorkGroups_) {}
128         };
129
130         GenBuffer                               m_genBuffer;
131         deUintptr                               m_bufferSize;
132         UVec3                                   m_workGroupSize;
133         vector<DispatchCommand> m_commands;
134
135         void                                    createCommandBuffer             (deUint32 buffer) const;
136         void                                    createResultBuffer              (deUint32 buffer) const;
137
138         bool                                    verifyResultBuffer              (deUint32 buffer);
139
140         void                                    createCmdBufferUpload   (deUint32 buffer) const;
141         void                                    createCmdBufferCompute  (deUint32 buffer) const;
142
143 private:
144                                                         IndirectDispatchCase    (const IndirectDispatchCase&);
145         IndirectDispatchCase&   operator=                               (const IndirectDispatchCase&);
146 };
147
148 IndirectDispatchCase::IndirectDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer)
149         : TestCase              (context, name, description)
150         , m_genBuffer   (genBuffer)
151         , m_bufferSize  (0)
152 {
153 }
154
155 IndirectDispatchCase::~IndirectDispatchCase (void)
156 {
157 }
158
159 static int getResultBlockAlignedSize (const glw::Functions& gl)
160 {
161         const int       baseSize        = RESULT_BLOCK_BASE_SIZE;
162         int                     alignment       = 0;
163         gl.getIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &alignment);
164
165         if (alignment == 0 || (baseSize % alignment == 0))
166                 return baseSize;
167         else
168                 return (baseSize/alignment + 1)*alignment;
169 }
170
171 void IndirectDispatchCase::createCommandBuffer (deUint32 buffer) const
172 {
173         switch (m_genBuffer)
174         {
175                 case GEN_BUFFER_UPLOAD:         createCmdBufferUpload   (buffer);               break;
176                 case GEN_BUFFER_COMPUTE:        createCmdBufferCompute  (buffer);               break;
177                 default:
178                         DE_ASSERT(false);
179         }
180 }
181
182 void IndirectDispatchCase::createCmdBufferUpload (deUint32 buffer) const
183 {
184         const glw::Functions&   gl              = m_context.getRenderContext().getFunctions();
185         vector<deUint8>                 data    (m_bufferSize);
186
187         for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter)
188         {
189                 DE_STATIC_ASSERT(INDIRECT_COMMAND_SIZE >= sizeof(deUint32)*3);
190                 DE_ASSERT(cmdIter->offset >= 0);
191                 DE_ASSERT(cmdIter->offset%sizeof(deUint32) == 0);
192                 DE_ASSERT(cmdIter->offset + INDIRECT_COMMAND_SIZE <= (deIntptr)m_bufferSize);
193
194                 deUint32* const dstPtr = (deUint32*)&data[cmdIter->offset];
195
196                 dstPtr[0] = cmdIter->numWorkGroups[0];
197                 dstPtr[1] = cmdIter->numWorkGroups[1];
198                 dstPtr[2] = cmdIter->numWorkGroups[2];
199         }
200
201         gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
202         gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)data.size(), &data[0], GL_STATIC_DRAW);
203 }
204
205 void IndirectDispatchCase::createCmdBufferCompute (deUint32 buffer) const
206 {
207         std::ostringstream src;
208
209         // Header
210         src <<
211                 "#version 310 es\n"
212                 "layout(local_size_x = 1) in;\n"
213                 "layout(std430, binding = 1) buffer Out\n"
214                 "{\n"
215                 "       highp uint data[];\n"
216                 "};\n"
217                 "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
218                 "{\n"
219                 "       data[offset+0u] = numWorkGroups.x;\n"
220                 "       data[offset+1u] = numWorkGroups.y;\n"
221                 "       data[offset+2u] = numWorkGroups.z;\n"
222                 "}\n"
223                 "void main (void)\n"
224                 "{\n";
225
226         // Commands
227         for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter)
228         {
229                 const deUint32 offs = (deUint32)(cmdIter->offset/4);
230                 DE_ASSERT((deIntptr)offs*4 == cmdIter->offset);
231
232                 src << "\twriteCmd(" << offs << "u, uvec3("
233                         << cmdIter->numWorkGroups.x() << "u, "
234                         << cmdIter->numWorkGroups.y() << "u, "
235                         << cmdIter->numWorkGroups.z() << "u));\n";
236         }
237
238         src << "}\n";
239
240         {
241                 const glw::Functions&   gl                      = m_context.getRenderContext().getFunctions();
242                 glu::ShaderProgram              program         (m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(src.str()));
243
244                 m_testCtx.getLog() << program;
245                 if (!program.isOk())
246                         TCU_FAIL("Compile failed");
247
248                 gl.useProgram(program.getProgram());
249
250                 gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
251                 gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)m_bufferSize, DE_NULL, GL_STATIC_DRAW);
252                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffer);
253                 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
254
255                 gl.dispatchCompute(1,1,1);
256                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute() failed");
257
258                 gl.memoryBarrier(GL_COMMAND_BARRIER_BIT);
259                 GLU_EXPECT_NO_ERROR(gl.getError(), "glMemoryBarrier(GL_COMMAND_BARRIER_BIT) failed");
260         }
261 }
262
263 void IndirectDispatchCase::createResultBuffer (deUint32 buffer) const
264 {
265         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
266         const int                               resultBlockSize         = getResultBlockAlignedSize(gl);
267         const int                               resultBufferSize        = resultBlockSize*(int)m_commands.size();
268         vector<deUint8>                 data                            (resultBufferSize);
269
270         for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
271         {
272                 deUint8* const  dstPtr  = &data[resultBlockSize*cmdNdx];
273
274                 *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 0*4) = m_commands[cmdNdx].numWorkGroups[0];
275                 *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 1*4) = m_commands[cmdNdx].numWorkGroups[1];
276                 *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 2*4) = m_commands[cmdNdx].numWorkGroups[2];
277                 *(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET)                   = 0;
278         }
279
280         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
281         gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizei)data.size(), &data[0], GL_STATIC_READ);
282 }
283
284 deUint32 computeInvocationCount (const UVec3& workGroupSize, const UVec3& numWorkGroups)
285 {
286         const int       numInvocationsPerGroup  = workGroupSize[0]*workGroupSize[1]*workGroupSize[2];
287         const int       numGroups                               = numWorkGroups[0]*numWorkGroups[1]*numWorkGroups[2];
288
289         return numInvocationsPerGroup*numGroups;
290 }
291
292 bool IndirectDispatchCase::verifyResultBuffer (deUint32 buffer)
293 {
294         const glw::Functions&   gl                                      = m_context.getRenderContext().getFunctions();
295
296         const int                               resultBlockSize         = getResultBlockAlignedSize(gl);
297         const int                               resultBufferSize        = resultBlockSize*(int)m_commands.size();
298
299         void*                                   mapPtr                          = DE_NULL;
300         bool                                    allOk                           = true;
301
302         try
303         {
304                 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
305                 mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, resultBufferSize, GL_MAP_READ_BIT);
306
307                 GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange() failed");
308                 TCU_CHECK(mapPtr);
309
310                 for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
311                 {
312                         const DispatchCommand&  cmd                             = m_commands[cmdNdx];
313                         const deUint8* const    srcPtr                  = (const deUint8*)mapPtr + cmdNdx*resultBlockSize;
314                         const deUint32                  numPassed               = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
315                         const deUint32                  expectedCount   = computeInvocationCount(m_workGroupSize, cmd.numWorkGroups);
316
317                         // Verify numPassed.
318                         if (numPassed != expectedCount)
319                         {
320                                 m_testCtx.getLog() << TestLog::Message << "ERROR: got invalid result for invocation " << cmdNdx
321                                                                                                            << ": got numPassed = " << numPassed << ", expected " << expectedCount
322                                                                    << TestLog::EndMessage;
323                                 allOk = false;
324                         }
325                 }
326         }
327         catch (...)
328         {
329                 if (mapPtr)
330                         gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
331         }
332
333         gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
334         GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer() failed");
335
336         return allOk;
337 }
338
339 IndirectDispatchCase::IterateResult IndirectDispatchCase::iterate (void)
340 {
341         const glu::RenderContext&               renderCtx                       = m_context.getRenderContext();
342         const glw::Functions&                   gl                                      = renderCtx.getFunctions();
343
344         const glu::ShaderProgram                program                         (renderCtx, genVerifySources(m_workGroupSize));
345
346         glu::Buffer                                             cmdBuffer                       (renderCtx);
347         glu::Buffer                                             resultBuffer            (renderCtx);
348
349         m_testCtx.getLog() << program;
350         TCU_CHECK_MSG(program.isOk(), "Compile failed");
351
352         m_testCtx.getLog() << TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << TestLog::EndMessage;
353         {
354                 tcu::ScopedLogSection section(m_testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_commands.size()) + " in total)");
355
356                 for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
357                         m_testCtx.getLog() << TestLog::Message << cmdNdx << ": " << "offset = " << m_commands[cmdNdx].offset
358                                                                                                    << ", numWorkGroups = " << m_commands[cmdNdx].numWorkGroups
359                                                            << TestLog::EndMessage;
360         }
361
362         createResultBuffer(*resultBuffer);
363         createCommandBuffer(*cmdBuffer);
364
365         gl.useProgram(program.getProgram());
366         gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, *cmdBuffer);
367         GLU_EXPECT_NO_ERROR(gl.getError(), "State setup failed");
368
369         {
370                 const int       resultBlockAlignedSize          = getResultBlockAlignedSize(gl);
371                 deIntptr        curOffset                                       = 0;
372
373                 for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter)
374                 {
375                         gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, *resultBuffer, (glw::GLintptr)curOffset, resultBlockAlignedSize);
376                         gl.dispatchComputeIndirect((glw::GLintptr)cmdIter->offset);
377
378                         curOffset += resultBlockAlignedSize;
379                 }
380         }
381
382         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchComputeIndirect() failed");
383
384         if (verifyResultBuffer(*resultBuffer))
385                 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
386         else
387                 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
388
389         return STOP;
390 }
391
392 class SingleDispatchCase : public IndirectDispatchCase
393 {
394 public:
395         SingleDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer, deUintptr bufferSize, deUintptr offset, const UVec3& workGroupSize, const UVec3& numWorkGroups)
396                 : IndirectDispatchCase(context, name, description, genBuffer)
397         {
398                 m_bufferSize    = bufferSize;
399                 m_workGroupSize = workGroupSize;
400                 m_commands.push_back(DispatchCommand(offset, numWorkGroups));
401         }
402 };
403
404 class MultiDispatchCase : public IndirectDispatchCase
405 {
406 public:
407         MultiDispatchCase (Context& context, GenBuffer genBuffer)
408                 : IndirectDispatchCase(context, "multi_dispatch", "Dispatch multiple compute commands from single buffer", genBuffer)
409         {
410                 m_bufferSize    = 1<<10;
411                 m_workGroupSize = UVec3(3,1,2);
412
413                 m_commands.push_back(DispatchCommand(0,                                         UVec3(1,1,1)));
414                 m_commands.push_back(DispatchCommand(INDIRECT_COMMAND_SIZE,     UVec3(2,1,1)));
415                 m_commands.push_back(DispatchCommand(104,                                       UVec3(1,3,1)));
416                 m_commands.push_back(DispatchCommand(40,                                        UVec3(1,1,7)));
417                 m_commands.push_back(DispatchCommand(52,                                        UVec3(1,1,4)));
418         }
419 };
420
421 class MultiDispatchReuseCommandCase : public IndirectDispatchCase
422 {
423 public:
424         MultiDispatchReuseCommandCase (Context& context, GenBuffer genBuffer)
425                 : IndirectDispatchCase(context, "multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", genBuffer)
426         {
427                 m_bufferSize    = 1<<10;
428                 m_workGroupSize = UVec3(3,1,2);
429
430                 m_commands.push_back(DispatchCommand(0,                                         UVec3(1,1,1)));
431                 m_commands.push_back(DispatchCommand(0,                                         UVec3(1,1,1)));
432                 m_commands.push_back(DispatchCommand(0,                                         UVec3(1,1,1)));
433                 m_commands.push_back(DispatchCommand(104,                                       UVec3(1,3,1)));
434                 m_commands.push_back(DispatchCommand(104,                                       UVec3(1,3,1)));
435                 m_commands.push_back(DispatchCommand(52,                                        UVec3(1,1,4)));
436                 m_commands.push_back(DispatchCommand(52,                                        UVec3(1,1,4)));
437         }
438 };
439
440 } // anonymous
441
442 IndirectComputeDispatchTests::IndirectComputeDispatchTests (Context& context)
443         : TestCaseGroup(context, "indirect_dispatch", "Indirect dispatch tests")
444 {
445 }
446
447 IndirectComputeDispatchTests::~IndirectComputeDispatchTests (void)
448 {
449 }
450
451 void IndirectComputeDispatchTests::init (void)
452 {
453         static const struct
454         {
455                 const char*             name;
456                 GenBuffer               gen;
457         } s_genBuffer[] =
458         {
459                 { "upload_buffer",              GEN_BUFFER_UPLOAD       },
460                 { "gen_in_compute",             GEN_BUFFER_COMPUTE      }
461         };
462
463         static const struct
464         {
465                 const char*     name;
466                 const char*     description;
467                 deUintptr       bufferSize;
468                 deUintptr       offset;
469                 UVec3           workGroupSize;
470                 UVec3           numWorkGroups;
471         } s_singleDispatchCases[] =
472         {
473         //      Name                                                                            Desc                                                                                    BufferSize                                      Offs                    WorkGroupSize   NumWorkGroups
474                 { "single_invocation",                                          "Single invocation only from offset 0",                 INDIRECT_COMMAND_SIZE,          0,                              UVec3(1,1,1),   UVec3(1,1,1) },
475                 { "multiple_groups",                                            "Multiple groups dispatched from offset 0",             INDIRECT_COMMAND_SIZE,          0,                              UVec3(1,1,1),   UVec3(2,3,5) },
476                 { "multiple_groups_multiple_invocations",       "Multiple groups of size 2x3x1 from offset 0",  INDIRECT_COMMAND_SIZE,          0,                              UVec3(2,3,1),   UVec3(1,2,3) },
477                 { "small_offset",                                                       "Small offset",                                                                 16+INDIRECT_COMMAND_SIZE,       16,                             UVec3(1,1,1),   UVec3(1,1,1) },
478                 { "large_offset",                                                       "Large offset",                                                                 (2<<20),                                        (1<<20) + 12,   UVec3(1,1,1),   UVec3(1,1,1) },
479                 { "large_offset_multiple_invocations",          "Large offset, multiple invocations",                   (2<<20),                                        (1<<20) + 12,   UVec3(2,3,1),   UVec3(1,2,3) },
480                 { "empty_command",                                                      "Empty command",                                                                INDIRECT_COMMAND_SIZE,          0,                              UVec3(1,1,1),   UVec3(0,0,0) },
481         };
482
483         for (int genNdx = 0; genNdx < DE_LENGTH_OF_ARRAY(s_genBuffer); genNdx++)
484         {
485                 const GenBuffer                         genBuf          = s_genBuffer[genNdx].gen;
486                 tcu::TestCaseGroup* const       genGroup        = new tcu::TestCaseGroup(m_testCtx, s_genBuffer[genNdx].name, "");
487                 addChild(genGroup);
488
489                 for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_singleDispatchCases); ndx++)
490                         genGroup->addChild(new SingleDispatchCase(m_context,
491                                                                                                           s_singleDispatchCases[ndx].name,
492                                                                                                           s_singleDispatchCases[ndx].description,
493                                                                                                           genBuf,
494                                                                                                           s_singleDispatchCases[ndx].bufferSize,
495                                                                                                           s_singleDispatchCases[ndx].offset,
496                                                                                                           s_singleDispatchCases[ndx].workGroupSize,
497                                                                                                           s_singleDispatchCases[ndx].numWorkGroups));
498
499                 genGroup->addChild(new MultiDispatchCase                                (m_context, genBuf));
500                 genGroup->addChild(new MultiDispatchReuseCommandCase    (m_context, genBuf));
501         }
502 }
503
504 } // Functional
505 } // gles31
506 } // deqp