am 9eefd6e7: am 5efd3ca9: (-s ours) am fa285d6b: am 36059e0d: am 7ce9438a: am 0870de9...
[platform/upstream/VK-GL-CTS.git] / modules / glshared / glsShaderExecUtil.cpp
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL (ES) Module
3  * -----------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Shader execution utilities.
22  *//*--------------------------------------------------------------------*/
23
24 #include "glsShaderExecUtil.hpp"
25 #include "gluRenderContext.hpp"
26 #include "gluDrawUtil.hpp"
27 #include "gluObjectWrapper.hpp"
28 #include "gluShaderProgram.hpp"
29 #include "gluTextureUtil.hpp"
30 #include "gluProgramInterfaceQuery.hpp"
31 #include "gluPixelTransfer.hpp"
32 #include "gluStrUtil.hpp"
33 #include "tcuTestLog.hpp"
34 #include "glwFunctions.hpp"
35 #include "glwEnums.hpp"
36 #include "deSTLUtil.hpp"
37 #include "deStringUtil.hpp"
38 #include "deUniquePtr.hpp"
39 #include "deMemory.h"
40
41 #include <map>
42
43 namespace deqp
44 {
45 namespace gls
46 {
47
48 namespace ShaderExecUtil
49 {
50
51 using std::vector;
52
53 static bool isExtensionSupported (const glu::RenderContext& renderCtx, const std::string& extension)
54 {
55         const glw::Functions&   gl              = renderCtx.getFunctions();
56         int                                             numExts = 0;
57
58         gl.getIntegerv(GL_NUM_EXTENSIONS, &numExts);
59
60         for (int ndx = 0; ndx < numExts; ndx++)
61         {
62                 const char* curExt = (const char*)gl.getStringi(GL_EXTENSIONS, ndx);
63
64                 if (extension == curExt)
65                         return true;
66         }
67
68         return false;
69 }
70
71 static void checkExtension (const glu::RenderContext& renderCtx, const std::string& extension)
72 {
73         if (!isExtensionSupported(renderCtx, extension))
74                 throw tcu::NotSupportedError(extension + " is not supported");
75 }
76
77 static void checkLimit (const glu::RenderContext& renderCtx, deUint32 pname, int required)
78 {
79         const glw::Functions&   gl                                      = renderCtx.getFunctions();
80         int                                             implementationLimit     = -1;
81         deUint32                                error;
82
83         gl.getIntegerv(pname, &implementationLimit);
84         error = gl.getError();
85
86         if (error != GL_NO_ERROR)
87                 throw tcu::TestError("Failed to query " + de::toString(glu::getGettableStateStr(pname)) + " - got " + de::toString(glu::getErrorStr(error)));
88         if (implementationLimit < required)
89                 throw tcu::NotSupportedError("Test requires " + de::toString(glu::getGettableStateStr(pname)) + " >= " + de::toString(required) + ", got " + de::toString(implementationLimit));
90 }
91
92 // Shader utilities
93
94 static std::string generateVertexShader (const ShaderSpec& shaderSpec)
95 {
96         const bool                      usesInout       = glu::glslVersionUsesInOutQualifiers(shaderSpec.version);
97         const char*                     in                      = usesInout ? "in"              : "attribute";
98         const char*                     out                     = usesInout ? "out"             : "varying";
99         std::ostringstream      src;
100
101         src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
102
103         if (!shaderSpec.globalDeclarations.empty())
104                 src << shaderSpec.globalDeclarations << "\n";
105
106         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
107                 src << in << " " << glu::declare(input->varType, input->name) << ";\n";
108
109         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
110         {
111                 DE_ASSERT(output->varType.isBasicType());
112
113                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
114                 {
115                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
116                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
117                         const glu::VarType              intType         (intBaseType, glu::PRECISION_HIGHP);
118
119                         src << "flat " << out << " " << glu::declare(intType, "o_" + output->name) << ";\n";
120                 }
121                 else
122                         src << "flat " << out << " " << glu::declare(output->varType, output->name) << ";\n";
123         }
124
125         src << "\n"
126                 << "void main (void)\n"
127                 << "{\n"
128                 << "    gl_Position = vec4(0.0);\n"
129                 << "    gl_PointSize = 1.0;\n\n";
130
131         // Declare necessary output variables (bools).
132         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
133         {
134                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
135                         src << "\t" << glu::declare(output->varType, output->name) << ";\n";
136         }
137
138         // Operation - indented to correct level.
139         {
140                 std::istringstream      opSrc   (shaderSpec.source);
141                 std::string                     line;
142
143                 while (std::getline(opSrc, line))
144                         src << "\t" << line << "\n";
145         }
146
147         // Assignments to outputs.
148         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
149         {
150                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
151                 {
152                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
153                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
154
155                         src << "\to_" << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
156                 }
157         }
158
159         src << "}\n";
160
161         return src.str();
162 }
163
164 static std::string generateGeometryShader (const ShaderSpec& shaderSpec)
165 {
166         DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
167
168         std::ostringstream      src;
169
170         src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
171
172         if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
173                 src << "#extension GL_EXT_geometry_shader : require\n";
174
175         if (!shaderSpec.globalDeclarations.empty())
176                 src << shaderSpec.globalDeclarations << "\n";
177
178         src << "layout(points) in;\n"
179                 << "layout(points, max_vertices = 1) out;\n";
180
181         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
182                 src << "flat in " << glu::declare(input->varType, "geom_" + input->name) << "[];\n";
183
184         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
185         {
186                 DE_ASSERT(output->varType.isBasicType());
187
188                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
189                 {
190                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
191                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
192                         const glu::VarType              intType         (intBaseType, glu::PRECISION_HIGHP);
193
194                         src << "flat out " << glu::declare(intType, "o_" + output->name) << ";\n";
195                 }
196                 else
197                         src << "flat out " << glu::declare(output->varType, output->name) << ";\n";
198         }
199
200         src << "\n"
201                 << "void main (void)\n"
202                 << "{\n"
203                 << "    gl_Position = gl_in[0].gl_Position;\n\n";
204
205         // Fetch input variables
206         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
207                 src << "\t" << glu::declare(input->varType, input->name) << " = geom_" << input->name << "[0];\n";
208
209         // Declare necessary output variables (bools).
210         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
211         {
212                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
213                         src << "\t" << glu::declare(output->varType, output->name) << ";\n";
214         }
215
216         src << "\n";
217
218         // Operation - indented to correct level.
219         {
220                 std::istringstream      opSrc   (shaderSpec.source);
221                 std::string                     line;
222
223                 while (std::getline(opSrc, line))
224                         src << "\t" << line << "\n";
225         }
226
227         // Assignments to outputs.
228         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
229         {
230                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
231                 {
232                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
233                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
234
235                         src << "\to_" << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
236                 }
237         }
238
239         src << "        EmitVertex();\n"
240                 << "    EndPrimitive();\n"
241                 << "}\n";
242
243         return src.str();
244 }
245
246 static std::string generateEmptyFragmentSource (glu::GLSLVersion version)
247 {
248         const bool                      customOut               = glu::glslVersionUsesInOutQualifiers(version);
249         std::ostringstream      src;
250
251         src << glu::getGLSLVersionDeclaration(version) << "\n";
252
253         // \todo [2013-08-05 pyry] Do we need one dummy output?
254
255         src << "void main (void)\n{\n";
256         if (!customOut)
257                 src << "        gl_FragColor = vec4(0.0);\n";
258         src << "}\n";
259
260         return src.str();
261 }
262
263 static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const char* inputPrefix, const char* outputPrefix)
264 {
265         // flat qualifier is not present in earlier versions?
266         DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
267
268         std::ostringstream src;
269
270         src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n"
271                 << "in highp vec4 a_position;\n";
272
273         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
274         {
275                 src << "in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
276                         << "flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
277         }
278
279         src << "\nvoid main (void)\n{\n"
280                 << "    gl_Position = a_position;\n"
281                 << "    gl_PointSize = 1.0;\n";
282
283         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
284                 src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
285
286         src << "}\n";
287
288         return src.str();
289 }
290
291 static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap)
292 {
293         DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
294
295         std::ostringstream      src;
296         src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
297
298         if (!shaderSpec.globalDeclarations.empty())
299                 src << shaderSpec.globalDeclarations << "\n";
300
301         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
302                 src << "flat in " << glu::declare(input->varType, input->name) << ";\n";
303
304         for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
305         {
306                 const Symbol&                           output          = shaderSpec.outputs[outNdx];
307                 const int                                       location        = de::lookup(outLocationMap, output.name);
308                 const std::string                       outVarName      = "o_" + output.name;
309                 glu::VariableDeclaration        decl            (output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));
310
311                 TCU_CHECK_INTERNAL(output.varType.isBasicType());
312
313                 if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
314                 {
315                         const int                       vecSize                 = glu::getDataTypeScalarSize(output.varType.getBasicType());
316                         const glu::DataType     uintBasicType   = vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
317                         const glu::VarType      uintType                (uintBasicType, glu::PRECISION_HIGHP);
318
319                         decl.varType = uintType;
320                         src << decl << ";\n";
321                 }
322                 else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
323                 {
324                         const int                       vecSize                 = glu::getDataTypeScalarSize(output.varType.getBasicType());
325                         const glu::DataType     intBasicType    = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
326                         const glu::VarType      intType                 (intBasicType, glu::PRECISION_HIGHP);
327
328                         decl.varType = intType;
329                         src << decl << ";\n";
330                 }
331                 else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
332                 {
333                         const int                       vecSize                 = glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
334                         const int                       numVecs                 = glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
335                         const glu::DataType     uintBasicType   = glu::getDataTypeUintVec(vecSize);
336                         const glu::VarType      uintType                (uintBasicType, glu::PRECISION_HIGHP);
337
338                         decl.varType = uintType;
339                         for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
340                         {
341                                 decl.name                               = outVarName + "_" + de::toString(vecNdx);
342                                 decl.layout.location    = location + vecNdx;
343                                 src << decl << ";\n";
344                         }
345                 }
346                 else
347                         src << glu::VariableDeclaration(output.varType, output.name, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, location) << ";\n";
348         }
349
350         src << "\nvoid main (void)\n{\n";
351
352         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
353         {
354                 if ((useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType())) ||
355                         glu::isDataTypeBoolOrBVec(output->varType.getBasicType()) ||
356                         glu::isDataTypeMatrix(output->varType.getBasicType()))
357                         src << "\t" << glu::declare(output->varType, output->name) << ";\n";
358         }
359
360         // Operation - indented to correct level.
361         {
362                 std::istringstream      opSrc   (shaderSpec.source);
363                 std::string                     line;
364
365                 while (std::getline(opSrc, line))
366                         src << "\t" << line << "\n";
367         }
368
369         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
370         {
371                 if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
372                         src << "        o_" << output->name << " = floatBitsToUint(" << output->name << ");\n";
373                 else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
374                 {
375                         const int                       numVecs                 = glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
376
377                         for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
378                                 if (useIntOutputs)
379                                         src << "\to_" << output->name << "_" << vecNdx << " = floatBitsToUint(" << output->name << "[" << vecNdx << "]);\n";
380                                 else
381                                         src << "\to_" << output->name << "_" << vecNdx << " = " << output->name << "[" << vecNdx << "];\n";
382                 }
383                 else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
384                 {
385                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
386                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
387
388                         src << "\to_" << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
389                 }
390         }
391
392         src << "}\n";
393
394         return src.str();
395 }
396
397 // ShaderExecutor
398
399 ShaderExecutor::ShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
400         : m_renderCtx   (renderCtx)
401         , m_inputs              (shaderSpec.inputs)
402         , m_outputs             (shaderSpec.outputs)
403 {
404 }
405
406 ShaderExecutor::~ShaderExecutor (void)
407 {
408 }
409
410 void ShaderExecutor::useProgram (void)
411 {
412         DE_ASSERT(isOk());
413         m_renderCtx.getFunctions().useProgram(getProgram());
414 }
415
416 // VertexProcessorExecutor (base class for vertex and geometry executors)
417
418 class VertexProcessorExecutor : public ShaderExecutor
419 {
420 public:
421                                                                 VertexProcessorExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources);
422                                                                 ~VertexProcessorExecutor(void);
423
424         bool                                            isOk                                    (void) const                            { return m_program.isOk();                      }
425         void                                            log                                             (tcu::TestLog& dst) const       { dst << m_program;                                     }
426         deUint32                                        getProgram                              (void) const                            { return m_program.getProgram();        }
427
428         void                                            execute                                 (int numValues, const void* const* inputs, void* const* outputs);
429
430 protected:
431         glu::ShaderProgram                      m_program;
432 };
433
434 template<typename Iterator>
435 struct SymbolNameIterator
436 {
437         Iterator symbolIter;
438
439         SymbolNameIterator (Iterator symbolIter_) : symbolIter(symbolIter_) {}
440
441         inline SymbolNameIterator&      operator++      (void)                                                          { ++symbolIter; return *this;                           }
442
443         inline bool                                     operator==      (const SymbolNameIterator& other)       { return symbolIter == other.symbolIter;        }
444         inline bool                                     operator!=      (const SymbolNameIterator& other)       { return symbolIter != other.symbolIter;        }
445
446         inline std::string operator* (void) const
447         {
448                 if (glu::isDataTypeBoolOrBVec(symbolIter->varType.getBasicType()))
449                         return "o_" + symbolIter->name;
450                 else
451                         return symbolIter->name;
452         }
453 };
454
455 template<typename Iterator>
456 inline glu::TransformFeedbackVaryings<SymbolNameIterator<Iterator> > getTFVaryings (Iterator begin, Iterator end)
457 {
458         return glu::TransformFeedbackVaryings<SymbolNameIterator<Iterator> >(SymbolNameIterator<Iterator>(begin), SymbolNameIterator<Iterator>(end));
459 }
460
461 VertexProcessorExecutor::VertexProcessorExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources)
462         : ShaderExecutor        (renderCtx, shaderSpec)
463         , m_program                     (renderCtx,
464                                                  glu::ProgramSources(sources) << getTFVaryings(shaderSpec.outputs.begin(), shaderSpec.outputs.end())
465                                                                                                           << glu::TransformFeedbackMode(GL_INTERLEAVED_ATTRIBS))
466 {
467 }
468
469 VertexProcessorExecutor::~VertexProcessorExecutor (void)
470 {
471 }
472
// Sums varType.getScalarSize() over all elements in [begin, end).
// Returns 0 for an empty range.
template<typename Iterator>
static int computeTotalScalarSize (Iterator begin, Iterator end)
{
	int			totalScalars	= 0;
	Iterator	symbol			= begin;

	while (symbol != end)
	{
		totalScalars += symbol->varType.getScalarSize();
		++symbol;
	}

	return totalScalars;
}
481
482 void VertexProcessorExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
483 {
484         const glw::Functions&                                   gl                                      = m_renderCtx.getFunctions();
485         const bool                                                              useTFObject                     = isContextTypeES(m_renderCtx.getType()) || (isContextTypeGLCore(m_renderCtx.getType()) && m_renderCtx.getType().getMajorVersion() >= 4);
486         vector<glu::VertexArrayBinding>                 vertexArrays;
487         de::UniquePtr<glu::TransformFeedback>   transformFeedback       (useTFObject ? new glu::TransformFeedback(m_renderCtx) : DE_NULL);
488         glu::Buffer                                                             outputBuffer            (m_renderCtx);
489         const int                                                               outputBufferStride      = computeTotalScalarSize(m_outputs.begin(), m_outputs.end())*sizeof(deUint32);
490
491         // Setup inputs.
492         for (int inputNdx = 0; inputNdx < (int)m_inputs.size(); inputNdx++)
493         {
494                 const Symbol&           symbol          = m_inputs[inputNdx];
495                 const void*                     ptr                     = inputs[inputNdx];
496                 const glu::DataType     basicType       = symbol.varType.getBasicType();
497                 const int                       vecSize         = glu::getDataTypeScalarSize(basicType);
498
499                 if (glu::isDataTypeFloatOrVec(basicType))
500                         vertexArrays.push_back(glu::va::Float(symbol.name, vecSize, numValues, 0, (const float*)ptr));
501                 else if (glu::isDataTypeIntOrIVec(basicType))
502                         vertexArrays.push_back(glu::va::Int32(symbol.name, vecSize, numValues, 0, (const deInt32*)ptr));
503                 else if (glu::isDataTypeUintOrUVec(basicType))
504                         vertexArrays.push_back(glu::va::Uint32(symbol.name, vecSize, numValues, 0, (const deUint32*)ptr));
505                 else if (glu::isDataTypeMatrix(basicType))
506                 {
507                         int             numRows = glu::getDataTypeMatrixNumRows(basicType);
508                         int             numCols = glu::getDataTypeMatrixNumColumns(basicType);
509                         int             stride  = numRows * numCols * sizeof(float);
510
511                         for (int colNdx = 0; colNdx < numCols; ++colNdx)
512                                 vertexArrays.push_back(glu::va::Float(symbol.name, colNdx, numRows, numValues, stride, ((const float*)ptr) + colNdx * numRows));
513                 }
514                 else
515                         DE_ASSERT(false);
516         }
517
518         // Setup TF outputs.
519         if (useTFObject)
520                 gl.bindTransformFeedback(GL_TRANSFORM_FEEDBACK, **transformFeedback);
521         gl.bindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, *outputBuffer);
522         gl.bufferData(GL_TRANSFORM_FEEDBACK_BUFFER, outputBufferStride*numValues, DE_NULL, GL_STREAM_READ);
523         gl.bindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, *outputBuffer);
524         GLU_EXPECT_NO_ERROR(gl.getError(), "Error in TF setup");
525
526         // Draw with rasterization disabled.
527         gl.beginTransformFeedback(GL_POINTS);
528         gl.enable(GL_RASTERIZER_DISCARD);
529         glu::draw(m_renderCtx, m_program.getProgram(), (int)vertexArrays.size(), vertexArrays.empty() ? DE_NULL : &vertexArrays[0],
530                           glu::pr::Points(numValues));
531         gl.disable(GL_RASTERIZER_DISCARD);
532         gl.endTransformFeedback();
533         GLU_EXPECT_NO_ERROR(gl.getError(), "Error in draw");
534
535         // Read back data.
536         {
537                 const void*     srcPtr          = gl.mapBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, outputBufferStride*numValues, GL_MAP_READ_BIT);
538                 int                     curOffset       = 0; // Offset in buffer in bytes.
539
540                 GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER)");
541                 TCU_CHECK(srcPtr != DE_NULL);
542
543                 for (int outputNdx = 0; outputNdx < (int)m_outputs.size(); outputNdx++)
544                 {
545                         const Symbol&           symbol          = m_outputs[outputNdx];
546                         void*                           dstPtr          = outputs[outputNdx];
547                         const int                       scalarSize      = symbol.varType.getScalarSize();
548
549                         for (int ndx = 0; ndx < numValues; ndx++)
550                                 deMemcpy((deUint32*)dstPtr + scalarSize*ndx, (const deUint8*)srcPtr + curOffset + ndx*outputBufferStride, scalarSize*sizeof(deUint32));
551
552                         curOffset += scalarSize*sizeof(deUint32);
553                 }
554
555                 gl.unmapBuffer(GL_TRANSFORM_FEEDBACK_BUFFER);
556                 GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
557         }
558
559         if (useTFObject)
560                 gl.bindTransformFeedback(GL_TRANSFORM_FEEDBACK, 0);
561         gl.bindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
562         GLU_EXPECT_NO_ERROR(gl.getError(), "Restore state");
563 }
564
565 // VertexShaderExecutor
566
567 class VertexShaderExecutor : public VertexProcessorExecutor
568 {
569 public:
570                                                                 VertexShaderExecutor    (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
571 };
572
573 VertexShaderExecutor::VertexShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
574         : VertexProcessorExecutor       (renderCtx, shaderSpec,
575                                                                  glu::ProgramSources() << glu::VertexSource(generateVertexShader(shaderSpec))
576                                                                                                            << glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version)))
577 {
578 }
579
580 // GeometryShaderExecutor
581
582 class CheckGeomSupport
583 {
584 public:
585         inline CheckGeomSupport (const glu::RenderContext& renderCtx)
586         {
587                 if (renderCtx.getType().getAPI().getProfile() == glu::PROFILE_ES)
588                         checkExtension(renderCtx, "GL_EXT_geometry_shader");
589         }
590 };
591
592 class GeometryShaderExecutor : private CheckGeomSupport, public VertexProcessorExecutor
593 {
594 public:
595                                                                 GeometryShaderExecutor  (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
596 };
597
598 GeometryShaderExecutor::GeometryShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
599         : CheckGeomSupport                      (renderCtx)
600         , VertexProcessorExecutor       (renderCtx, shaderSpec,
601                                                                  glu::ProgramSources() << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "", "geom_"))
602                                                                                                            << glu::GeometrySource(generateGeometryShader(shaderSpec))
603                                                                                                            << glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version)))
604 {
605 }
606
607 // FragmentShaderExecutor
608
609 class FragmentShaderExecutor : public ShaderExecutor
610 {
611 public:
612                                                                 FragmentShaderExecutor  (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
613                                                                 ~FragmentShaderExecutor (void);
614
615         bool                                            isOk                                    (void) const                            { return m_program.isOk();                      }
616         void                                            log                                             (tcu::TestLog& dst) const       { dst << m_program;                                     }
617         deUint32                                        getProgram                              (void) const                            { return m_program.getProgram();        }
618
619         void                                            execute                                 (int numValues, const void* const* inputs, void* const* outputs);
620
621 protected:
622         std::vector<const Symbol*>      m_outLocationSymbols;
623         std::map<std::string, int>      m_outLocationMap;
624         glu::ShaderProgram                      m_program;
625 };
626
627 static std::map<std::string, int> generateLocationMap (const std::vector<Symbol>& symbols, std::vector<const Symbol*>& locationSymbols)
628 {
629         std::map<std::string, int>      ret;
630         int                                                     location        = 0;
631
632         locationSymbols.clear();
633
634         for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
635         {
636                 const int       numLocations    = glu::getDataTypeNumLocations(it->varType.getBasicType());
637
638                 TCU_CHECK_INTERNAL(!de::contains(ret, it->name));
639                 de::insert(ret, it->name, location);
640                 location += numLocations;
641
642                 for (int ndx = 0; ndx < numLocations; ++ndx)
643                         locationSymbols.push_back(&*it);
644         }
645
646         return ret;
647 }
648
649 inline bool hasFloatRenderTargets (const glu::RenderContext& renderCtx)
650 {
651         glu::ContextType type = renderCtx.getType();
652         return glu::isContextTypeGLCore(type);
653 }
654
655 FragmentShaderExecutor::FragmentShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
656         : ShaderExecutor                (renderCtx, shaderSpec)
657         , m_outLocationSymbols  ()
658         , m_outLocationMap              (generateLocationMap(m_outputs, m_outLocationSymbols))
659         , m_program                             (renderCtx,
660                                                          glu::ProgramSources() << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", ""))
661                                                                                                    << glu::FragmentSource(generateFragmentShader(shaderSpec, !hasFloatRenderTargets(renderCtx), m_outLocationMap)))
662 {
663 }
664
665 FragmentShaderExecutor::~FragmentShaderExecutor (void)
666 {
667 }
668
669 inline int queryInt (const glw::Functions& gl, deUint32 pname)
670 {
671         int value = 0;
672         gl.getIntegerv(pname, &value);
673         return value;
674 }
675
676 static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
677 {
678         const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
679         {
680                 tcu::TextureFormat::R,
681                 tcu::TextureFormat::RG,
682                 tcu::TextureFormat::RGBA,       // No RGB variants available.
683                 tcu::TextureFormat::RGBA
684         };
685
686         const glu::DataType                                     basicType               = outputType.getBasicType();
687         const int                                                       numComps                = glu::getDataTypeNumComponents(basicType);
688         tcu::TextureFormat::ChannelType         channelType;
689
690         switch (glu::getDataTypeScalarType(basicType))
691         {
692                 case glu::TYPE_UINT:    channelType = tcu::TextureFormat::UNSIGNED_INT32;                                                                                               break;
693                 case glu::TYPE_INT:             channelType = tcu::TextureFormat::SIGNED_INT32;                                                                                                 break;
694                 case glu::TYPE_BOOL:    channelType = tcu::TextureFormat::SIGNED_INT32;                                                                                                 break;
695                 case glu::TYPE_FLOAT:   channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;   break;
696                 default:
697                         throw tcu::InternalError("Invalid output type");
698         }
699
700         DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
701
702         return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
703 }
704
705 void FragmentShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
706 {
707         const glw::Functions&                   gl                                      = m_renderCtx.getFunctions();
708         const bool                                              useIntOutputs           = !hasFloatRenderTargets(m_renderCtx);
709         const int                                               maxRenderbufferSize     = queryInt(gl, GL_MAX_RENDERBUFFER_SIZE);
710         const int                                               framebufferW            = de::min(maxRenderbufferSize, numValues);
711         const int                                               framebufferH            = (numValues / framebufferW) + ((numValues % framebufferW != 0) ? 1 : 0);
712
713         glu::Framebuffer                                framebuffer                     (m_renderCtx);
714         glu::RenderbufferVector                 renderbuffers           (m_renderCtx, m_outLocationSymbols.size());
715
716         vector<glu::VertexArrayBinding> vertexArrays;
717         vector<tcu::Vec2>                               positions                       (numValues);
718
719         if (framebufferH > maxRenderbufferSize)
720                 throw tcu::NotSupportedError("Value count is too high for maximum supported renderbuffer size");
721
722         // Compute positions - 1px points are used to drive fragment shading.
723         for (int valNdx = 0; valNdx < numValues; valNdx++)
724         {
725                 const int               ix              = valNdx % framebufferW;
726                 const int               iy              = valNdx / framebufferW;
727                 const float             fx              = -1.0f + 2.0f*((float(ix) + 0.5f) / float(framebufferW));
728                 const float             fy              = -1.0f + 2.0f*((float(iy) + 0.5f) / float(framebufferH));
729
730                 positions[valNdx] = tcu::Vec2(fx, fy);
731         }
732
733         // Vertex inputs.
734         vertexArrays.push_back(glu::va::Float("a_position", 2, numValues, 0, (const float*)&positions[0]));
735
736         for (int inputNdx = 0; inputNdx < (int)m_inputs.size(); inputNdx++)
737         {
738                 const Symbol&           symbol          = m_inputs[inputNdx];
739                 const std::string       attribName      = "a_" + symbol.name;
740                 const void*                     ptr                     = inputs[inputNdx];
741                 const glu::DataType     basicType       = symbol.varType.getBasicType();
742                 const int                       vecSize         = glu::getDataTypeScalarSize(basicType);
743
744                 if (glu::isDataTypeFloatOrVec(basicType))
745                         vertexArrays.push_back(glu::va::Float(attribName, vecSize, numValues, 0, (const float*)ptr));
746                 else if (glu::isDataTypeIntOrIVec(basicType))
747                         vertexArrays.push_back(glu::va::Int32(attribName, vecSize, numValues, 0, (const deInt32*)ptr));
748                 else if (glu::isDataTypeUintOrUVec(basicType))
749                         vertexArrays.push_back(glu::va::Uint32(attribName, vecSize, numValues, 0, (const deUint32*)ptr));
750                 else if (glu::isDataTypeMatrix(basicType))
751                 {
752                         int             numRows = glu::getDataTypeMatrixNumRows(basicType);
753                         int             numCols = glu::getDataTypeMatrixNumColumns(basicType);
754                         int             stride  = numRows * numCols * sizeof(float);
755
756                         for (int colNdx = 0; colNdx < numCols; ++colNdx)
757                                 vertexArrays.push_back(glu::va::Float(attribName, colNdx, numRows, numValues, stride, ((const float*)ptr) + colNdx * numRows));
758                 }
759                 else
760                         DE_ASSERT(false);
761         }
762
763         // Construct framebuffer.
764         gl.bindFramebuffer(GL_FRAMEBUFFER, *framebuffer);
765
766         for (int outNdx = 0; outNdx < (int)m_outLocationSymbols.size(); ++outNdx)
767         {
768                 const Symbol&   output                  = *m_outLocationSymbols[outNdx];
769                 const deUint32  renderbuffer    = renderbuffers[outNdx];
770                 const deUint32  format                  = glu::getInternalFormat(getRenderbufferFormatForOutput(output.varType, useIntOutputs));
771
772                 gl.bindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
773                 gl.renderbufferStorage(GL_RENDERBUFFER, format, framebufferW, framebufferH);
774                 gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0+outNdx, GL_RENDERBUFFER, renderbuffer);
775         }
776         gl.bindRenderbuffer(GL_RENDERBUFFER, 0);
777         GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to set up framebuffer object");
778         TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
779
780         {
781                 vector<deUint32> drawBuffers(m_outLocationSymbols.size());
782                 for (int ndx = 0; ndx < (int)m_outLocationSymbols.size(); ndx++)
783                         drawBuffers[ndx] = GL_COLOR_ATTACHMENT0+ndx;
784                 gl.drawBuffers((int)drawBuffers.size(), &drawBuffers[0]);
785                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDrawBuffers()");
786         }
787
788         // Render
789         gl.viewport(0, 0, framebufferW, framebufferH);
790         glu::draw(m_renderCtx, m_program.getProgram(), (int)vertexArrays.size(), &vertexArrays[0],
791                           glu::pr::Points(numValues));
792         GLU_EXPECT_NO_ERROR(gl.getError(), "Error in draw");
793
794         // Read back pixels.
795         {
796                 tcu::TextureLevel       tmpBuf;
797
798                 // \todo [2013-08-07 pyry] Some fast-paths could be added here.
799
800                 for (int outNdx = 0; outNdx < (int)m_outputs.size(); ++outNdx)
801                 {
802                         const Symbol&                           output                  = m_outputs[outNdx];
803                         const int                                       outSize                 = output.varType.getScalarSize();
804                         const int                                       outVecSize              = glu::getDataTypeNumComponents(output.varType.getBasicType());
805                         const int                                       outNumLocs              = glu::getDataTypeNumLocations(output.varType.getBasicType());
806                         deUint32*                                       dstPtrBase              = static_cast<deUint32*>(outputs[outNdx]);
807                         const tcu::TextureFormat        format                  = getRenderbufferFormatForOutput(output.varType, useIntOutputs);
808                         const tcu::TextureFormat        readFormat              (tcu::TextureFormat::RGBA, format.type);
809                         const int                                       outLocation             = de::lookup(m_outLocationMap, output.name);
810
811                         tmpBuf.setStorage(readFormat, framebufferW, framebufferH);
812
813                         for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
814                         {
815                                 gl.readBuffer(GL_COLOR_ATTACHMENT0 + outLocation + locNdx);
816                                 glu::readPixels(m_renderCtx, 0, 0, tmpBuf.getAccess());
817                                 GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels");
818
819                                 if (outSize == 4 && outNumLocs == 1)
820                                         deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues*outVecSize*sizeof(deUint32));
821                                 else
822                                 {
823                                         for (int valNdx = 0; valNdx < numValues; valNdx++)
824                                         {
825                                                 const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx*4;
826                                                 deUint32*               dstPtr = &dstPtrBase[outSize*valNdx + outVecSize*locNdx];
827                                                 deMemcpy(dstPtr, srcPtr, outVecSize*sizeof(deUint32));
828                                         }
829                                 }
830                         }
831                 }
832         }
833
834         // \todo [2013-08-07 pyry] Clear draw buffers & viewport?
835         gl.bindFramebuffer(GL_FRAMEBUFFER, 0);
836 }
837
838 // Shared utilities for compute and tess executors
839
840 static deUint32 getVecStd430ByteAlignment (glu::DataType type)
841 {
842         switch (glu::getDataTypeScalarSize(type))
843         {
844                 case 1:         return 4u;
845                 case 2:         return 8u;
846                 case 3:         return 16u;
847                 case 4:         return 16u;
848                 default:
849                         DE_ASSERT(false);
850                         return 0u;
851         }
852 }
853
854 class BufferIoExecutor : public ShaderExecutor
855 {
856 public:
857                                                 BufferIoExecutor        (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources);
858                                                 ~BufferIoExecutor       (void);
859
860         bool                            isOk                            (void) const                            { return m_program.isOk();                      }
861         void                            log                                     (tcu::TestLog& dst) const       { dst << m_program;                                     }
862         deUint32                        getProgram                      (void) const                            { return m_program.getProgram();        }
863
864 protected:
865         enum
866         {
867                 INPUT_BUFFER_BINDING    = 0,
868                 OUTPUT_BUFFER_BINDING   = 1,
869         };
870
871         void                            initBuffers                     (int numValues);
872         deUint32                        getInputBuffer          (void) const            { return *m_inputBuffer;                                        }
873         deUint32                        getOutputBuffer         (void) const            { return *m_outputBuffer;                                       }
874         deUint32                        getInputStride          (void) const            { return getLayoutStride(m_inputLayout);        }
875         deUint32                        getOutputStride         (void) const            { return getLayoutStride(m_outputLayout);       }
876
877         void                            uploadInputBuffer       (const void* const* inputPtrs, int numValues);
878         void                            readOutputBuffer        (void* const* outputPtrs, int numValues);
879
880         static void                     declareBufferBlocks     (std::ostream& src, const ShaderSpec& spec);
881         static void                     generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);
882
883         glu::ShaderProgram      m_program;
884
885 private:
886         struct VarLayout
887         {
888                 deUint32                offset;
889                 deUint32                stride;
890                 deUint32                matrixStride;
891
892                 VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
893         };
894
895         void                            resizeInputBuffer       (int newSize);
896         void                            resizeOutputBuffer      (int newSize);
897
898         static void                     computeVarLayout        (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
899         static deUint32         getLayoutStride         (const vector<VarLayout>& layout);
900
901         static void                     copyToBuffer            (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
902         static void                     copyFromBuffer          (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
903
904         glu::Buffer                     m_inputBuffer;
905         glu::Buffer                     m_outputBuffer;
906
907         vector<VarLayout>       m_inputLayout;
908         vector<VarLayout>       m_outputLayout;
909 };
910
911 BufferIoExecutor::BufferIoExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources)
912         : ShaderExecutor        (renderCtx, shaderSpec)
913         , m_program                     (renderCtx, sources)
914         , m_inputBuffer         (renderCtx)
915         , m_outputBuffer        (renderCtx)
916 {
917         computeVarLayout(m_inputs, &m_inputLayout);
918         computeVarLayout(m_outputs, &m_outputLayout);
919 }
920
921 BufferIoExecutor::~BufferIoExecutor (void)
922 {
923 }
924
925 void BufferIoExecutor::resizeInputBuffer (int newSize)
926 {
927         const glw::Functions& gl = m_renderCtx.getFunctions();
928         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *m_inputBuffer);
929         gl.bufferData(GL_SHADER_STORAGE_BUFFER, newSize, DE_NULL, GL_STATIC_DRAW);
930         GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to allocate input buffer");
931 }
932
933 void BufferIoExecutor::resizeOutputBuffer (int newSize)
934 {
935         const glw::Functions& gl = m_renderCtx.getFunctions();
936         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *m_outputBuffer);
937         gl.bufferData(GL_SHADER_STORAGE_BUFFER, newSize, DE_NULL, GL_STATIC_DRAW);
938         GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to allocate output buffer");
939 }
940
941 void BufferIoExecutor::initBuffers (int numValues)
942 {
943         const deUint32          inputStride                     = getLayoutStride(m_inputLayout);
944         const deUint32          outputStride            = getLayoutStride(m_outputLayout);
945         const int                       inputBufferSize         = numValues * inputStride;
946         const int                       outputBufferSize        = numValues * outputStride;
947
948         resizeInputBuffer(inputBufferSize);
949         resizeOutputBuffer(outputBufferSize);
950 }
951
952 void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
953 {
954         deUint32        maxAlignment    = 0;
955         deUint32        curOffset               = 0;
956
957         DE_ASSERT(layout->empty());
958         layout->resize(symbols.size());
959
960         for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
961         {
962                 const Symbol&           symbol          = symbols[varNdx];
963                 const glu::DataType     basicType       = symbol.varType.getBasicType();
964                 VarLayout&                      layoutEntry     = (*layout)[varNdx];
965
966                 if (glu::isDataTypeScalarOrVector(basicType))
967                 {
968                         const deUint32  alignment       = getVecStd430ByteAlignment(basicType);
969                         const deUint32  size            = (deUint32)glu::getDataTypeScalarSize(basicType)*sizeof(deUint32);
970
971                         curOffset               = (deUint32)deAlign32((int)curOffset, (int)alignment);
972                         maxAlignment    = de::max(maxAlignment, alignment);
973
974                         layoutEntry.offset                      = curOffset;
975                         layoutEntry.matrixStride        = 0;
976
977                         curOffset += size;
978                 }
979                 else if (glu::isDataTypeMatrix(basicType))
980                 {
981                         const int                               numVecs                 = glu::getDataTypeMatrixNumColumns(basicType);
982                         const glu::DataType             vecType                 = glu::getDataTypeFloatVec(glu::getDataTypeMatrixNumRows(basicType));
983                         const deUint32                  vecAlignment    = getVecStd430ByteAlignment(vecType);
984
985                         curOffset               = (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
986                         maxAlignment    = de::max(maxAlignment, vecAlignment);
987
988                         layoutEntry.offset                      = curOffset;
989                         layoutEntry.matrixStride        = vecAlignment;
990
991                         curOffset += vecAlignment*numVecs;
992                 }
993                 else
994                         DE_ASSERT(false);
995         }
996
997         {
998                 const deUint32  totalSize       = (deUint32)deAlign32(curOffset, maxAlignment);
999
1000                 for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
1001                         varIter->stride = totalSize;
1002         }
1003 }
1004
1005 inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
1006 {
1007         return layout.empty() ? 0 : layout[0].stride;
1008 }
1009
1010 void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1011 {
1012         if (varType.isBasicType())
1013         {
1014                 const glu::DataType             basicType               = varType.getBasicType();
1015                 const bool                              isMatrix                = glu::isDataTypeMatrix(basicType);
1016                 const int                               scalarSize              = glu::getDataTypeScalarSize(basicType);
1017                 const int                               numVecs                 = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1018                 const int                               numComps                = scalarSize / numVecs;
1019
1020                 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1021                 {
1022                         for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1023                         {
1024                                 const int               srcOffset               = sizeof(deUint32)*(elemNdx*scalarSize + vecNdx*numComps);
1025                                 const int               dstOffset               = layout.offset + layout.stride*elemNdx + (isMatrix ? layout.matrixStride*vecNdx : 0);
1026                                 const deUint8*  srcPtr                  = (const deUint8*)srcBasePtr + srcOffset;
1027                                 deUint8*                dstPtr                  = (deUint8*)dstBasePtr + dstOffset;
1028
1029                                 deMemcpy(dstPtr, srcPtr, sizeof(deUint32)*numComps);
1030                         }
1031                 }
1032         }
1033         else
1034                 throw tcu::InternalError("Unsupported type");
1035 }
1036
1037 void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1038 {
1039         if (varType.isBasicType())
1040         {
1041                 const glu::DataType             basicType               = varType.getBasicType();
1042                 const bool                              isMatrix                = glu::isDataTypeMatrix(basicType);
1043                 const int                               scalarSize              = glu::getDataTypeScalarSize(basicType);
1044                 const int                               numVecs                 = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1045                 const int                               numComps                = scalarSize / numVecs;
1046
1047                 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1048                 {
1049                         for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1050                         {
1051                                 const int               srcOffset               = layout.offset + layout.stride*elemNdx + (isMatrix ? layout.matrixStride*vecNdx : 0);
1052                                 const int               dstOffset               = sizeof(deUint32)*(elemNdx*scalarSize + vecNdx*numComps);
1053                                 const deUint8*  srcPtr                  = (const deUint8*)srcBasePtr + srcOffset;
1054                                 deUint8*                dstPtr                  = (deUint8*)dstBasePtr + dstOffset;
1055
1056                                 deMemcpy(dstPtr, srcPtr, sizeof(deUint32)*numComps);
1057                         }
1058                 }
1059         }
1060         else
1061                 throw tcu::InternalError("Unsupported type");
1062 }
1063
1064 void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues)
1065 {
1066         const glw::Functions&   gl                              = m_renderCtx.getFunctions();
1067         const deUint32                  buffer                  = *m_inputBuffer;
1068         const deUint32                  inputStride             = getLayoutStride(m_inputLayout);
1069         const int                               inputBufferSize = inputStride*numValues;
1070
1071         if (inputBufferSize == 0)
1072                 return; // No inputs
1073
1074         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
1075         void* mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, inputBufferSize, GL_MAP_WRITE_BIT);
1076         GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
1077         TCU_CHECK(mapPtr);
1078
1079         try
1080         {
1081                 DE_ASSERT(m_inputs.size() == m_inputLayout.size());
1082                 for (size_t inputNdx = 0; inputNdx < m_inputs.size(); ++inputNdx)
1083                 {
1084                         const glu::VarType&             varType         = m_inputs[inputNdx].varType;
1085                         const VarLayout&                layout          = m_inputLayout[inputNdx];
1086
1087                         copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], mapPtr);
1088                 }
1089         }
1090         catch (...)
1091         {
1092                 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1093                 throw;
1094         }
1095
1096         gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1097         GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
1098 }
1099
1100 void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1101 {
1102         const glw::Functions&   gl                                      = m_renderCtx.getFunctions();
1103         const deUint32                  buffer                          = *m_outputBuffer;
1104         const deUint32                  outputStride            = getLayoutStride(m_outputLayout);
1105         const int                               outputBufferSize        = numValues*outputStride;
1106
1107         DE_ASSERT(outputBufferSize > 0); // At least some outputs are required.
1108
1109         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
1110         void* mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, outputBufferSize, GL_MAP_READ_BIT);
1111         GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
1112         TCU_CHECK(mapPtr);
1113
1114         try
1115         {
1116                 DE_ASSERT(m_outputs.size() == m_outputLayout.size());
1117                 for (size_t outputNdx = 0; outputNdx < m_outputs.size(); ++outputNdx)
1118                 {
1119                         const glu::VarType&             varType         = m_outputs[outputNdx].varType;
1120                         const VarLayout&                layout          = m_outputLayout[outputNdx];
1121
1122                         copyFromBuffer(varType, layout, numValues, mapPtr, outputPtrs[outputNdx]);
1123                 }
1124         }
1125         catch (...)
1126         {
1127                 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1128                 throw;
1129         }
1130
1131         gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1132         GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
1133 }
1134
1135 void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
1136 {
1137         // Input struct
1138         if (!spec.inputs.empty())
1139         {
1140                 glu::StructType inputStruct("Inputs");
1141                 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1142                         inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1143                 src << glu::declare(&inputStruct) << ";\n";
1144         }
1145
1146         // Output struct
1147         {
1148                 glu::StructType outputStruct("Outputs");
1149                 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1150                         outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1151                 src << glu::declare(&outputStruct) << ";\n";
1152         }
1153
1154         src << "\n";
1155
1156         if (!spec.inputs.empty())
1157         {
1158                 src     << "layout(binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1159                         << "{\n"
1160                         << "    Inputs inputs[];\n"
1161                         << "};\n";
1162         }
1163
1164         src     << "layout(binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1165                 << "{\n"
1166                 << "    Outputs outputs[];\n"
1167                 << "};\n"
1168                 << "\n";
1169 }
1170
1171 void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
1172 {
1173         for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1174                 src << "\t" << glu::declare(symIter->varType, symIter->name) << " = inputs[" << invocationNdxName << "]." << symIter->name << ";\n";
1175
1176         for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1177                 src << "\t" << glu::declare(symIter->varType, symIter->name) << ";\n";
1178
1179         src << "\n";
1180
1181         {
1182                 std::istringstream      opSrc   (spec.source);
1183                 std::string                     line;
1184
1185                 while (std::getline(opSrc, line))
1186                         src << "\t" << line << "\n";
1187         }
1188
1189         src << "\n";
1190         for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1191                 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1192 }
1193
1194 // ComputeShaderExecutor
1195
1196 class ComputeShaderExecutor : public BufferIoExecutor
1197 {
1198 public:
1199                                                 ComputeShaderExecutor   (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1200                                                 ~ComputeShaderExecutor  (void);
1201
1202         void                            execute                                 (int numValues, const void* const* inputs, void* const* outputs);
1203
1204 protected:
1205         static std::string      generateComputeShader   (const ShaderSpec& spec);
1206
1207         tcu::IVec3                      m_maxWorkSize;
1208 };
1209
1210 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
1211 {
1212         std::ostringstream src;
1213
1214         src << glu::getGLSLVersionDeclaration(spec.version) << "\n";
1215
1216         if (!spec.globalDeclarations.empty())
1217                 src << spec.globalDeclarations << "\n";
1218
1219         src << "layout(local_size_x = 1) in;\n"
1220                 << "\n";
1221
1222         declareBufferBlocks(src, spec);
1223
1224         src << "void main (void)\n"
1225                 << "{\n"
1226                 << "    uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
1227                 << "                       + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
1228
1229         generateExecBufferIo(src, spec, "invocationNdx");
1230
1231         src << "}\n";
1232
1233         return src.str();
1234 }
1235
1236 ComputeShaderExecutor::ComputeShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1237         : BufferIoExecutor      (renderCtx, shaderSpec,
1238                                                  glu::ProgramSources() << glu::ComputeSource(generateComputeShader(shaderSpec)))
1239 {
1240         m_maxWorkSize   = tcu::IVec3(128,128,64); // Minimum in 3plus
1241 }
1242
1243 ComputeShaderExecutor::~ComputeShaderExecutor (void)
1244 {
1245 }
1246
1247 void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1248 {
1249         const glw::Functions&   gl                                              = m_renderCtx.getFunctions();
1250         const int                               maxValuesPerInvocation  = m_maxWorkSize[0];
1251         const deUint32                  inputStride                             = getInputStride();
1252         const deUint32                  outputStride                    = getOutputStride();
1253
1254         initBuffers(numValues);
1255
1256         // Setup input buffer & copy data
1257         uploadInputBuffer(inputs, numValues);
1258
1259         // Perform compute invocations
1260         {
1261                 int curOffset = 0;
1262                 while (curOffset < numValues)
1263                 {
1264                         const int numToExec = de::min(maxValuesPerInvocation, numValues-curOffset);
1265
1266                         if (inputStride > 0)
1267                                 gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer(), curOffset*inputStride, numToExec*inputStride);
1268
1269                         gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer(), curOffset*outputStride, numToExec*outputStride);
1270                         GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBufferRange(GL_SHADER_STORAGE_BUFFER)");
1271
1272                         gl.dispatchCompute(numToExec, 1, 1);
1273                         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1274
1275                         curOffset += numToExec;
1276                 }
1277         }
1278
1279         // Read back data
1280         readOutputBuffer(outputs, numValues);
1281 }
1282
1283 // Tessellation utils
1284
1285 static std::string generateVertexShaderForTess (glu::GLSLVersion version)
1286 {
1287         std::ostringstream      src;
1288
1289         src << glu::getGLSLVersionDeclaration(version) << "\n";
1290
1291         src << "void main (void)\n{\n"
1292                 << "    gl_Position = vec4(gl_VertexID/2, gl_VertexID%2, 0.0, 1.0);\n"
1293                 << "}\n";
1294
1295         return src.str();
1296 }
1297
1298 class CheckTessSupport
1299 {
1300 public:
1301         enum Stage
1302         {
1303                 STAGE_CONTROL = 0,
1304                 STAGE_EVAL,
1305         };
1306
1307         inline CheckTessSupport (const glu::RenderContext& renderCtx, Stage stage)
1308         {
1309                 const int numBlockRequired = 2; // highest binding is always 1 (output) i.e. count == 2
1310
1311                 if (renderCtx.getType().getAPI().getProfile() == glu::PROFILE_ES)
1312                         checkExtension(renderCtx, "GL_EXT_tessellation_shader");
1313
1314                 if (stage == STAGE_CONTROL)
1315                         checkLimit(renderCtx, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, numBlockRequired);
1316                 else if (stage == STAGE_EVAL)
1317                         checkLimit(renderCtx, GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, numBlockRequired);
1318                 else
1319                         DE_ASSERT(false);
1320         }
1321 };
1322
1323 // TessControlExecutor
1324
1325 class TessControlExecutor : private CheckTessSupport, public BufferIoExecutor
1326 {
1327 public:
1328                                                 TessControlExecutor                     (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1329                                                 ~TessControlExecutor            (void);
1330
1331         void                            execute                                         (int numValues, const void* const* inputs, void* const* outputs);
1332
1333 protected:
1334         static std::string      generateTessControlShader       (const ShaderSpec& shaderSpec);
1335 };
1336
1337 std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
1338 {
1339         std::ostringstream src;
1340
1341         src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
1342
1343         if (shaderSpec.version == glu::GLSL_VERSION_310_ES)
1344                 src << "#extension GL_EXT_tessellation_shader : require\n";
1345
1346         if (!shaderSpec.globalDeclarations.empty())
1347                 src << shaderSpec.globalDeclarations << "\n";
1348
1349         src << "\nlayout(vertices = 1) out;\n\n";
1350
1351         declareBufferBlocks(src, shaderSpec);
1352
1353         src << "void main (void)\n{\n";
1354
1355         for (int ndx = 0; ndx < 2; ndx++)
1356                 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
1357
1358         for (int ndx = 0; ndx < 4; ndx++)
1359                 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
1360
1361         src << "\n"
1362                 << "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
1363
1364         generateExecBufferIo(src, shaderSpec, "invocationId");
1365
1366         src << "}\n";
1367
1368         return src.str();
1369 }
1370
1371 static std::string generateEmptyTessEvalShader (glu::GLSLVersion version)
1372 {
1373         std::ostringstream src;
1374
1375         src << glu::getGLSLVersionDeclaration(version) << "\n";
1376
1377         if (version == glu::GLSL_VERSION_310_ES)
1378                 src << "#extension GL_EXT_tessellation_shader : require\n\n";
1379
1380         src << "layout(triangles, ccw) in;\n";
1381
1382         src << "\nvoid main (void)\n{\n"
1383                 << "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n"
1384                 << "}\n";
1385
1386         return src.str();
1387 }
1388
1389 TessControlExecutor::TessControlExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1390         : CheckTessSupport      (renderCtx, STAGE_CONTROL)
1391         , BufferIoExecutor      (renderCtx, shaderSpec, glu::ProgramSources()
1392                                                         << glu::VertexSource(generateVertexShaderForTess(shaderSpec.version))
1393                                                         << glu::TessellationControlSource(generateTessControlShader(shaderSpec))
1394                                                         << glu::TessellationEvaluationSource(generateEmptyTessEvalShader(shaderSpec.version))
1395                                                         << glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version)))
1396 {
1397 }
1398
1399 TessControlExecutor::~TessControlExecutor (void)
1400 {
1401 }
1402
1403 void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1404 {
1405         const glw::Functions&   gl      = m_renderCtx.getFunctions();
1406
1407         initBuffers(numValues);
1408
1409         // Setup input buffer & copy data
1410         uploadInputBuffer(inputs, numValues);
1411
1412         if (!m_inputs.empty())
1413                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer());
1414
1415         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer());
1416
1417         // Render patches
1418         gl.patchParameteri(GL_PATCH_VERTICES, 3);
1419         gl.drawArrays(GL_PATCHES, 0, 3*numValues);
1420
1421         // Read back data
1422         readOutputBuffer(outputs, numValues);
1423 }
1424
1425 // TessEvaluationExecutor
1426
1427 class TessEvaluationExecutor : private CheckTessSupport, public BufferIoExecutor
1428 {
1429 public:
1430                                                 TessEvaluationExecutor  (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1431                                                 ~TessEvaluationExecutor (void);
1432
1433         void                            execute                                 (int numValues, const void* const* inputs, void* const* outputs);
1434
1435 protected:
1436         static std::string      generateTessEvalShader  (const ShaderSpec& shaderSpec);
1437 };
1438
1439 static std::string generatePassthroughTessControlShader (glu::GLSLVersion version)
1440 {
1441         std::ostringstream src;
1442
1443         src << glu::getGLSLVersionDeclaration(version) << "\n";
1444
1445         if (version == glu::GLSL_VERSION_310_ES)
1446                 src << "#extension GL_EXT_tessellation_shader : require\n\n";
1447
1448         src << "layout(vertices = 1) out;\n\n";
1449
1450         src << "void main (void)\n{\n";
1451
1452         for (int ndx = 0; ndx < 2; ndx++)
1453                 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
1454
1455         for (int ndx = 0; ndx < 4; ndx++)
1456                 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
1457
1458         src << "}\n";
1459
1460         return src.str();
1461 }
1462
1463 std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
1464 {
1465         std::ostringstream src;
1466
1467         src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
1468
1469         if (shaderSpec.version == glu::GLSL_VERSION_310_ES)
1470                 src << "#extension GL_EXT_tessellation_shader : require\n";
1471
1472         if (!shaderSpec.globalDeclarations.empty())
1473                 src << shaderSpec.globalDeclarations << "\n";
1474
1475         src << "\n";
1476
1477         src << "layout(isolines, equal_spacing) in;\n\n";
1478
1479         declareBufferBlocks(src, shaderSpec);
1480
1481         src << "void main (void)\n{\n"
1482                 << "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
1483                 << "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
1484
1485         generateExecBufferIo(src, shaderSpec, "invocationId");
1486
1487         src     << "}\n";
1488
1489         return src.str();
1490 }
1491
1492 TessEvaluationExecutor::TessEvaluationExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1493         : CheckTessSupport      (renderCtx, STAGE_EVAL)
1494         , BufferIoExecutor      (renderCtx, shaderSpec, glu::ProgramSources()
1495                                                         << glu::VertexSource(generateVertexShaderForTess(shaderSpec.version))
1496                                                         << glu::TessellationControlSource(generatePassthroughTessControlShader(shaderSpec.version))
1497                                                         << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec))
1498                                                         << glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version)))
1499 {
1500 }
1501
1502 TessEvaluationExecutor::~TessEvaluationExecutor (void)
1503 {
1504 }
1505
1506 void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1507 {
1508         const glw::Functions&   gl                              = m_renderCtx.getFunctions();
1509         const int                               alignedValues   = deAlign32(numValues, 2);
1510
1511         // Initialize buffers with aligned value count to make room for padding
1512         initBuffers(alignedValues);
1513
1514         // Setup input buffer & copy data
1515         uploadInputBuffer(inputs, numValues);
1516
1517         // \todo [2014-06-26 pyry] Duplicate last value in the buffer to prevent infinite loops for example?
1518
1519         if (!m_inputs.empty())
1520                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer());
1521
1522         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer());
1523
1524         // Render patches
1525         gl.patchParameteri(GL_PATCH_VERTICES, 2);
1526         gl.drawArrays(GL_PATCHES, 0, alignedValues);
1527
1528         // Read back data
1529         readOutputBuffer(outputs, numValues);
1530 }
1531
1532 // Utilities
1533
1534 ShaderExecutor* createExecutor (const glu::RenderContext& renderCtx, glu::ShaderType shaderType, const ShaderSpec& shaderSpec)
1535 {
1536         switch (shaderType)
1537         {
1538                 case glu::SHADERTYPE_VERTEX:                                    return new VertexShaderExecutor         (renderCtx, shaderSpec);
1539                 case glu::SHADERTYPE_TESSELLATION_CONTROL:              return new TessControlExecutor          (renderCtx, shaderSpec);
1540                 case glu::SHADERTYPE_TESSELLATION_EVALUATION:   return new TessEvaluationExecutor       (renderCtx, shaderSpec);
1541                 case glu::SHADERTYPE_GEOMETRY:                                  return new GeometryShaderExecutor       (renderCtx, shaderSpec);
1542                 case glu::SHADERTYPE_FRAGMENT:                                  return new FragmentShaderExecutor       (renderCtx, shaderSpec);
1543                 case glu::SHADERTYPE_COMPUTE:                                   return new ComputeShaderExecutor        (renderCtx, shaderSpec);
1544                 default:
1545                         throw tcu::InternalError("Unsupported shader type");
1546         }
1547 }
1548
1549 } // ShaderExecUtil
1550 } // gls
1551 } // deqp