Fix ShaderExecutor usage for core OpenGL
[platform/upstream/VK-GL-CTS.git] / modules / glshared / glsShaderExecUtil.cpp
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL (ES) Module
3  * -----------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Shader execution utilities.
22  *//*--------------------------------------------------------------------*/
23
24 #include "glsShaderExecUtil.hpp"
25 #include "gluRenderContext.hpp"
26 #include "gluDrawUtil.hpp"
27 #include "gluObjectWrapper.hpp"
28 #include "gluShaderProgram.hpp"
29 #include "gluTextureUtil.hpp"
30 #include "gluProgramInterfaceQuery.hpp"
31 #include "gluPixelTransfer.hpp"
32 #include "gluStrUtil.hpp"
33 #include "tcuTestLog.hpp"
34 #include "glwFunctions.hpp"
35 #include "glwEnums.hpp"
36 #include "deSTLUtil.hpp"
37 #include "deStringUtil.hpp"
38 #include "deUniquePtr.hpp"
39 #include "deMemory.h"
40
41 #include <map>
42
43 namespace deqp
44 {
45 namespace gls
46 {
47
48 namespace ShaderExecUtil
49 {
50
51 using std::vector;
52
53 static bool isExtensionSupported (const glu::RenderContext& renderCtx, const std::string& extension)
54 {
55         const glw::Functions&   gl              = renderCtx.getFunctions();
56         int                                             numExts = 0;
57
58         gl.getIntegerv(GL_NUM_EXTENSIONS, &numExts);
59
60         for (int ndx = 0; ndx < numExts; ndx++)
61         {
62                 const char* curExt = (const char*)gl.getStringi(GL_EXTENSIONS, ndx);
63
64                 if (extension == curExt)
65                         return true;
66         }
67
68         return false;
69 }
70
71 static void checkExtension (const glu::RenderContext& renderCtx, const std::string& extension)
72 {
73         if (!isExtensionSupported(renderCtx, extension))
74                 throw tcu::NotSupportedError(extension + " is not supported");
75 }
76
77 static void checkLimit (const glu::RenderContext& renderCtx, deUint32 pname, int required)
78 {
79         const glw::Functions&   gl                                      = renderCtx.getFunctions();
80         int                                             implementationLimit     = -1;
81         deUint32                                error;
82
83         gl.getIntegerv(pname, &implementationLimit);
84         error = gl.getError();
85
86         if (error != GL_NO_ERROR)
87                 throw tcu::TestError("Failed to query " + de::toString(glu::getGettableStateStr(pname)) + " - got " + de::toString(glu::getErrorStr(error)));
88         if (implementationLimit < required)
89                 throw tcu::NotSupportedError("Test requires " + de::toString(glu::getGettableStateStr(pname)) + " >= " + de::toString(required) + ", got " + de::toString(implementationLimit));
90 }
91
92 // Shader utilities
93
94 static std::string generateVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
95 {
96         const bool                      usesInout       = glu::glslVersionUsesInOutQualifiers(shaderSpec.version);
97         const char*                     in                      = usesInout ? "in"              : "attribute";
98         const char*                     out                     = usesInout ? "out"             : "varying";
99         std::ostringstream      src;
100
101         DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
102
103         src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
104
105         if (!shaderSpec.globalDeclarations.empty())
106                 src << shaderSpec.globalDeclarations << "\n";
107
108         src << in << " highp vec4 a_position;\n";
109
110         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
111                 src << in << " " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
112
113         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
114         {
115                 DE_ASSERT(output->varType.isBasicType());
116
117                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
118                 {
119                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
120                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
121                         const glu::VarType              intType         (intBaseType, glu::PRECISION_HIGHP);
122
123                         src << "flat " << out << " " << glu::declare(intType, outputPrefix + output->name) << ";\n";
124                 }
125                 else
126                         src << "flat " << out << " " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
127         }
128
129         src << "\n"
130                 << "void main (void)\n"
131                 << "{\n"
132                 << "    gl_Position = a_position;\n"
133                 << "    gl_PointSize = 1.0;\n\n";
134
135         // Declare & fetch local input variables
136         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
137                 src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
138
139         // Declare local output variables
140         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
141                 src << "\t" << glu::declare(output->varType, output->name) << ";\n";
142
143         // Operation - indented to correct level.
144         {
145                 std::istringstream      opSrc   (shaderSpec.source);
146                 std::string                     line;
147
148                 while (std::getline(opSrc, line))
149                         src << "\t" << line << "\n";
150         }
151
152         // Assignments to outputs.
153         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
154         {
155                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
156                 {
157                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
158                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
159
160                         src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
161                 }
162                 else
163                         src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
164         }
165
166         src << "}\n";
167
168         return src.str();
169 }
170
171 static std::string generateGeometryShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
172 {
173         DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
174         DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
175
176         std::ostringstream      src;
177
178         src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
179
180         if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
181                 src << "#extension GL_EXT_geometry_shader : require\n";
182
183         if (!shaderSpec.globalDeclarations.empty())
184                 src << shaderSpec.globalDeclarations << "\n";
185
186         src << "layout(points) in;\n"
187                 << "layout(points, max_vertices = 1) out;\n";
188
189         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
190                 src << "flat in " << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";
191
192         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
193         {
194                 DE_ASSERT(output->varType.isBasicType());
195
196                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
197                 {
198                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
199                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
200                         const glu::VarType              intType         (intBaseType, glu::PRECISION_HIGHP);
201
202                         src << "flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
203                 }
204                 else
205                         src << "flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
206         }
207
208         src << "\n"
209                 << "void main (void)\n"
210                 << "{\n"
211                 << "    gl_Position = gl_in[0].gl_Position;\n\n";
212
213         // Fetch input variables
214         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
215                 src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";
216
217         // Declare local output variables.
218         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
219                 src << "\t" << glu::declare(output->varType, output->name) << ";\n";
220
221         src << "\n";
222
223         // Operation - indented to correct level.
224         {
225                 std::istringstream      opSrc   (shaderSpec.source);
226                 std::string                     line;
227
228                 while (std::getline(opSrc, line))
229                         src << "\t" << line << "\n";
230         }
231
232         // Assignments to outputs.
233         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
234         {
235                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
236                 {
237                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
238                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
239
240                         src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
241                 }
242                 else
243                         src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
244         }
245
246         src << "        EmitVertex();\n"
247                 << "    EndPrimitive();\n"
248                 << "}\n";
249
250         return src.str();
251 }
252
253 static std::string generateEmptyFragmentSource (glu::GLSLVersion version)
254 {
255         const bool                      customOut               = glu::glslVersionUsesInOutQualifiers(version);
256         std::ostringstream      src;
257
258         src << glu::getGLSLVersionDeclaration(version) << "\n";
259
260         // \todo [2013-08-05 pyry] Do we need one dummy output?
261
262         src << "void main (void)\n{\n";
263         if (!customOut)
264                 src << "        gl_FragColor = vec4(0.0);\n";
265         src << "}\n";
266
267         return src.str();
268 }
269
270 static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
271 {
272         // flat qualifier is not present in earlier versions?
273         DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
274
275         std::ostringstream src;
276
277         src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n"
278                 << "in highp vec4 a_position;\n";
279
280         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
281         {
282                 src << "in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
283                         << "flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
284         }
285
286         src << "\nvoid main (void)\n{\n"
287                 << "    gl_Position = a_position;\n"
288                 << "    gl_PointSize = 1.0;\n";
289
290         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
291                 src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
292
293         src << "}\n";
294
295         return src.str();
296 }
297
298 static void generateFragShaderOutputDecl (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& outputPrefix)
299 {
300         DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
301
302         for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
303         {
304                 const Symbol&                           output          = shaderSpec.outputs[outNdx];
305                 const int                                       location        = de::lookup(outLocationMap, output.name);
306                 const std::string                       outVarName      = outputPrefix + output.name;
307                 glu::VariableDeclaration        decl            (output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));
308
309                 TCU_CHECK_INTERNAL(output.varType.isBasicType());
310
311                 if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
312                 {
313                         const int                       vecSize                 = glu::getDataTypeScalarSize(output.varType.getBasicType());
314                         const glu::DataType     uintBasicType   = vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
315                         const glu::VarType      uintType                (uintBasicType, glu::PRECISION_HIGHP);
316
317                         decl.varType = uintType;
318                         src << decl << ";\n";
319                 }
320                 else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
321                 {
322                         const int                       vecSize                 = glu::getDataTypeScalarSize(output.varType.getBasicType());
323                         const glu::DataType     intBasicType    = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
324                         const glu::VarType      intType                 (intBasicType, glu::PRECISION_HIGHP);
325
326                         decl.varType = intType;
327                         src << decl << ";\n";
328                 }
329                 else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
330                 {
331                         const int                       vecSize                 = glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
332                         const int                       numVecs                 = glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
333                         const glu::DataType     uintBasicType   = glu::getDataTypeUintVec(vecSize);
334                         const glu::VarType      uintType                (uintBasicType, glu::PRECISION_HIGHP);
335
336                         decl.varType = uintType;
337                         for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
338                         {
339                                 decl.name                               = outVarName + "_" + de::toString(vecNdx);
340                                 decl.layout.location    = location + vecNdx;
341                                 src << decl << ";\n";
342                         }
343                 }
344                 else
345                         src << decl << ";\n";
346         }
347 }
348
349 static void generateFragShaderOutAssign (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::string& valuePrefix, const std::string& outputPrefix)
350 {
351         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
352         {
353                 if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
354                         src << "        o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
355                 else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
356                 {
357                         const int       numVecs         = glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
358
359                         for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
360                                 if (useIntOutputs)
361                                         src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix << output->name << "[" << vecNdx << "]);\n";
362                                 else
363                                         src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << valuePrefix << output->name << "[" << vecNdx << "];\n";
364                 }
365                 else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
366                 {
367                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
368                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
369
370                         src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << valuePrefix << output->name << ");\n";
371                 }
372                 else
373                         src << "\t" << outputPrefix << output->name << " = " << valuePrefix << output->name << ";\n";
374         }
375 }
376
377 static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
378 {
379         DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
380
381         std::ostringstream      src;
382
383         src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
384
385         if (!shaderSpec.globalDeclarations.empty())
386                 src << shaderSpec.globalDeclarations << "\n";
387
388         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
389                 src << "flat in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
390
391         generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
392
393         src << "\nvoid main (void)\n{\n";
394
395         // Declare & fetch local input variables
396         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
397                 src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
398
399         // Declare output variables
400         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
401                 src << "\t" << glu::declare(output->varType, output->name) << ";\n";
402
403         // Operation - indented to correct level.
404         {
405                 std::istringstream      opSrc   (shaderSpec.source);
406                 std::string                     line;
407
408                 while (std::getline(opSrc, line))
409                         src << "\t" << line << "\n";
410         }
411
412         generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix);
413
414         src << "}\n";
415
416         return src.str();
417 }
418
419 static std::string generatePassthroughFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
420 {
421         DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
422
423         std::ostringstream      src;
424
425         src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
426
427         if (!shaderSpec.globalDeclarations.empty())
428                 src << shaderSpec.globalDeclarations << "\n";
429
430         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
431         {
432                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
433                 {
434                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
435                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
436                         const glu::VarType              intType         (intBaseType, glu::PRECISION_HIGHP);
437
438                         src << "flat in " << glu::declare(intType, inputPrefix + output->name) << ";\n";
439                 }
440                 else
441                         src << "flat in " << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
442         }
443
444         generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
445
446         src << "\nvoid main (void)\n{\n";
447
448         generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);
449
450         src << "}\n";
451
452         return src.str();
453 }
454
455 // ShaderExecutor
456
457 ShaderExecutor::ShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
458         : m_renderCtx   (renderCtx)
459         , m_inputs              (shaderSpec.inputs)
460         , m_outputs             (shaderSpec.outputs)
461 {
462 }
463
464 ShaderExecutor::~ShaderExecutor (void)
465 {
466 }
467
468 void ShaderExecutor::useProgram (void)
469 {
470         DE_ASSERT(isOk());
471         m_renderCtx.getFunctions().useProgram(getProgram());
472 }
473
474 // FragmentOutExecutor
475
476 struct FragmentOutputLayout
477 {
478         std::vector<const Symbol*>              locationSymbols;                //! Symbols by location
479         std::map<std::string, int>              locationMap;                    //! Map from symbol name to start location
480 };
481
482 class FragmentOutExecutor : public ShaderExecutor
483 {
484 public:
485                                                                 FragmentOutExecutor             (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
486                                                                 ~FragmentOutExecutor    (void);
487
488         void                                            execute                                 (int numValues, const void* const* inputs, void* const* outputs);
489
490 protected:
491         const FragmentOutputLayout      m_outputLayout;
492 };
493
494 static FragmentOutputLayout computeFragmentOutputLayout (const std::vector<Symbol>& symbols)
495 {
496         FragmentOutputLayout    ret;
497         int                                             location        = 0;
498
499         for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
500         {
501                 const int       numLocations    = glu::getDataTypeNumLocations(it->varType.getBasicType());
502
503                 TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
504                 de::insert(ret.locationMap, it->name, location);
505                 location += numLocations;
506
507                 for (int ndx = 0; ndx < numLocations; ++ndx)
508                         ret.locationSymbols.push_back(&*it);
509         }
510
511         return ret;
512 }
513
514 inline bool hasFloatRenderTargets (const glu::RenderContext& renderCtx)
515 {
516         glu::ContextType type = renderCtx.getType();
517         return glu::isContextTypeGLCore(type);
518 }
519
520 FragmentOutExecutor::FragmentOutExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
521         : ShaderExecutor        (renderCtx, shaderSpec)
522         , m_outputLayout        (computeFragmentOutputLayout(m_outputs))
523 {
524 }
525
526 FragmentOutExecutor::~FragmentOutExecutor (void)
527 {
528 }
529
530 inline int queryInt (const glw::Functions& gl, deUint32 pname)
531 {
532         int value = 0;
533         gl.getIntegerv(pname, &value);
534         return value;
535 }
536
537 static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
538 {
539         const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
540         {
541                 tcu::TextureFormat::R,
542                 tcu::TextureFormat::RG,
543                 tcu::TextureFormat::RGBA,       // No RGB variants available.
544                 tcu::TextureFormat::RGBA
545         };
546
547         const glu::DataType                                     basicType               = outputType.getBasicType();
548         const int                                                       numComps                = glu::getDataTypeNumComponents(basicType);
549         tcu::TextureFormat::ChannelType         channelType;
550
551         switch (glu::getDataTypeScalarType(basicType))
552         {
553                 case glu::TYPE_UINT:    channelType = tcu::TextureFormat::UNSIGNED_INT32;                                                                                               break;
554                 case glu::TYPE_INT:             channelType = tcu::TextureFormat::SIGNED_INT32;                                                                                                 break;
555                 case glu::TYPE_BOOL:    channelType = tcu::TextureFormat::SIGNED_INT32;                                                                                                 break;
556                 case glu::TYPE_FLOAT:   channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;   break;
557                 default:
558                         throw tcu::InternalError("Invalid output type");
559         }
560
561         DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
562
563         return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
564 }
565
566 void FragmentOutExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
567 {
568         const glw::Functions&                   gl                                      = m_renderCtx.getFunctions();
569         const bool                                              useIntOutputs           = !hasFloatRenderTargets(m_renderCtx);
570         const int                                               maxRenderbufferSize     = queryInt(gl, GL_MAX_RENDERBUFFER_SIZE);
571         const int                                               framebufferW            = de::min(maxRenderbufferSize, numValues);
572         const int                                               framebufferH            = (numValues / framebufferW) + ((numValues % framebufferW != 0) ? 1 : 0);
573
574         glu::Framebuffer                                framebuffer                     (m_renderCtx);
575         glu::RenderbufferVector                 renderbuffers           (m_renderCtx, m_outputLayout.locationSymbols.size());
576
577         vector<glu::VertexArrayBinding> vertexArrays;
578         vector<tcu::Vec2>                               positions                       (numValues);
579
580         if (framebufferH > maxRenderbufferSize)
581                 throw tcu::NotSupportedError("Value count is too high for maximum supported renderbuffer size");
582
583         // Compute positions - 1px points are used to drive fragment shading.
584         for (int valNdx = 0; valNdx < numValues; valNdx++)
585         {
586                 const int               ix              = valNdx % framebufferW;
587                 const int               iy              = valNdx / framebufferW;
588                 const float             fx              = -1.0f + 2.0f*((float(ix) + 0.5f) / float(framebufferW));
589                 const float             fy              = -1.0f + 2.0f*((float(iy) + 0.5f) / float(framebufferH));
590
591                 positions[valNdx] = tcu::Vec2(fx, fy);
592         }
593
594         // Vertex inputs.
595         vertexArrays.push_back(glu::va::Float("a_position", 2, numValues, 0, (const float*)&positions[0]));
596
597         for (int inputNdx = 0; inputNdx < (int)m_inputs.size(); inputNdx++)
598         {
599                 const Symbol&           symbol          = m_inputs[inputNdx];
600                 const std::string       attribName      = "a_" + symbol.name;
601                 const void*                     ptr                     = inputs[inputNdx];
602                 const glu::DataType     basicType       = symbol.varType.getBasicType();
603                 const int                       vecSize         = glu::getDataTypeScalarSize(basicType);
604
605                 if (glu::isDataTypeFloatOrVec(basicType))
606                         vertexArrays.push_back(glu::va::Float(attribName, vecSize, numValues, 0, (const float*)ptr));
607                 else if (glu::isDataTypeIntOrIVec(basicType))
608                         vertexArrays.push_back(glu::va::Int32(attribName, vecSize, numValues, 0, (const deInt32*)ptr));
609                 else if (glu::isDataTypeUintOrUVec(basicType))
610                         vertexArrays.push_back(glu::va::Uint32(attribName, vecSize, numValues, 0, (const deUint32*)ptr));
611                 else if (glu::isDataTypeMatrix(basicType))
612                 {
613                         int             numRows = glu::getDataTypeMatrixNumRows(basicType);
614                         int             numCols = glu::getDataTypeMatrixNumColumns(basicType);
615                         int             stride  = numRows * numCols * (int)sizeof(float);
616
617                         for (int colNdx = 0; colNdx < numCols; ++colNdx)
618                                 vertexArrays.push_back(glu::va::Float(attribName, colNdx, numRows, numValues, stride, ((const float*)ptr) + colNdx * numRows));
619                 }
620                 else
621                         DE_ASSERT(false);
622         }
623
624         // Construct framebuffer.
625         gl.bindFramebuffer(GL_FRAMEBUFFER, *framebuffer);
626
627         for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
628         {
629                 const Symbol&   output                  = *m_outputLayout.locationSymbols[outNdx];
630                 const deUint32  renderbuffer    = renderbuffers[outNdx];
631                 const deUint32  format                  = glu::getInternalFormat(getRenderbufferFormatForOutput(output.varType, useIntOutputs));
632
633                 gl.bindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
634                 gl.renderbufferStorage(GL_RENDERBUFFER, format, framebufferW, framebufferH);
635                 gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0+outNdx, GL_RENDERBUFFER, renderbuffer);
636         }
637         gl.bindRenderbuffer(GL_RENDERBUFFER, 0);
638         GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to set up framebuffer object");
639         TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
640
641         {
642                 vector<deUint32> drawBuffers(m_outputLayout.locationSymbols.size());
643                 for (int ndx = 0; ndx < (int)m_outputLayout.locationSymbols.size(); ndx++)
644                         drawBuffers[ndx] = GL_COLOR_ATTACHMENT0+ndx;
645                 gl.drawBuffers((int)drawBuffers.size(), &drawBuffers[0]);
646                 GLU_EXPECT_NO_ERROR(gl.getError(), "glDrawBuffers()");
647         }
648
649         // Render
650         gl.viewport(0, 0, framebufferW, framebufferH);
651         glu::draw(m_renderCtx, this->getProgram(), (int)vertexArrays.size(), &vertexArrays[0],
652                           glu::pr::Points(numValues));
653         GLU_EXPECT_NO_ERROR(gl.getError(), "Error in draw");
654
655         // Read back pixels.
656         {
657                 tcu::TextureLevel       tmpBuf;
658
659                 // \todo [2013-08-07 pyry] Some fast-paths could be added here.
660
661                 for (int outNdx = 0; outNdx < (int)m_outputs.size(); ++outNdx)
662                 {
663                         const Symbol&                           output                  = m_outputs[outNdx];
664                         const int                                       outSize                 = output.varType.getScalarSize();
665                         const int                                       outVecSize              = glu::getDataTypeNumComponents(output.varType.getBasicType());
666                         const int                                       outNumLocs              = glu::getDataTypeNumLocations(output.varType.getBasicType());
667                         deUint32*                                       dstPtrBase              = static_cast<deUint32*>(outputs[outNdx]);
668                         const tcu::TextureFormat        format                  = getRenderbufferFormatForOutput(output.varType, useIntOutputs);
669                         const tcu::TextureFormat        readFormat              (tcu::TextureFormat::RGBA, format.type);
670                         const int                                       outLocation             = de::lookup(m_outputLayout.locationMap, output.name);
671
672                         tmpBuf.setStorage(readFormat, framebufferW, framebufferH);
673
674                         for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
675                         {
676                                 gl.readBuffer(GL_COLOR_ATTACHMENT0 + outLocation + locNdx);
677                                 glu::readPixels(m_renderCtx, 0, 0, tmpBuf.getAccess());
678                                 GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels");
679
680                                 if (outSize == 4 && outNumLocs == 1)
681                                         deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues*outVecSize*sizeof(deUint32));
682                                 else
683                                 {
684                                         for (int valNdx = 0; valNdx < numValues; valNdx++)
685                                         {
686                                                 const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx*4;
687                                                 deUint32*               dstPtr = &dstPtrBase[outSize*valNdx + outVecSize*locNdx];
688                                                 deMemcpy(dstPtr, srcPtr, outVecSize*sizeof(deUint32));
689                                         }
690                                 }
691                         }
692                 }
693         }
694
695         // \todo [2013-08-07 pyry] Clear draw buffers & viewport?
696         gl.bindFramebuffer(GL_FRAMEBUFFER, 0);
697 }
698
699 // VertexShaderExecutor
700
701 class VertexShaderExecutor : public FragmentOutExecutor
702 {
703 public:
704                                                                 VertexShaderExecutor    (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
705                                                                 ~VertexShaderExecutor   (void);
706
707         bool                                            isOk                                    (void) const                            { return m_program.isOk();                      }
708         void                                            log                                             (tcu::TestLog& dst) const       { dst << m_program;                                     }
709         deUint32                                        getProgram                              (void) const                            { return m_program.getProgram();        }
710
711 protected:
712         const glu::ShaderProgram        m_program;
713 };
714
715 VertexShaderExecutor::VertexShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
716         : FragmentOutExecutor   (renderCtx, shaderSpec)
717         , m_program                             (renderCtx,
718                                                          glu::ProgramSources() << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_"))
719                                                                                                    << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, !hasFloatRenderTargets(renderCtx), m_outputLayout.locationMap, "vtx_out_", "o_")))
720 {
721 }
722
723 VertexShaderExecutor::~VertexShaderExecutor (void)
724 {
725 }
726
727 // GeometryShaderExecutor
728
729 class GeometryShaderExecutor : public FragmentOutExecutor
730 {
731 public:
732         static GeometryShaderExecutor*  create                                  (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
733
734                                                                         ~GeometryShaderExecutor (void);
735
736         bool                                                    isOk                                    (void) const                            { return m_program.isOk();                      }
737         void                                                    log                                             (tcu::TestLog& dst) const       { dst << m_program;                                     }
738         deUint32                                                getProgram                              (void) const                            { return m_program.getProgram();        }
739
740 protected:
741         const glu::ShaderProgram                m_program;
742
743 private:
744                                                                         GeometryShaderExecutor  (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
745 };
746
747 GeometryShaderExecutor* GeometryShaderExecutor::create (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
748 {
749         if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
750                 checkExtension(renderCtx, "GL_EXT_geometry_shader");
751
752         return new GeometryShaderExecutor(renderCtx, shaderSpec);
753 }
754
755 GeometryShaderExecutor::GeometryShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
756         : FragmentOutExecutor   (renderCtx, shaderSpec)
757         , m_program                             (renderCtx,
758                                                          glu::ProgramSources() << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_"))
759                                                                                                    << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_"))
760                                                                                                    << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, !hasFloatRenderTargets(renderCtx), m_outputLayout.locationMap, "geom_out_", "o_")))
761 {
762 }
763
764 GeometryShaderExecutor::~GeometryShaderExecutor (void)
765 {
766 }
767
768 // FragmentShaderExecutor
769
770 class FragmentShaderExecutor : public FragmentOutExecutor
771 {
772 public:
773                                                                 FragmentShaderExecutor  (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
774                                                                 ~FragmentShaderExecutor (void);
775
776         bool                                            isOk                                    (void) const                            { return m_program.isOk();                      }
777         void                                            log                                             (tcu::TestLog& dst) const       { dst << m_program;                                     }
778         deUint32                                        getProgram                              (void) const                            { return m_program.getProgram();        }
779
780 protected:
781         const glu::ShaderProgram        m_program;
782 };
783
784 FragmentShaderExecutor::FragmentShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
785         : FragmentOutExecutor   (renderCtx, shaderSpec)
786         , m_program                             (renderCtx,
787                                                          glu::ProgramSources() << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_"))
788                                                                                                    << glu::FragmentSource(generateFragmentShader(shaderSpec, !hasFloatRenderTargets(renderCtx), m_outputLayout.locationMap, "vtx_out_", "o_")))
789 {
790 }
791
792 FragmentShaderExecutor::~FragmentShaderExecutor (void)
793 {
794 }
795
796 // Shared utilities for compute and tess executors
797
798 static deUint32 getVecStd430ByteAlignment (glu::DataType type)
799 {
800         switch (glu::getDataTypeScalarSize(type))
801         {
802                 case 1:         return 4u;
803                 case 2:         return 8u;
804                 case 3:         return 16u;
805                 case 4:         return 16u;
806                 default:
807                         DE_ASSERT(false);
808                         return 0u;
809         }
810 }
811
812 class BufferIoExecutor : public ShaderExecutor
813 {
814 public:
815                                                 BufferIoExecutor        (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources);
816                                                 ~BufferIoExecutor       (void);
817
818         bool                            isOk                            (void) const                            { return m_program.isOk();                      }
819         void                            log                                     (tcu::TestLog& dst) const       { dst << m_program;                                     }
820         deUint32                        getProgram                      (void) const                            { return m_program.getProgram();        }
821
822 protected:
823         enum
824         {
825                 INPUT_BUFFER_BINDING    = 0,
826                 OUTPUT_BUFFER_BINDING   = 1,
827         };
828
829         void                            initBuffers                     (int numValues);
830         deUint32                        getInputBuffer          (void) const            { return *m_inputBuffer;                                        }
831         deUint32                        getOutputBuffer         (void) const            { return *m_outputBuffer;                                       }
832         deUint32                        getInputStride          (void) const            { return getLayoutStride(m_inputLayout);        }
833         deUint32                        getOutputStride         (void) const            { return getLayoutStride(m_outputLayout);       }
834
835         void                            uploadInputBuffer       (const void* const* inputPtrs, int numValues);
836         void                            readOutputBuffer        (void* const* outputPtrs, int numValues);
837
838         static void                     declareBufferBlocks     (std::ostream& src, const ShaderSpec& spec);
839         static void                     generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);
840
841         glu::ShaderProgram      m_program;
842
843 private:
844         struct VarLayout
845         {
846                 deUint32                offset;
847                 deUint32                stride;
848                 deUint32                matrixStride;
849
850                 VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
851         };
852
853         void                            resizeInputBuffer       (int newSize);
854         void                            resizeOutputBuffer      (int newSize);
855
856         static void                     computeVarLayout        (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
857         static deUint32         getLayoutStride         (const vector<VarLayout>& layout);
858
859         static void                     copyToBuffer            (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
860         static void                     copyFromBuffer          (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
861
862         glu::Buffer                     m_inputBuffer;
863         glu::Buffer                     m_outputBuffer;
864
865         vector<VarLayout>       m_inputLayout;
866         vector<VarLayout>       m_outputLayout;
867 };
868
869 BufferIoExecutor::BufferIoExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources)
870         : ShaderExecutor        (renderCtx, shaderSpec)
871         , m_program                     (renderCtx, sources)
872         , m_inputBuffer         (renderCtx)
873         , m_outputBuffer        (renderCtx)
874 {
875         computeVarLayout(m_inputs, &m_inputLayout);
876         computeVarLayout(m_outputs, &m_outputLayout);
877 }
878
879 BufferIoExecutor::~BufferIoExecutor (void)
880 {
881 }
882
883 void BufferIoExecutor::resizeInputBuffer (int newSize)
884 {
885         const glw::Functions& gl = m_renderCtx.getFunctions();
886         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *m_inputBuffer);
887         gl.bufferData(GL_SHADER_STORAGE_BUFFER, newSize, DE_NULL, GL_STATIC_DRAW);
888         GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to allocate input buffer");
889 }
890
891 void BufferIoExecutor::resizeOutputBuffer (int newSize)
892 {
893         const glw::Functions& gl = m_renderCtx.getFunctions();
894         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *m_outputBuffer);
895         gl.bufferData(GL_SHADER_STORAGE_BUFFER, newSize, DE_NULL, GL_STATIC_DRAW);
896         GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to allocate output buffer");
897 }
898
899 void BufferIoExecutor::initBuffers (int numValues)
900 {
901         const deUint32          inputStride                     = getLayoutStride(m_inputLayout);
902         const deUint32          outputStride            = getLayoutStride(m_outputLayout);
903         const int                       inputBufferSize         = numValues * inputStride;
904         const int                       outputBufferSize        = numValues * outputStride;
905
906         resizeInputBuffer(inputBufferSize);
907         resizeOutputBuffer(outputBufferSize);
908 }
909
910 void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
911 {
912         deUint32        maxAlignment    = 0;
913         deUint32        curOffset               = 0;
914
915         DE_ASSERT(layout->empty());
916         layout->resize(symbols.size());
917
918         for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
919         {
920                 const Symbol&           symbol          = symbols[varNdx];
921                 const glu::DataType     basicType       = symbol.varType.getBasicType();
922                 VarLayout&                      layoutEntry     = (*layout)[varNdx];
923
924                 if (glu::isDataTypeScalarOrVector(basicType))
925                 {
926                         const deUint32  alignment       = getVecStd430ByteAlignment(basicType);
927                         const deUint32  size            = (deUint32)glu::getDataTypeScalarSize(basicType)*(int)sizeof(deUint32);
928
929                         curOffset               = (deUint32)deAlign32((int)curOffset, (int)alignment);
930                         maxAlignment    = de::max(maxAlignment, alignment);
931
932                         layoutEntry.offset                      = curOffset;
933                         layoutEntry.matrixStride        = 0;
934
935                         curOffset += size;
936                 }
937                 else if (glu::isDataTypeMatrix(basicType))
938                 {
939                         const int                               numVecs                 = glu::getDataTypeMatrixNumColumns(basicType);
940                         const glu::DataType             vecType                 = glu::getDataTypeFloatVec(glu::getDataTypeMatrixNumRows(basicType));
941                         const deUint32                  vecAlignment    = getVecStd430ByteAlignment(vecType);
942
943                         curOffset               = (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
944                         maxAlignment    = de::max(maxAlignment, vecAlignment);
945
946                         layoutEntry.offset                      = curOffset;
947                         layoutEntry.matrixStride        = vecAlignment;
948
949                         curOffset += vecAlignment*numVecs;
950                 }
951                 else
952                         DE_ASSERT(false);
953         }
954
955         {
956                 const deUint32  totalSize       = (deUint32)deAlign32(curOffset, maxAlignment);
957
958                 for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
959                         varIter->stride = totalSize;
960         }
961 }
962
963 inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
964 {
965         return layout.empty() ? 0 : layout[0].stride;
966 }
967
968 void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
969 {
970         if (varType.isBasicType())
971         {
972                 const glu::DataType             basicType               = varType.getBasicType();
973                 const bool                              isMatrix                = glu::isDataTypeMatrix(basicType);
974                 const int                               scalarSize              = glu::getDataTypeScalarSize(basicType);
975                 const int                               numVecs                 = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
976                 const int                               numComps                = scalarSize / numVecs;
977
978                 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
979                 {
980                         for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
981                         {
982                                 const int               srcOffset               = (int)sizeof(deUint32)*(elemNdx*scalarSize + vecNdx*numComps);
983                                 const int               dstOffset               = layout.offset + layout.stride*elemNdx + (isMatrix ? layout.matrixStride*vecNdx : 0);
984                                 const deUint8*  srcPtr                  = (const deUint8*)srcBasePtr + srcOffset;
985                                 deUint8*                dstPtr                  = (deUint8*)dstBasePtr + dstOffset;
986
987                                 deMemcpy(dstPtr, srcPtr, sizeof(deUint32)*numComps);
988                         }
989                 }
990         }
991         else
992                 throw tcu::InternalError("Unsupported type");
993 }
994
995 void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
996 {
997         if (varType.isBasicType())
998         {
999                 const glu::DataType             basicType               = varType.getBasicType();
1000                 const bool                              isMatrix                = glu::isDataTypeMatrix(basicType);
1001                 const int                               scalarSize              = glu::getDataTypeScalarSize(basicType);
1002                 const int                               numVecs                 = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1003                 const int                               numComps                = scalarSize / numVecs;
1004
1005                 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1006                 {
1007                         for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1008                         {
1009                                 const int               srcOffset               = layout.offset + layout.stride*elemNdx + (isMatrix ? layout.matrixStride*vecNdx : 0);
1010                                 const int               dstOffset               = (int)sizeof(deUint32)*(elemNdx*scalarSize + vecNdx*numComps);
1011                                 const deUint8*  srcPtr                  = (const deUint8*)srcBasePtr + srcOffset;
1012                                 deUint8*                dstPtr                  = (deUint8*)dstBasePtr + dstOffset;
1013
1014                                 deMemcpy(dstPtr, srcPtr, sizeof(deUint32)*numComps);
1015                         }
1016                 }
1017         }
1018         else
1019                 throw tcu::InternalError("Unsupported type");
1020 }
1021
1022 void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues)
1023 {
1024         const glw::Functions&   gl                              = m_renderCtx.getFunctions();
1025         const deUint32                  buffer                  = *m_inputBuffer;
1026         const deUint32                  inputStride             = getLayoutStride(m_inputLayout);
1027         const int                               inputBufferSize = inputStride*numValues;
1028
1029         if (inputBufferSize == 0)
1030                 return; // No inputs
1031
1032         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
1033         void* mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, inputBufferSize, GL_MAP_WRITE_BIT);
1034         GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
1035         TCU_CHECK(mapPtr);
1036
1037         try
1038         {
1039                 DE_ASSERT(m_inputs.size() == m_inputLayout.size());
1040                 for (size_t inputNdx = 0; inputNdx < m_inputs.size(); ++inputNdx)
1041                 {
1042                         const glu::VarType&             varType         = m_inputs[inputNdx].varType;
1043                         const VarLayout&                layout          = m_inputLayout[inputNdx];
1044
1045                         copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], mapPtr);
1046                 }
1047         }
1048         catch (...)
1049         {
1050                 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1051                 throw;
1052         }
1053
1054         gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1055         GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
1056 }
1057
1058 void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1059 {
1060         const glw::Functions&   gl                                      = m_renderCtx.getFunctions();
1061         const deUint32                  buffer                          = *m_outputBuffer;
1062         const deUint32                  outputStride            = getLayoutStride(m_outputLayout);
1063         const int                               outputBufferSize        = numValues*outputStride;
1064
1065         DE_ASSERT(outputBufferSize > 0); // At least some outputs are required.
1066
1067         gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
1068         void* mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, outputBufferSize, GL_MAP_READ_BIT);
1069         GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
1070         TCU_CHECK(mapPtr);
1071
1072         try
1073         {
1074                 DE_ASSERT(m_outputs.size() == m_outputLayout.size());
1075                 for (size_t outputNdx = 0; outputNdx < m_outputs.size(); ++outputNdx)
1076                 {
1077                         const glu::VarType&             varType         = m_outputs[outputNdx].varType;
1078                         const VarLayout&                layout          = m_outputLayout[outputNdx];
1079
1080                         copyFromBuffer(varType, layout, numValues, mapPtr, outputPtrs[outputNdx]);
1081                 }
1082         }
1083         catch (...)
1084         {
1085                 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1086                 throw;
1087         }
1088
1089         gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1090         GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
1091 }
1092
1093 void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
1094 {
1095         // Input struct
1096         if (!spec.inputs.empty())
1097         {
1098                 glu::StructType inputStruct("Inputs");
1099                 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1100                         inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1101                 src << glu::declare(&inputStruct) << ";\n";
1102         }
1103
1104         // Output struct
1105         {
1106                 glu::StructType outputStruct("Outputs");
1107                 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1108                         outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1109                 src << glu::declare(&outputStruct) << ";\n";
1110         }
1111
1112         src << "\n";
1113
1114         if (!spec.inputs.empty())
1115         {
1116                 src     << "layout(binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1117                         << "{\n"
1118                         << "    Inputs inputs[];\n"
1119                         << "};\n";
1120         }
1121
1122         src     << "layout(binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1123                 << "{\n"
1124                 << "    Outputs outputs[];\n"
1125                 << "};\n"
1126                 << "\n";
1127 }
1128
1129 void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
1130 {
1131         for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1132                 src << "\t" << glu::declare(symIter->varType, symIter->name) << " = inputs[" << invocationNdxName << "]." << symIter->name << ";\n";
1133
1134         for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1135                 src << "\t" << glu::declare(symIter->varType, symIter->name) << ";\n";
1136
1137         src << "\n";
1138
1139         {
1140                 std::istringstream      opSrc   (spec.source);
1141                 std::string                     line;
1142
1143                 while (std::getline(opSrc, line))
1144                         src << "\t" << line << "\n";
1145         }
1146
1147         src << "\n";
1148         for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1149                 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1150 }
1151
1152 // ComputeShaderExecutor
1153
1154 class ComputeShaderExecutor : public BufferIoExecutor
1155 {
1156 public:
1157                                                 ComputeShaderExecutor   (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1158                                                 ~ComputeShaderExecutor  (void);
1159
1160         void                            execute                                 (int numValues, const void* const* inputs, void* const* outputs);
1161
1162 protected:
1163         static std::string      generateComputeShader   (const ShaderSpec& spec);
1164
1165         tcu::IVec3                      m_maxWorkSize;
1166 };
1167
1168 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
1169 {
1170         std::ostringstream src;
1171
1172         src << glu::getGLSLVersionDeclaration(spec.version) << "\n";
1173
1174         if (!spec.globalDeclarations.empty())
1175                 src << spec.globalDeclarations << "\n";
1176
1177         src << "layout(local_size_x = 1) in;\n"
1178                 << "\n";
1179
1180         declareBufferBlocks(src, spec);
1181
1182         src << "void main (void)\n"
1183                 << "{\n"
1184                 << "    uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
1185                 << "                       + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
1186
1187         generateExecBufferIo(src, spec, "invocationNdx");
1188
1189         src << "}\n";
1190
1191         return src.str();
1192 }
1193
1194 ComputeShaderExecutor::ComputeShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1195         : BufferIoExecutor      (renderCtx, shaderSpec,
1196                                                  glu::ProgramSources() << glu::ComputeSource(generateComputeShader(shaderSpec)))
1197 {
1198         m_maxWorkSize   = tcu::IVec3(128,128,64); // Minimum in 3plus
1199 }
1200
1201 ComputeShaderExecutor::~ComputeShaderExecutor (void)
1202 {
1203 }
1204
1205 void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1206 {
1207         const glw::Functions&   gl                                              = m_renderCtx.getFunctions();
1208         const int                               maxValuesPerInvocation  = m_maxWorkSize[0];
1209         const deUint32                  inputStride                             = getInputStride();
1210         const deUint32                  outputStride                    = getOutputStride();
1211
1212         initBuffers(numValues);
1213
1214         // Setup input buffer & copy data
1215         uploadInputBuffer(inputs, numValues);
1216
1217         // Perform compute invocations
1218         {
1219                 int curOffset = 0;
1220                 while (curOffset < numValues)
1221                 {
1222                         const int numToExec = de::min(maxValuesPerInvocation, numValues-curOffset);
1223
1224                         if (inputStride > 0)
1225                                 gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer(), curOffset*inputStride, numToExec*inputStride);
1226
1227                         gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer(), curOffset*outputStride, numToExec*outputStride);
1228                         GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBufferRange(GL_SHADER_STORAGE_BUFFER)");
1229
1230                         gl.dispatchCompute(numToExec, 1, 1);
1231                         GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1232
1233                         curOffset += numToExec;
1234                 }
1235         }
1236
1237         // Read back data
1238         readOutputBuffer(outputs, numValues);
1239 }
1240
1241 // Tessellation utils
1242
1243 static std::string generateVertexShaderForTess (glu::GLSLVersion version)
1244 {
1245         std::ostringstream      src;
1246
1247         src << glu::getGLSLVersionDeclaration(version) << "\n";
1248
1249         src << "void main (void)\n{\n"
1250                 << "    gl_Position = vec4(gl_VertexID/2, gl_VertexID%2, 0.0, 1.0);\n"
1251                 << "}\n";
1252
1253         return src.str();
1254 }
1255
1256 void checkTessSupport (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, glu::ShaderType stage)
1257 {
1258         const int numBlockRequired = 2; // highest binding is always 1 (output) i.e. count == 2
1259
1260         if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
1261                 checkExtension(renderCtx, "GL_EXT_tessellation_shader");
1262
1263         if (stage == glu::SHADERTYPE_TESSELLATION_CONTROL)
1264                 checkLimit(renderCtx, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, numBlockRequired);
1265         else if (stage == glu::SHADERTYPE_TESSELLATION_EVALUATION)
1266                 checkLimit(renderCtx, GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, numBlockRequired);
1267         else
1268                 DE_ASSERT(false);
1269 }
1270
1271 // TessControlExecutor
1272
1273 class TessControlExecutor : public BufferIoExecutor
1274 {
1275 public:
1276         static TessControlExecutor*     create                                          (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1277
1278                                                                 ~TessControlExecutor            (void);
1279
1280         void                                            execute                                         (int numValues, const void* const* inputs, void* const* outputs);
1281
1282
1283 protected:
1284         static std::string                      generateTessControlShader       (const ShaderSpec& shaderSpec);
1285
1286 private:
1287                                                                 TessControlExecutor                     (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1288 };
1289
1290 TessControlExecutor* TessControlExecutor::create (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1291 {
1292         checkTessSupport(renderCtx, shaderSpec, glu::SHADERTYPE_TESSELLATION_CONTROL);
1293
1294         return new TessControlExecutor(renderCtx, shaderSpec);
1295 }
1296
1297 std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
1298 {
1299         std::ostringstream src;
1300
1301         src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
1302
1303         if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
1304                 src << "#extension GL_EXT_tessellation_shader : require\n";
1305
1306         if (!shaderSpec.globalDeclarations.empty())
1307                 src << shaderSpec.globalDeclarations << "\n";
1308
1309         src << "\nlayout(vertices = 1) out;\n\n";
1310
1311         declareBufferBlocks(src, shaderSpec);
1312
1313         src << "void main (void)\n{\n";
1314
1315         for (int ndx = 0; ndx < 2; ndx++)
1316                 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
1317
1318         for (int ndx = 0; ndx < 4; ndx++)
1319                 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
1320
1321         src << "\n"
1322                 << "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
1323
1324         generateExecBufferIo(src, shaderSpec, "invocationId");
1325
1326         src << "}\n";
1327
1328         return src.str();
1329 }
1330
1331 static std::string generateEmptyTessEvalShader (glu::GLSLVersion version)
1332 {
1333         std::ostringstream src;
1334
1335         src << glu::getGLSLVersionDeclaration(version) << "\n";
1336
1337         if (glu::glslVersionIsES(version) && version <= glu::GLSL_VERSION_310_ES)
1338                 src << "#extension GL_EXT_tessellation_shader : require\n\n";
1339
1340         src << "layout(triangles, ccw) in;\n";
1341
1342         src << "\nvoid main (void)\n{\n"
1343                 << "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n"
1344                 << "}\n";
1345
1346         return src.str();
1347 }
1348
1349 TessControlExecutor::TessControlExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1350         : BufferIoExecutor      (renderCtx, shaderSpec, glu::ProgramSources()
1351                                                         << glu::VertexSource(generateVertexShaderForTess(shaderSpec.version))
1352                                                         << glu::TessellationControlSource(generateTessControlShader(shaderSpec))
1353                                                         << glu::TessellationEvaluationSource(generateEmptyTessEvalShader(shaderSpec.version))
1354                                                         << glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version)))
1355 {
1356 }
1357
1358 TessControlExecutor::~TessControlExecutor (void)
1359 {
1360 }
1361
1362 void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1363 {
1364         const glw::Functions&   gl      = m_renderCtx.getFunctions();
1365
1366         initBuffers(numValues);
1367
1368         // Setup input buffer & copy data
1369         uploadInputBuffer(inputs, numValues);
1370
1371         if (!m_inputs.empty())
1372                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer());
1373
1374         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer());
1375
1376         deUint32 vertexArray;
1377         gl.genVertexArrays(1, &vertexArray);
1378         gl.bindVertexArray(vertexArray);
1379
1380         // Render patches
1381         gl.patchParameteri(GL_PATCH_VERTICES, 3);
1382         gl.drawArrays(GL_PATCHES, 0, 3*numValues);
1383
1384         gl.bindVertexArray(0);
1385         gl.deleteVertexArrays(1, &vertexArray);
1386
1387         // Read back data
1388         readOutputBuffer(outputs, numValues);
1389 }
1390
1391 // TessEvaluationExecutor
1392
1393 class TessEvaluationExecutor : public BufferIoExecutor
1394 {
1395 public:
1396         static TessEvaluationExecutor*  create                                  (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1397
1398                                                                         ~TessEvaluationExecutor (void);
1399
1400         void                                                    execute                                 (int numValues, const void* const* inputs, void* const* outputs);
1401
1402
1403 protected:
1404         static std::string                              generateTessEvalShader  (const ShaderSpec& shaderSpec);
1405
1406 private:
1407                                                                         TessEvaluationExecutor  (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1408 };
1409
1410 TessEvaluationExecutor* TessEvaluationExecutor::create (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1411 {
1412         checkTessSupport(renderCtx, shaderSpec, glu::SHADERTYPE_TESSELLATION_EVALUATION);
1413
1414         return new TessEvaluationExecutor(renderCtx, shaderSpec);
1415 }
1416
1417 static std::string generatePassthroughTessControlShader (glu::GLSLVersion version)
1418 {
1419         std::ostringstream src;
1420
1421         src << glu::getGLSLVersionDeclaration(version) << "\n";
1422
1423         if (glu::glslVersionIsES(version) && version <= glu::GLSL_VERSION_310_ES)
1424                 src << "#extension GL_EXT_tessellation_shader : require\n\n";
1425
1426         src << "layout(vertices = 1) out;\n\n";
1427
1428         src << "void main (void)\n{\n";
1429
1430         for (int ndx = 0; ndx < 2; ndx++)
1431                 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
1432
1433         for (int ndx = 0; ndx < 4; ndx++)
1434                 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
1435
1436         src << "}\n";
1437
1438         return src.str();
1439 }
1440
1441 std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
1442 {
1443         std::ostringstream src;
1444
1445         src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
1446
1447         if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
1448                 src << "#extension GL_EXT_tessellation_shader : require\n";
1449
1450         if (!shaderSpec.globalDeclarations.empty())
1451                 src << shaderSpec.globalDeclarations << "\n";
1452
1453         src << "\n";
1454
1455         src << "layout(isolines, equal_spacing) in;\n\n";
1456
1457         declareBufferBlocks(src, shaderSpec);
1458
1459         src << "void main (void)\n{\n"
1460                 << "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
1461                 << "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
1462
1463         generateExecBufferIo(src, shaderSpec, "invocationId");
1464
1465         src     << "}\n";
1466
1467         return src.str();
1468 }
1469
1470 TessEvaluationExecutor::TessEvaluationExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1471         : BufferIoExecutor      (renderCtx, shaderSpec, glu::ProgramSources()
1472                                                         << glu::VertexSource(generateVertexShaderForTess(shaderSpec.version))
1473                                                         << glu::TessellationControlSource(generatePassthroughTessControlShader(shaderSpec.version))
1474                                                         << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec))
1475                                                         << glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version)))
1476 {
1477 }
1478
1479 TessEvaluationExecutor::~TessEvaluationExecutor (void)
1480 {
1481 }
1482
1483 void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1484 {
1485         const glw::Functions&   gl                              = m_renderCtx.getFunctions();
1486         const int                               alignedValues   = deAlign32(numValues, 2);
1487
1488         // Initialize buffers with aligned value count to make room for padding
1489         initBuffers(alignedValues);
1490
1491         // Setup input buffer & copy data
1492         uploadInputBuffer(inputs, numValues);
1493
1494         // \todo [2014-06-26 pyry] Duplicate last value in the buffer to prevent infinite loops for example?
1495
1496         if (!m_inputs.empty())
1497                 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer());
1498
1499         gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer());
1500
1501         deUint32 vertexArray;
1502         gl.genVertexArrays(1, &vertexArray);
1503         gl.bindVertexArray(vertexArray);
1504
1505         // Render patches
1506         gl.patchParameteri(GL_PATCH_VERTICES, 2);
1507         gl.drawArrays(GL_PATCHES, 0, alignedValues);
1508
1509         gl.bindVertexArray(0);
1510         gl.deleteVertexArrays(1, &vertexArray);
1511
1512         // Read back data
1513         readOutputBuffer(outputs, numValues);
1514 }
1515
1516 // Utilities
1517
1518 ShaderExecutor* createExecutor (const glu::RenderContext& renderCtx, glu::ShaderType shaderType, const ShaderSpec& shaderSpec)
1519 {
1520         switch (shaderType)
1521         {
1522                 case glu::SHADERTYPE_VERTEX:                                    return new VertexShaderExecutor                 (renderCtx, shaderSpec);
1523                 case glu::SHADERTYPE_TESSELLATION_CONTROL:              return TessControlExecutor::create              (renderCtx, shaderSpec);
1524                 case glu::SHADERTYPE_TESSELLATION_EVALUATION:   return TessEvaluationExecutor::create   (renderCtx, shaderSpec);
1525                 case glu::SHADERTYPE_GEOMETRY:                                  return GeometryShaderExecutor::create   (renderCtx, shaderSpec);
1526                 case glu::SHADERTYPE_FRAGMENT:                                  return new FragmentShaderExecutor               (renderCtx, shaderSpec);
1527                 case glu::SHADERTYPE_COMPUTE:                                   return new ComputeShaderExecutor                (renderCtx, shaderSpec);
1528                 default:
1529                         throw tcu::InternalError("Unsupported shader type");
1530         }
1531 }
1532
1533 } // ShaderExecUtil
1534 } // gls
1535 } // deqp