1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL (ES) Module
3 * -----------------------------------------------
5 * Copyright 2014 The Android Open Source Project
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
21 * \brief Shader execution utilities.
22 *//*--------------------------------------------------------------------*/
24 #include "glsShaderExecUtil.hpp"
25 #include "gluRenderContext.hpp"
26 #include "gluDrawUtil.hpp"
27 #include "gluObjectWrapper.hpp"
28 #include "gluShaderProgram.hpp"
29 #include "gluTextureUtil.hpp"
30 #include "gluProgramInterfaceQuery.hpp"
31 #include "gluPixelTransfer.hpp"
32 #include "gluStrUtil.hpp"
33 #include "tcuTestLog.hpp"
34 #include "glwFunctions.hpp"
35 #include "glwEnums.hpp"
36 #include "deSTLUtil.hpp"
37 #include "deStringUtil.hpp"
38 #include "deUniquePtr.hpp"
48 namespace ShaderExecUtil
53 static bool isExtensionSupported (const glu::RenderContext& renderCtx, const std::string& extension)
55 const glw::Functions& gl = renderCtx.getFunctions();
58 gl.getIntegerv(GL_NUM_EXTENSIONS, &numExts);
60 for (int ndx = 0; ndx < numExts; ndx++)
62 const char* curExt = (const char*)gl.getStringi(GL_EXTENSIONS, ndx);
64 if (extension == curExt)
71 static void checkExtension (const glu::RenderContext& renderCtx, const std::string& extension)
73 if (!isExtensionSupported(renderCtx, extension))
74 throw tcu::NotSupportedError(extension + " is not supported");
77 static void checkLimit (const glu::RenderContext& renderCtx, deUint32 pname, int required)
79 const glw::Functions& gl = renderCtx.getFunctions();
80 int implementationLimit = -1;
83 gl.getIntegerv(pname, &implementationLimit);
84 error = gl.getError();
86 if (error != GL_NO_ERROR)
87 throw tcu::TestError("Failed to query " + de::toString(glu::getGettableStateStr(pname)) + " - got " + de::toString(glu::getErrorStr(error)));
88 if (implementationLimit < required)
89 throw tcu::NotSupportedError("Test requires " + de::toString(glu::getGettableStateStr(pname)) + " >= " + de::toString(required) + ", got " + de::toString(implementationLimit));
// Builds the GLSL source of a vertex shader that runs shaderSpec.source.
//
// The shader declares a_position, one attribute per spec input (named inputPrefix + name) and one
// flat output per spec output (named outputPrefix + name). In main() the inputs are copied into
// locals that carry the plain symbol names, the operation source is pasted in line by line, and
// the output locals are then written to the prefixed outputs. Both prefixes must be non-empty.
94 static std::string generateVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
// Versions with in/out qualifiers use "in"/"out"; the others use "attribute"/"varying".
96 const bool usesInout = glu::glslVersionUsesInOutQualifiers(shaderSpec.version);
97 const char* in = usesInout ? "in" : "attribute";
98 const char* out = usesInout ? "out" : "varying";
99 std::ostringstream src;
101 DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
103 src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
105 if (!shaderSpec.globalDeclarations.empty())
106 src << shaderSpec.globalDeclarations << "\n";
108 src << in << " highp vec4 a_position;\n";
110 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
111 src << in << " " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
113 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
115 DE_ASSERT(output->varType.isBasicType());
// Boolean outputs are declared as highp int / ivecN with the same component count.
117 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
119 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
120 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
121 const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP);
123 src << "flat " << out << " " << glu::declare(intType, outputPrefix + output->name) << ";\n";
126 src << "flat " << out << " " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
130 << "void main (void)\n"
132 << " gl_Position = a_position;\n"
133 << " gl_PointSize = 1.0;\n\n";
135 // Declare & fetch local input variables
136 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
137 src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
139 // Declare local output variables
140 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
141 src << "\t" << glu::declare(output->varType, output->name) << ";\n";
143 // Operation - indented to correct level.
145 std::istringstream opSrc (shaderSpec.source);
148 while (std::getline(opSrc, line))
149 src << "\t" << line << "\n";
152 // Assignments to outputs.
153 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
// Boolean results go through an int / ivecN constructor to match the declared output type.
155 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
157 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
158 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
160 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
163 src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
// Builds the GLSL source of a geometry shader that runs shaderSpec.source once per input point.
//
// Inputs arrive as flat arrays named inputPrefix + name and are read from element [0]; results are
// written to flat outputs named outputPrefix + name, after which a single vertex is emitted.
// Requires a GLSL version with in/out qualifiers and non-empty prefixes (both asserted).
171 static std::string generateGeometryShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
173 DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
174 DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
176 std::ostringstream src;
178 src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
// ES versions up to and including 3.10 get the GL_EXT_geometry_shader directive.
180 if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
181 src << "#extension GL_EXT_geometry_shader : require\n";
183 if (!shaderSpec.globalDeclarations.empty())
184 src << shaderSpec.globalDeclarations << "\n";
// Points in, at most one point out.
186 src << "layout(points) in;\n"
187 << "layout(points, max_vertices = 1) out;\n";
189 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
190 src << "flat in " << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";
192 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
194 DE_ASSERT(output->varType.isBasicType());
// Boolean outputs are declared as highp int / ivecN with the same component count.
196 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
198 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
199 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
200 const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP);
202 src << "flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
205 src << "flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
209 << "void main (void)\n"
211 << " gl_Position = gl_in[0].gl_Position;\n\n";
213 // Fetch input variables
214 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
215 src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";
217 // Declare local output variables.
218 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
219 src << "\t" << glu::declare(output->varType, output->name) << ";\n";
223 // Operation - indented to correct level.
225 std::istringstream opSrc (shaderSpec.source);
228 while (std::getline(opSrc, line))
229 src << "\t" << line << "\n";
232 // Assignments to outputs.
233 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
// Boolean results go through an int / ivecN constructor to match the declared output type.
235 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
237 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
238 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
240 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
243 src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
246 src << " EmitVertex();\n"
247 << " EndPrimitive();\n"
// Builds a minimal fragment shader: the version declaration followed by a main() function.
// NOTE(review): customOut presumably decides whether the gl_FragColor write below is emitted
// (in/out-style versions have no gl_FragColor) - confirm against the surrounding condition.
253 static std::string generateEmptyFragmentSource (glu::GLSLVersion version)
255 const bool customOut = glu::glslVersionUsesInOutQualifiers(version);
256 std::ostringstream src;
258 src << glu::getGLSLVersionDeclaration(version) << "\n";
260 // \todo [2013-08-05 pyry] Do we need one dummy output?
262 src << "void main (void)\n{\n";
264 src << " gl_FragColor = vec4(0.0);\n";
// Builds a vertex shader that performs no computation of its own: it forwards a_position to
// gl_Position, sets the point size to 1.0 and copies every spec input from the attribute named
// inputPrefix + name to a flat output named outputPrefix + name.
270 static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
272 // flat qualifier is not present in earlier versions?
273 DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
275 std::ostringstream src;
277 src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n"
278 << "in highp vec4 a_position;\n";
// One attribute / flat output pair per input symbol.
280 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
282 src << "in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
283 << "flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
286 src << "\nvoid main (void)\n{\n"
287 << " gl_Position = a_position;\n"
288 << " gl_PointSize = 1.0;\n";
// Forward each input unchanged.
290 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
291 src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
298 static void generateFragShaderOutputDecl (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& outputPrefix)
300 DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
302 for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
304 const Symbol& output = shaderSpec.outputs[outNdx];
305 const int location = de::lookup(outLocationMap, output.name);
306 const std::string outVarName = outputPrefix + output.name;
307 glu::VariableDeclaration decl (output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));
309 TCU_CHECK_INTERNAL(output.varType.isBasicType());
311 if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
313 const int vecSize = glu::getDataTypeScalarSize(output.varType.getBasicType());
314 const glu::DataType uintBasicType = vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
315 const glu::VarType uintType (uintBasicType, glu::PRECISION_HIGHP);
317 decl.varType = uintType;
318 src << decl << ";\n";
320 else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
322 const int vecSize = glu::getDataTypeScalarSize(output.varType.getBasicType());
323 const glu::DataType intBasicType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
324 const glu::VarType intType (intBasicType, glu::PRECISION_HIGHP);
326 decl.varType = intType;
327 src << decl << ";\n";
329 else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
331 const int vecSize = glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
332 const int numVecs = glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
333 const glu::DataType uintBasicType = glu::getDataTypeUintVec(vecSize);
334 const glu::VarType uintType (uintBasicType, glu::PRECISION_HIGHP);
336 decl.varType = uintType;
337 for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
339 decl.name = outVarName + "_" + de::toString(vecNdx);
340 decl.layout.location = location + vecNdx;
341 src << decl << ";\n";
345 src << decl << ";\n";
349 static void generateFragShaderOutAssign (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::string& valuePrefix, const std::string& outputPrefix)
351 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
353 if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
354 src << " o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
355 else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
357 const int numVecs = glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
359 for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
361 src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix << output->name << "[" << vecNdx << "]);\n";
363 src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << valuePrefix << output->name << "[" << vecNdx << "];\n";
365 else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
367 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
368 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
370 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << valuePrefix << output->name << ");\n";
373 src << "\t" << outputPrefix << output->name << " = " << valuePrefix << output->name << ";\n";
// Builds the GLSL source of a fragment shader that runs shaderSpec.source.
//
// Inputs are flat varyings named inputPrefix + name; outputs are declared through
// generateFragShaderOutputDecl() and written through generateFragShaderOutAssign() with an empty
// value prefix, i.e. from the locals that carry the plain output symbol names.
377 static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
379 DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
381 std::ostringstream src;
383 src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
385 if (!shaderSpec.globalDeclarations.empty())
386 src << shaderSpec.globalDeclarations << "\n";
388 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
389 src << "flat in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
391 generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
393 src << "\nvoid main (void)\n{\n";
395 // Declare & fetch local input variables
396 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
397 src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
399 // Declare output variables
400 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
401 src << "\t" << glu::declare(output->varType, output->name) << ";\n";
403 // Operation - indented to correct level.
405 std::istringstream opSrc (shaderSpec.source);
408 while (std::getline(opSrc, line))
409 src << "\t" << line << "\n";
// Empty value prefix: results are read from the locals declared above.
412 generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix);
// Builds a fragment shader that only forwards values computed in an earlier stage.
//
// For every spec output a flat input named inputPrefix + name is declared (bool types as highp
// int / ivecN), and main() copies those inputs to the fragment outputs by calling
// generateFragShaderOutAssign() with inputPrefix as the value prefix.
419 static std::string generatePassthroughFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
421 DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
423 std::ostringstream src;
425 src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
427 if (!shaderSpec.globalDeclarations.empty())
428 src << shaderSpec.globalDeclarations << "\n";
// The inputs of this stage are the spec's *outputs*, as produced by the previous stage.
430 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
432 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
434 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
435 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
436 const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP);
438 src << "flat in " << glu::declare(intType, inputPrefix + output->name) << ";\n";
441 src << "flat in " << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
444 generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
446 src << "\nvoid main (void)\n{\n";
448 generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);
// Stores the render context and copies the input and output symbol lists from the shader spec.
457 ShaderExecutor::ShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
458 : m_renderCtx (renderCtx)
459 , m_inputs (shaderSpec.inputs)
460 , m_outputs (shaderSpec.outputs)
// Destructor.
464 ShaderExecutor::~ShaderExecutor (void)
// Makes the program returned by getProgram() current on the render context.
468 void ShaderExecutor::useProgram (void)
471 m_renderCtx.getFunctions().useProgram(getProgram());
474 // FragmentOutExecutor
// Assignment of fragment shader output symbols to output locations,
// as produced by computeFragmentOutputLayout().
476 struct FragmentOutputLayout
478 std::vector<const Symbol*> locationSymbols; //! Symbols by location
479 std::map<std::string, int> locationMap; //! Map from symbol name to start location
// Base class for executors that return results through fragment shader outputs:
// execute() draws points into renderbuffers and reads the values back.
482 class FragmentOutExecutor : public ShaderExecutor
485 FragmentOutExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
486 ~FragmentOutExecutor (void);
// Runs the program for numValues values; inputs/outputs hold one pointer per input/output symbol.
488 void execute (int numValues, const void* const* inputs, void* const* outputs);
// Location assignment of m_outputs, computed in the constructor.
491 const FragmentOutputLayout m_outputLayout;
494 static FragmentOutputLayout computeFragmentOutputLayout (const std::vector<Symbol>& symbols)
496 FragmentOutputLayout ret;
499 for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
501 const int numLocations = glu::getDataTypeNumLocations(it->varType.getBasicType());
503 TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
504 de::insert(ret.locationMap, it->name, location);
505 location += numLocations;
507 for (int ndx = 0; ndx < numLocations; ++ndx)
508 ret.locationSymbols.push_back(&*it);
514 inline bool hasFloatRenderTargets (const glu::RenderContext& renderCtx)
516 glu::ContextType type = renderCtx.getType();
517 return glu::isContextTypeGLCore(type);
// Initializes the base executor and computes the output location layout from m_outputs.
520 FragmentOutExecutor::FragmentOutExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
521 : ShaderExecutor (renderCtx, shaderSpec)
522 , m_outputLayout (computeFragmentOutputLayout(m_outputs))
// Destructor.
526 FragmentOutExecutor::~FragmentOutExecutor (void)
// Queries a single integer state value with getIntegerv(); no GL error check is done here.
530 inline int queryInt (const glw::Functions& gl, deUint32 pname)
533 gl.getIntegerv(pname, &value);
537 static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
539 const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
541 tcu::TextureFormat::R,
542 tcu::TextureFormat::RG,
543 tcu::TextureFormat::RGBA, // No RGB variants available.
544 tcu::TextureFormat::RGBA
547 const glu::DataType basicType = outputType.getBasicType();
548 const int numComps = glu::getDataTypeNumComponents(basicType);
549 tcu::TextureFormat::ChannelType channelType;
551 switch (glu::getDataTypeScalarType(basicType))
553 case glu::TYPE_UINT: channelType = tcu::TextureFormat::UNSIGNED_INT32; break;
554 case glu::TYPE_INT: channelType = tcu::TextureFormat::SIGNED_INT32; break;
555 case glu::TYPE_BOOL: channelType = tcu::TextureFormat::SIGNED_INT32; break;
556 case glu::TYPE_FLOAT: channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT; break;
558 throw tcu::InternalError("Invalid output type");
561 DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
563 return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
// Runs the program for numValues values and reads the results back from renderbuffers.
//
// One 1px point is drawn per value into a framebuffer that is min(GL_MAX_RENDERBUFFER_SIZE,
// numValues) pixels wide and has as many rows as needed. inputs[i] supplies the data of
// m_inputs[i] as a vertex attribute named "a_" + symbol name; outputs[i] receives the data of
// m_outputs[i]. Throws tcu::NotSupportedError if the required height exceeds the maximum size.
// NOTE(review): numValues == 0 makes framebufferW zero, so the height computation below divides
// by zero and &positions[0] indexes an empty vector - callers presumably always pass
// numValues > 0; confirm or add a guard.
566 void FragmentOutExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
568 const glw::Functions& gl = m_renderCtx.getFunctions();
// Without float render targets, float results are transferred as raw bits in uint targets.
569 const bool useIntOutputs = !hasFloatRenderTargets(m_renderCtx);
570 const int maxRenderbufferSize = queryInt(gl, GL_MAX_RENDERBUFFER_SIZE);
571 const int framebufferW = de::min(maxRenderbufferSize, numValues);
// Height is numValues / framebufferW rounded up.
572 const int framebufferH = (numValues / framebufferW) + ((numValues % framebufferW != 0) ? 1 : 0);
574 glu::Framebuffer framebuffer (m_renderCtx);
575 glu::RenderbufferVector renderbuffers (m_renderCtx, m_outputLayout.locationSymbols.size());
577 vector<glu::VertexArrayBinding> vertexArrays;
578 vector<tcu::Vec2> positions (numValues);
580 if (framebufferH > maxRenderbufferSize)
581 throw tcu::NotSupportedError("Value count is too high for maximum supported renderbuffer size");
583 // Compute positions - 1px points are used to drive fragment shading.
584 for (int valNdx = 0; valNdx < numValues; valNdx++)
// Value valNdx lands on pixel (ix, iy); the +0.5 offset targets the pixel center.
586 const int ix = valNdx % framebufferW;
587 const int iy = valNdx / framebufferW;
588 const float fx = -1.0f + 2.0f*((float(ix) + 0.5f) / float(framebufferW));
589 const float fy = -1.0f + 2.0f*((float(iy) + 0.5f) / float(framebufferH));
591 positions[valNdx] = tcu::Vec2(fx, fy);
595 vertexArrays.push_back(glu::va::Float("a_position", 2, numValues, 0, (const float*)&positions[0]));
// Bind every input symbol as a vertex attribute array of the matching component type.
597 for (int inputNdx = 0; inputNdx < (int)m_inputs.size(); inputNdx++)
599 const Symbol& symbol = m_inputs[inputNdx];
600 const std::string attribName = "a_" + symbol.name;
601 const void* ptr = inputs[inputNdx];
602 const glu::DataType basicType = symbol.varType.getBasicType();
603 const int vecSize = glu::getDataTypeScalarSize(basicType);
605 if (glu::isDataTypeFloatOrVec(basicType))
606 vertexArrays.push_back(glu::va::Float(attribName, vecSize, numValues, 0, (const float*)ptr));
607 else if (glu::isDataTypeIntOrIVec(basicType))
608 vertexArrays.push_back(glu::va::Int32(attribName, vecSize, numValues, 0, (const deInt32*)ptr));
609 else if (glu::isDataTypeUintOrUVec(basicType))
610 vertexArrays.push_back(glu::va::Uint32(attribName, vecSize, numValues, 0, (const deUint32*)ptr));
611 else if (glu::isDataTypeMatrix(basicType))
// Matrices: one array per column, strided by the size of a whole matrix.
613 int numRows = glu::getDataTypeMatrixNumRows(basicType);
614 int numCols = glu::getDataTypeMatrixNumColumns(basicType);
615 int stride = numRows * numCols * (int)sizeof(float);
617 for (int colNdx = 0; colNdx < numCols; ++colNdx)
618 vertexArrays.push_back(glu::va::Float(attribName, colNdx, numRows, numValues, stride, ((const float*)ptr) + colNdx * numRows));
624 // Construct framebuffer.
625 gl.bindFramebuffer(GL_FRAMEBUFFER, *framebuffer);
// One renderbuffer per output location, attached to the color attachment with the same index.
627 for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
629 const Symbol& output = *m_outputLayout.locationSymbols[outNdx];
630 const deUint32 renderbuffer = renderbuffers[outNdx];
631 const deUint32 format = glu::getInternalFormat(getRenderbufferFormatForOutput(output.varType, useIntOutputs));
633 gl.bindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
634 gl.renderbufferStorage(GL_RENDERBUFFER, format, framebufferW, framebufferH);
635 gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0+outNdx, GL_RENDERBUFFER, renderbuffer);
637 gl.bindRenderbuffer(GL_RENDERBUFFER, 0);
638 GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to set up framebuffer object");
639 TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
// Enable all color attachments as draw buffers.
642 vector<deUint32> drawBuffers(m_outputLayout.locationSymbols.size());
643 for (int ndx = 0; ndx < (int)m_outputLayout.locationSymbols.size(); ndx++)
644 drawBuffers[ndx] = GL_COLOR_ATTACHMENT0+ndx;
645 gl.drawBuffers((int)drawBuffers.size(), &drawBuffers[0]);
646 GLU_EXPECT_NO_ERROR(gl.getError(), "glDrawBuffers()");
// Draw one point per value.
650 gl.viewport(0, 0, framebufferW, framebufferH);
651 glu::draw(m_renderCtx, this->getProgram(), (int)vertexArrays.size(), &vertexArrays[0],
652 glu::pr::Points(numValues));
653 GLU_EXPECT_NO_ERROR(gl.getError(), "Error in draw");
// Read back every output location by location and copy the values to the caller's buffers.
657 tcu::TextureLevel tmpBuf;
659 // \todo [2013-08-07 pyry] Some fast-paths could be added here.
661 for (int outNdx = 0; outNdx < (int)m_outputs.size(); ++outNdx)
663 const Symbol& output = m_outputs[outNdx];
664 const int outSize = output.varType.getScalarSize();
665 const int outVecSize = glu::getDataTypeNumComponents(output.varType.getBasicType());
666 const int outNumLocs = glu::getDataTypeNumLocations(output.varType.getBasicType());
667 deUint32* dstPtrBase = static_cast<deUint32*>(outputs[outNdx]);
668 const tcu::TextureFormat format = getRenderbufferFormatForOutput(output.varType, useIntOutputs);
// Pixels are always read as RGBA, i.e. four 32-bit channels per value.
669 const tcu::TextureFormat readFormat (tcu::TextureFormat::RGBA, format.type);
670 const int outLocation = de::lookup(m_outputLayout.locationMap, output.name);
672 tmpBuf.setStorage(readFormat, framebufferW, framebufferH);
674 for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
676 gl.readBuffer(GL_COLOR_ATTACHMENT0 + outLocation + locNdx);
677 glu::readPixels(m_renderCtx, 0, 0, tmpBuf.getAccess());
678 GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels");
// Four-component single-location outputs match the RGBA pixel layout and are copied in one go.
680 if (outSize == 4 && outNumLocs == 1)
681 deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues*outVecSize*sizeof(deUint32));
// Otherwise copy the used components of each pixel into the tightly packed destination.
684 for (int valNdx = 0; valNdx < numValues; valNdx++)
686 const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx*4;
687 deUint32* dstPtr = &dstPtrBase[outSize*valNdx + outVecSize*locNdx];
688 deMemcpy(dstPtr, srcPtr, outVecSize*sizeof(deUint32));
695 // \todo [2013-08-07 pyry] Clear draw buffers & viewport?
696 gl.bindFramebuffer(GL_FRAMEBUFFER, 0);
699 // VertexShaderExecutor
// Executor that runs the shader spec in the vertex stage; results reach the framebuffer
// through a pass-through fragment shader.
701 class VertexShaderExecutor : public FragmentOutExecutor
704 VertexShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
705 ~VertexShaderExecutor (void);
// Status, log output and GL name are all forwarded from the owned program object.
707 bool isOk (void) const { return m_program.isOk(); }
708 void log (tcu::TestLog& dst) const { dst << m_program; }
709 deUint32 getProgram (void) const { return m_program.getProgram(); }
712 const glu::ShaderProgram m_program;
// Builds the program from the generated vertex shader (attributes "a_*", outputs "vtx_out_*")
// and a pass-through fragment shader that writes those values to the "o_*" outputs.
715 VertexShaderExecutor::VertexShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
716 : FragmentOutExecutor (renderCtx, shaderSpec)
717 , m_program (renderCtx,
718 glu::ProgramSources() << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_"))
719 << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, !hasFloatRenderTargets(renderCtx), m_outputLayout.locationMap, "vtx_out_", "o_")))
// Destructor.
723 VertexShaderExecutor::~VertexShaderExecutor (void)
727 // GeometryShaderExecutor
// Executor that runs the shader spec in the geometry stage. Instances are obtained through
// create(), which checks for the geometry shader extension where the GLSL version requires it.
729 class GeometryShaderExecutor : public FragmentOutExecutor
732 static GeometryShaderExecutor* create (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
734 ~GeometryShaderExecutor (void);
// Status, log output and GL name are all forwarded from the owned program object.
736 bool isOk (void) const { return m_program.isOk(); }
737 void log (tcu::TestLog& dst) const { dst << m_program; }
738 deUint32 getProgram (void) const { return m_program.getProgram(); }
741 const glu::ShaderProgram m_program;
744 GeometryShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
747 GeometryShaderExecutor* GeometryShaderExecutor::create (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
749 if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
750 checkExtension(renderCtx, "GL_EXT_geometry_shader");
752 return new GeometryShaderExecutor(renderCtx, shaderSpec);
// Builds the program from a pass-through vertex shader ("a_*" -> "vtx_out_*"), the generated
// geometry shader ("vtx_out_*" -> "geom_out_*") and a pass-through fragment shader
// ("geom_out_*" -> "o_*").
755 GeometryShaderExecutor::GeometryShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
756 : FragmentOutExecutor (renderCtx, shaderSpec)
757 , m_program (renderCtx,
758 glu::ProgramSources() << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_"))
759 << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_"))
760 << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, !hasFloatRenderTargets(renderCtx), m_outputLayout.locationMap, "geom_out_", "o_")))
// Destructor.
764 GeometryShaderExecutor::~GeometryShaderExecutor (void)
768 // FragmentShaderExecutor
// Executor that runs the shader spec in the fragment stage; inputs are forwarded by a
// pass-through vertex shader.
770 class FragmentShaderExecutor : public FragmentOutExecutor
773 FragmentShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
774 ~FragmentShaderExecutor (void);
// Status, log output and GL name are all forwarded from the owned program object.
776 bool isOk (void) const { return m_program.isOk(); }
777 void log (tcu::TestLog& dst) const { dst << m_program; }
778 deUint32 getProgram (void) const { return m_program.getProgram(); }
781 const glu::ShaderProgram m_program;
// Builds the program from a pass-through vertex shader ("a_*" -> "vtx_out_*") and the generated
// fragment shader that reads "vtx_out_*" and writes the "o_*" outputs.
784 FragmentShaderExecutor::FragmentShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
785 : FragmentOutExecutor (renderCtx, shaderSpec)
786 , m_program (renderCtx,
787 glu::ProgramSources() << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_"))
788 << glu::FragmentSource(generateFragmentShader(shaderSpec, !hasFloatRenderTargets(renderCtx), m_outputLayout.locationMap, "vtx_out_", "o_")))
// Destructor.
792 FragmentShaderExecutor::~FragmentShaderExecutor (void)
796 // Shared utilities for compute and tess executors
// Returns a byte alignment selected by the scalar component count of the given type.
// NOTE(review): the name indicates std430 vector alignment rules - confirm the per-size values
// against the case table of this switch.
798 static deUint32 getVecStd430ByteAlignment (glu::DataType type)
800 switch (glu::getDataTypeScalarSize(type))
// Base class for executors that pass inputs and outputs through two GL buffer objects
// (bound as GL_SHADER_STORAGE_BUFFER when they are resized) instead of through attributes and
// render targets. It owns the program, both buffers and the per-symbol layout of each buffer.
812 class BufferIoExecutor : public ShaderExecutor
815 BufferIoExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources);
816 ~BufferIoExecutor (void);
// Status, log output and GL name are all forwarded from the owned program object.
818 bool isOk (void) const { return m_program.isOk(); }
819 void log (tcu::TestLog& dst) const { dst << m_program; }
820 deUint32 getProgram (void) const { return m_program.getProgram(); }
// Binding indices for the input and output buffers.
825 INPUT_BUFFER_BINDING = 0,
826 OUTPUT_BUFFER_BINDING = 1,
// (Re)allocates both buffers for numValues values.
829 void initBuffers (int numValues);
830 deUint32 getInputBuffer (void) const { return *m_inputBuffer; }
831 deUint32 getOutputBuffer (void) const { return *m_outputBuffer; }
// Byte size of one value's worth of inputs / outputs.
832 deUint32 getInputStride (void) const { return getLayoutStride(m_inputLayout); }
833 deUint32 getOutputStride (void) const { return getLayoutStride(m_outputLayout); }
835 void uploadInputBuffer (const void* const* inputPtrs, int numValues);
836 void readOutputBuffer (void* const* outputPtrs, int numValues);
// Shader source generation helpers shared by derived executors.
838 static void declareBufferBlocks (std::ostream& src, const ShaderSpec& spec);
839 static void generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);
841 glu::ShaderProgram m_program;
// Per-symbol buffer layout: matrixStride is the byte distance between matrix columns
// (0 for scalars and vectors, see computeVarLayout()); all fields start at zero.
848 deUint32 matrixStride;
850 VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
853 void resizeInputBuffer (int newSize);
854 void resizeOutputBuffer (int newSize);
856 static void computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
857 static deUint32 getLayoutStride (const vector<VarLayout>& layout);
859 static void copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
860 static void copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
862 glu::Buffer m_inputBuffer;
863 glu::Buffer m_outputBuffer;
// One layout entry per input / output symbol, in symbol order.
865 vector<VarLayout> m_inputLayout;
866 vector<VarLayout> m_outputLayout;
// Builds the program from the given sources, creates both buffer objects and computes the
// buffer layouts of the input and output symbols.
869 BufferIoExecutor::BufferIoExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources)
870 : ShaderExecutor (renderCtx, shaderSpec)
871 , m_program (renderCtx, sources)
872 , m_inputBuffer (renderCtx)
873 , m_outputBuffer (renderCtx)
875 computeVarLayout(m_inputs, &m_inputLayout);
876 computeVarLayout(m_outputs, &m_outputLayout);
// Destructor.
879 BufferIoExecutor::~BufferIoExecutor (void)
// Allocates newSize bytes of uninitialized storage for the input buffer.
// The buffer is left bound to GL_SHADER_STORAGE_BUFFER.
883 void BufferIoExecutor::resizeInputBuffer (int newSize)
885 const glw::Functions& gl = m_renderCtx.getFunctions();
886 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *m_inputBuffer);
887 gl.bufferData(GL_SHADER_STORAGE_BUFFER, newSize, DE_NULL, GL_STATIC_DRAW);
888 GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to allocate input buffer");
// Allocates newSize bytes of uninitialized storage for the output buffer.
// The buffer is left bound to GL_SHADER_STORAGE_BUFFER.
891 void BufferIoExecutor::resizeOutputBuffer (int newSize)
893 const glw::Functions& gl = m_renderCtx.getFunctions();
894 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *m_outputBuffer);
895 gl.bufferData(GL_SHADER_STORAGE_BUFFER, newSize, DE_NULL, GL_STATIC_DRAW);
896 GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to allocate output buffer");
899 void BufferIoExecutor::initBuffers (int numValues)
901 const deUint32 inputStride = getLayoutStride(m_inputLayout);
902 const deUint32 outputStride = getLayoutStride(m_outputLayout);
903 const int inputBufferSize = numValues * inputStride;
904 const int outputBufferSize = numValues * outputStride;
906 resizeInputBuffer(inputBufferSize);
907 resizeOutputBuffer(outputBufferSize);
910 void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
912 deUint32 maxAlignment = 0;
913 deUint32 curOffset = 0;
915 DE_ASSERT(layout->empty());
916 layout->resize(symbols.size());
918 for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
920 const Symbol& symbol = symbols[varNdx];
921 const glu::DataType basicType = symbol.varType.getBasicType();
922 VarLayout& layoutEntry = (*layout)[varNdx];
924 if (glu::isDataTypeScalarOrVector(basicType))
926 const deUint32 alignment = getVecStd430ByteAlignment(basicType);
927 const deUint32 size = (deUint32)glu::getDataTypeScalarSize(basicType)*(int)sizeof(deUint32);
929 curOffset = (deUint32)deAlign32((int)curOffset, (int)alignment);
930 maxAlignment = de::max(maxAlignment, alignment);
932 layoutEntry.offset = curOffset;
933 layoutEntry.matrixStride = 0;
937 else if (glu::isDataTypeMatrix(basicType))
939 const int numVecs = glu::getDataTypeMatrixNumColumns(basicType);
940 const glu::DataType vecType = glu::getDataTypeFloatVec(glu::getDataTypeMatrixNumRows(basicType));
941 const deUint32 vecAlignment = getVecStd430ByteAlignment(vecType);
943 curOffset = (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
944 maxAlignment = de::max(maxAlignment, vecAlignment);
946 layoutEntry.offset = curOffset;
947 layoutEntry.matrixStride = vecAlignment;
949 curOffset += vecAlignment*numVecs;
956 const deUint32 totalSize = (deUint32)deAlign32(curOffset, maxAlignment);
958 for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
959 varIter->stride = totalSize;
963 inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
965 return layout.empty() ? 0 : layout[0].stride;
968 void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
970 if (varType.isBasicType())
972 const glu::DataType basicType = varType.getBasicType();
973 const bool isMatrix = glu::isDataTypeMatrix(basicType);
974 const int scalarSize = glu::getDataTypeScalarSize(basicType);
975 const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
976 const int numComps = scalarSize / numVecs;
978 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
980 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
982 const int srcOffset = (int)sizeof(deUint32)*(elemNdx*scalarSize + vecNdx*numComps);
983 const int dstOffset = layout.offset + layout.stride*elemNdx + (isMatrix ? layout.matrixStride*vecNdx : 0);
984 const deUint8* srcPtr = (const deUint8*)srcBasePtr + srcOffset;
985 deUint8* dstPtr = (deUint8*)dstBasePtr + dstOffset;
987 deMemcpy(dstPtr, srcPtr, sizeof(deUint32)*numComps);
992 throw tcu::InternalError("Unsupported type");
995 void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
997 if (varType.isBasicType())
999 const glu::DataType basicType = varType.getBasicType();
1000 const bool isMatrix = glu::isDataTypeMatrix(basicType);
1001 const int scalarSize = glu::getDataTypeScalarSize(basicType);
1002 const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1003 const int numComps = scalarSize / numVecs;
1005 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1007 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1009 const int srcOffset = layout.offset + layout.stride*elemNdx + (isMatrix ? layout.matrixStride*vecNdx : 0);
1010 const int dstOffset = (int)sizeof(deUint32)*(elemNdx*scalarSize + vecNdx*numComps);
1011 const deUint8* srcPtr = (const deUint8*)srcBasePtr + srcOffset;
1012 deUint8* dstPtr = (deUint8*)dstBasePtr + dstOffset;
1014 deMemcpy(dstPtr, srcPtr, sizeof(deUint32)*numComps);
1019 throw tcu::InternalError("Unsupported type");
1022 void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues)
1024 const glw::Functions& gl = m_renderCtx.getFunctions();
1025 const deUint32 buffer = *m_inputBuffer;
1026 const deUint32 inputStride = getLayoutStride(m_inputLayout);
1027 const int inputBufferSize = inputStride*numValues;
1029 if (inputBufferSize == 0)
1030 return; // No inputs
1032 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
1033 void* mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, inputBufferSize, GL_MAP_WRITE_BIT);
1034 GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
1039 DE_ASSERT(m_inputs.size() == m_inputLayout.size());
1040 for (size_t inputNdx = 0; inputNdx < m_inputs.size(); ++inputNdx)
1042 const glu::VarType& varType = m_inputs[inputNdx].varType;
1043 const VarLayout& layout = m_inputLayout[inputNdx];
1045 copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], mapPtr);
1050 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1054 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1055 GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
1058 void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1060 const glw::Functions& gl = m_renderCtx.getFunctions();
1061 const deUint32 buffer = *m_outputBuffer;
1062 const deUint32 outputStride = getLayoutStride(m_outputLayout);
1063 const int outputBufferSize = numValues*outputStride;
1065 DE_ASSERT(outputBufferSize > 0); // At least some outputs are required.
1067 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
1068 void* mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, outputBufferSize, GL_MAP_READ_BIT);
1069 GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
1074 DE_ASSERT(m_outputs.size() == m_outputLayout.size());
1075 for (size_t outputNdx = 0; outputNdx < m_outputs.size(); ++outputNdx)
1077 const glu::VarType& varType = m_outputs[outputNdx].varType;
1078 const VarLayout& layout = m_outputLayout[outputNdx];
1080 copyFromBuffer(varType, layout, numValues, mapPtr, outputPtrs[outputNdx]);
1085 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1089 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1090 GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
1093 void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
1096 if (!spec.inputs.empty())
1098 glu::StructType inputStruct("Inputs");
1099 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1100 inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1101 src << glu::declare(&inputStruct) << ";\n";
1106 glu::StructType outputStruct("Outputs");
1107 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1108 outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1109 src << glu::declare(&outputStruct) << ";\n";
1114 if (!spec.inputs.empty())
1116 src << "layout(binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1118 << " Inputs inputs[];\n"
1122 src << "layout(binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1124 << " Outputs outputs[];\n"
1129 void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
1131 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1132 src << "\t" << glu::declare(symIter->varType, symIter->name) << " = inputs[" << invocationNdxName << "]." << symIter->name << ";\n";
1134 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1135 src << "\t" << glu::declare(symIter->varType, symIter->name) << ";\n";
1140 std::istringstream opSrc (spec.source);
1143 while (std::getline(opSrc, line))
1144 src << "\t" << line << "\n";
1148 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1149 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1152 // ComputeShaderExecutor
// BufferIoExecutor specialization that evaluates the shader spec in a compute shader.
1154 class ComputeShaderExecutor : public BufferIoExecutor
// Builds the compute program for shaderSpec.
1157 ComputeShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1158 ~ComputeShaderExecutor (void);
// Evaluates numValues values: uploads the inputs, dispatches the compute work
// and reads the outputs back.
1160 void execute (int numValues, const void* const* inputs, void* const* outputs);
// Returns the GLSL compute shader source generated for spec.
1163 static std::string generateComputeShader (const ShaderSpec& spec);
// Work size limits; execute() reads only component 0, as the maximum number
// of values handled by a single dispatch.
1165 tcu::IVec3 m_maxWorkSize;
1168 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
1170 std::ostringstream src;
1172 src << glu::getGLSLVersionDeclaration(spec.version) << "\n";
1174 if (!spec.globalDeclarations.empty())
1175 src << spec.globalDeclarations << "\n";
1177 src << "layout(local_size_x = 1) in;\n"
1180 declareBufferBlocks(src, spec);
1182 src << "void main (void)\n"
1184 << " uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
1185 << " + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
1187 generateExecBufferIo(src, spec, "invocationNdx");
1194 ComputeShaderExecutor::ComputeShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1195 : BufferIoExecutor (renderCtx, shaderSpec,
1196 glu::ProgramSources() << glu::ComputeSource(generateComputeShader(shaderSpec)))
1198 m_maxWorkSize = tcu::IVec3(128,128,64); // Minimum in 3plus
1201 ComputeShaderExecutor::~ComputeShaderExecutor (void)
1205 void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1207 const glw::Functions& gl = m_renderCtx.getFunctions();
1208 const int maxValuesPerInvocation = m_maxWorkSize[0];
1209 const deUint32 inputStride = getInputStride();
1210 const deUint32 outputStride = getOutputStride();
1212 initBuffers(numValues);
1214 // Setup input buffer & copy data
1215 uploadInputBuffer(inputs, numValues);
1217 // Perform compute invocations
1220 while (curOffset < numValues)
1222 const int numToExec = de::min(maxValuesPerInvocation, numValues-curOffset);
1224 if (inputStride > 0)
1225 gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer(), curOffset*inputStride, numToExec*inputStride);
1227 gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer(), curOffset*outputStride, numToExec*outputStride);
1228 GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBufferRange(GL_SHADER_STORAGE_BUFFER)");
1230 gl.dispatchCompute(numToExec, 1, 1);
1231 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1233 curOffset += numToExec;
1238 readOutputBuffer(outputs, numValues);
1241 // Tessellation utils
1243 static std::string generateVertexShaderForTess (glu::GLSLVersion version)
1245 std::ostringstream src;
1247 src << glu::getGLSLVersionDeclaration(version) << "\n";
1249 src << "void main (void)\n{\n"
1250 << " gl_Position = vec4(gl_VertexID/2, gl_VertexID%2, 0.0, 1.0);\n"
1256 void checkTessSupport (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, glu::ShaderType stage)
1258 const int numBlockRequired = 2; // highest binding is always 1 (output) i.e. count == 2
1260 if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
1261 checkExtension(renderCtx, "GL_EXT_tessellation_shader");
1263 if (stage == glu::SHADERTYPE_TESSELLATION_CONTROL)
1264 checkLimit(renderCtx, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, numBlockRequired);
1265 else if (stage == glu::SHADERTYPE_TESSELLATION_EVALUATION)
1266 checkLimit(renderCtx, GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, numBlockRequired);
1271 // TessControlExecutor
// BufferIoExecutor specialization that evaluates the shader spec in a
// tessellation control shader.
1273 class TessControlExecutor : public BufferIoExecutor
// Checks tessellation support for the context and returns a newly allocated executor.
1276 static TessControlExecutor* create (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1278 ~TessControlExecutor (void);
// Evaluates numValues values: uploads the inputs, draws the patches and reads
// the outputs back.
1280 void execute (int numValues, const void* const* inputs, void* const* outputs);
// Returns the GLSL tessellation control shader source generated for shaderSpec.
1284 static std::string generateTessControlShader (const ShaderSpec& shaderSpec);
// Builds the program; create() is the entry point that checks support before constructing.
1287 TessControlExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1290 TessControlExecutor* TessControlExecutor::create (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1292 checkTessSupport(renderCtx, shaderSpec, glu::SHADERTYPE_TESSELLATION_CONTROL);
1294 return new TessControlExecutor(renderCtx, shaderSpec);
1297 std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
1299 std::ostringstream src;
1301 src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
1303 if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
1304 src << "#extension GL_EXT_tessellation_shader : require\n";
1306 if (!shaderSpec.globalDeclarations.empty())
1307 src << shaderSpec.globalDeclarations << "\n";
1309 src << "\nlayout(vertices = 1) out;\n\n";
1311 declareBufferBlocks(src, shaderSpec);
1313 src << "void main (void)\n{\n";
1315 for (int ndx = 0; ndx < 2; ndx++)
1316 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
1318 for (int ndx = 0; ndx < 4; ndx++)
1319 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
1322 << "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
1324 generateExecBufferIo(src, shaderSpec, "invocationId");
1331 static std::string generateEmptyTessEvalShader (glu::GLSLVersion version)
1333 std::ostringstream src;
1335 src << glu::getGLSLVersionDeclaration(version) << "\n";
1337 if (glu::glslVersionIsES(version) && version <= glu::GLSL_VERSION_310_ES)
1338 src << "#extension GL_EXT_tessellation_shader : require\n\n";
1340 src << "layout(triangles, ccw) in;\n";
1342 src << "\nvoid main (void)\n{\n"
1343 << "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n"
1349 TessControlExecutor::TessControlExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1350 : BufferIoExecutor (renderCtx, shaderSpec, glu::ProgramSources()
1351 << glu::VertexSource(generateVertexShaderForTess(shaderSpec.version))
1352 << glu::TessellationControlSource(generateTessControlShader(shaderSpec))
1353 << glu::TessellationEvaluationSource(generateEmptyTessEvalShader(shaderSpec.version))
1354 << glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version)))
1358 TessControlExecutor::~TessControlExecutor (void)
1362 void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1364 const glw::Functions& gl = m_renderCtx.getFunctions();
1366 initBuffers(numValues);
1368 // Setup input buffer & copy data
1369 uploadInputBuffer(inputs, numValues);
1371 if (!m_inputs.empty())
1372 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer());
1374 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer());
1377 gl.patchParameteri(GL_PATCH_VERTICES, 3);
1378 gl.drawArrays(GL_PATCHES, 0, 3*numValues);
1381 readOutputBuffer(outputs, numValues);
1384 // TessEvaluationExecutor
// BufferIoExecutor specialization that evaluates the shader spec in a
// tessellation evaluation shader.
1386 class TessEvaluationExecutor : public BufferIoExecutor
// Checks tessellation support for the context and returns a newly allocated executor.
1389 static TessEvaluationExecutor* create (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1391 ~TessEvaluationExecutor (void);
// Evaluates numValues values: uploads the inputs, draws the patches and reads
// the outputs back.
1393 void execute (int numValues, const void* const* inputs, void* const* outputs);
// Returns the GLSL tessellation evaluation shader source generated for shaderSpec.
1397 static std::string generateTessEvalShader (const ShaderSpec& shaderSpec);
// Builds the program; create() is the entry point that checks support before constructing.
1400 TessEvaluationExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1403 TessEvaluationExecutor* TessEvaluationExecutor::create (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1405 checkTessSupport(renderCtx, shaderSpec, glu::SHADERTYPE_TESSELLATION_EVALUATION);
1407 return new TessEvaluationExecutor(renderCtx, shaderSpec);
1410 static std::string generatePassthroughTessControlShader (glu::GLSLVersion version)
1412 std::ostringstream src;
1414 src << glu::getGLSLVersionDeclaration(version) << "\n";
1416 if (glu::glslVersionIsES(version) && version <= glu::GLSL_VERSION_310_ES)
1417 src << "#extension GL_EXT_tessellation_shader : require\n\n";
1419 src << "layout(vertices = 1) out;\n\n";
1421 src << "void main (void)\n{\n";
1423 for (int ndx = 0; ndx < 2; ndx++)
1424 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
1426 for (int ndx = 0; ndx < 4; ndx++)
1427 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
1434 std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
1436 std::ostringstream src;
1438 src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
1440 if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
1441 src << "#extension GL_EXT_tessellation_shader : require\n";
1443 if (!shaderSpec.globalDeclarations.empty())
1444 src << shaderSpec.globalDeclarations << "\n";
1448 src << "layout(isolines, equal_spacing) in;\n\n";
1450 declareBufferBlocks(src, shaderSpec);
1452 src << "void main (void)\n{\n"
1453 << "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
1454 << "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
1456 generateExecBufferIo(src, shaderSpec, "invocationId");
1463 TessEvaluationExecutor::TessEvaluationExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1464 : BufferIoExecutor (renderCtx, shaderSpec, glu::ProgramSources()
1465 << glu::VertexSource(generateVertexShaderForTess(shaderSpec.version))
1466 << glu::TessellationControlSource(generatePassthroughTessControlShader(shaderSpec.version))
1467 << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec))
1468 << glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version)))
1472 TessEvaluationExecutor::~TessEvaluationExecutor (void)
1476 void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1478 const glw::Functions& gl = m_renderCtx.getFunctions();
1479 const int alignedValues = deAlign32(numValues, 2);
1481 // Initialize buffers with aligned value count to make room for padding
1482 initBuffers(alignedValues);
1484 // Setup input buffer & copy data
1485 uploadInputBuffer(inputs, numValues);
1487 // \todo [2014-06-26 pyry] Duplicate last value in the buffer to prevent infinite loops for example?
1489 if (!m_inputs.empty())
1490 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer());
1492 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer());
1495 gl.patchParameteri(GL_PATCH_VERTICES, 2);
1496 gl.drawArrays(GL_PATCHES, 0, alignedValues);
1499 readOutputBuffer(outputs, numValues);
1504 ShaderExecutor* createExecutor (const glu::RenderContext& renderCtx, glu::ShaderType shaderType, const ShaderSpec& shaderSpec)
1508 case glu::SHADERTYPE_VERTEX: return new VertexShaderExecutor (renderCtx, shaderSpec);
1509 case glu::SHADERTYPE_TESSELLATION_CONTROL: return TessControlExecutor::create (renderCtx, shaderSpec);
1510 case glu::SHADERTYPE_TESSELLATION_EVALUATION: return TessEvaluationExecutor::create (renderCtx, shaderSpec);
1511 case glu::SHADERTYPE_GEOMETRY: return GeometryShaderExecutor::create (renderCtx, shaderSpec);
1512 case glu::SHADERTYPE_FRAGMENT: return new FragmentShaderExecutor (renderCtx, shaderSpec);
1513 case glu::SHADERTYPE_COMPUTE: return new ComputeShaderExecutor (renderCtx, shaderSpec);
1515 throw tcu::InternalError("Unsupported shader type");