1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2015 The Khronos Group Inc.
6 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7 * Copyright (c) 2016 The Android Open Source Project
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
23 * \brief Vulkan ShaderExecutor
24 *//*--------------------------------------------------------------------*/
26 #include "vktShaderExecutor.hpp"
28 #include "vkMemUtil.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
38 #include "gluShaderUtil.hpp"
40 #include "tcuVector.hpp"
41 #include "tcuTestLog.hpp"
42 #include "tcuTextureUtil.hpp"
44 #include "deUniquePtr.hpp"
45 #include "deStringUtil.hpp"
46 #include "deSharedPtr.hpp"
// NOTE(review): this chunk is an extraction with elided lines (the leading
// integers are remnants of the original file's line numbering); the braces of
// the enclosing namespace/enum are among the missing lines.
57 namespace shaderexecutor
// Default render-target dimensions; one fragment shades one test invocation,
// so up to WIDTH*HEIGHT values can be processed per draw.
64 DEFAULT_RENDER_WIDTH = 100,
65 DEFAULT_RENDER_HEIGHT = 100,
// Ref-counted wrappers so move-only Vulkan handles and allocations can be
// stored in std::vector members.
70 typedef de::SharedPtr<Unique<VkImage> > VkImageSp;
71 typedef de::SharedPtr<Unique<VkImageView> > VkImageViewSp;
72 typedef de::SharedPtr<Unique<VkBuffer> > VkBufferSp;
73 typedef de::SharedPtr<Allocation> AllocationSp;
// Forward declaration: maps a GLU basic data type to the vertex-attribute
// VkFormat used to feed it (defined further down in this file).
75 static VkFormat getAttributeFormat(const glu::DataType dataType);
// Clear value applied to every color attachment before the executor draws.
79 static VkClearValue getDefaultClearColor (void)
81 return makeClearValueColorF32(0.125f, 0.25f, 0.5f, 1.0f);
// Builds a minimal GLSL 450 fragment shader that writes vec4(0.0) to the
// single color output; used when fragment results are irrelevant.
// (NOTE(review): closing of main() and the return of src.str() are on elided lines.)
84 static std::string generateEmptyFragmentSource (void)
86 std::ostringstream src;
88 src << "#version 450\n"
89 "layout(location=0) out highp vec4 o_color;\n";
91 src << "void main (void)\n{\n";
92 src << " o_color = vec4(0.0);\n";
// Emits GLSL statements that re-pack each float-typed output symbol into its
// "packed_<name>" counterpart via packFloat2x16(f16vec2(value, -1.0)), so
// 16-bit results survive transport through a 32-bit shader interface.
// Vectors are packed component-wise, matrices element-wise, scalars directly.
98 void packFloat16Bit (std::ostream& src, const std::vector<Symbol>& outputs)
100 for (vector<Symbol>::const_iterator symIter = outputs.begin(); symIter != outputs.end(); ++symIter)
102 if(glu::isDataTypeFloatType(symIter->varType.getBasicType()))
104 if(glu::isDataTypeVector(symIter->varType.getBasicType()))
106 for(int i = 0; i < glu::getDataTypeScalarSize(symIter->varType.getBasicType()); i++)
108 src << "\tpacked_" << symIter->name << "[" << i << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "], -1.0)));\n";
111 else if (glu::isDataTypeMatrix(symIter->varType.getBasicType()))
// Pick the matrix dimensions to iterate; the case bodies that assign
// maxRow/maxCol (and the declarations of those locals) are on elided lines.
115 switch (symIter->varType.getBasicType())
117 case glu::TYPE_FLOAT_MAT2:
120 case glu::TYPE_FLOAT_MAT2X3:
124 case glu::TYPE_FLOAT_MAT2X4:
128 case glu::TYPE_FLOAT_MAT3X2:
132 case glu::TYPE_FLOAT_MAT3:
135 case glu::TYPE_FLOAT_MAT3X4:
139 case glu::TYPE_FLOAT_MAT4X2:
143 case glu::TYPE_FLOAT_MAT4X3:
147 case glu::TYPE_FLOAT_MAT4:
155 for(int i = 0; i < maxRow; i++)
156 for(int j = 0; j < maxCol; j++)
158 src << "\tpacked_" << symIter->name << "[" << i << "][" << j << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "][" << j << "], -1.0)));\n";
// Scalar float case.
163 src << "\tpacked_" << symIter->name << " = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << ", -1.0)));\n";
// Generates a vertex shader that forwards each input attribute unchanged to a
// matching "flat out" varying (for shader stages tested downstream), plus the
// fixed a_position attribute driving point positions.
// NOTE(review): the declaration/update of `location` is on elided lines, as is
// the return of src.str(); the in/out location arithmetic (location vs
// location - 1) cannot be fully verified from this view.
169 static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const char* inputPrefix, const char* outputPrefix)
171 std::ostringstream src;
174 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
176 if (!shaderSpec.globalDeclarations.empty())
177 src << shaderSpec.globalDeclarations << "\n";
179 src << "layout(location = " << location << ") in highp vec4 a_position;\n";
181 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
184 src << "layout(location = "<< location << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
185 << "layout(location = " << location - 1 << ") flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
188 src << "\nvoid main (void)\n{\n"
189 << " gl_Position = a_position;\n"
190 << " gl_PointSize = 1.0;\n";
// Body: copy every input straight through to its output varying.
192 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
193 src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
// Generates the vertex shader that actually executes shaderSpec.source:
// declares attributes for all inputs (a_position at location 0), flat outputs
// for all results (bool/bvec lowered to int/ivec since booleans cannot be
// varyings), runs the test expression, and assigns outputs.
// When shaderSpec.packFloat16Bit is set, float inputs/outputs are computed in
// float16_t-scalar types and re-packed via packFloat16Bit() before assignment.
// NOTE(review): braces and the final return of src.str() are on elided lines.
200 static std::string generateVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
202 DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
204 std::ostringstream src;
206 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
208 if (!shaderSpec.globalDeclarations.empty())
209 src << shaderSpec.globalDeclarations << "\n";
211 src << "layout(location = 0) in highp vec4 a_position;\n";
// Input attributes occupy locations 1..N; outputs continue after them.
213 int locationNumber = 1;
214 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
216 src << "layout(location = " << locationNumber << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
220 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
222 DE_ASSERT(output->varType.isBasicType());
// Booleans cannot be interface variables: use highp int/ivec instead.
224 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
226 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
227 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
228 const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP);
230 src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
233 src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
237 << "void main (void)\n"
239 << " gl_Position = a_position;\n"
240 << " gl_PointSize = 1.0;\n";
242 // Declare & fetch local input variables
243 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
// In 16-bit mode, floats are converted into the float16 scalar type so the
// test expression is evaluated at reduced precision.
245 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
247 const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
248 src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
251 src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
254 // Declare local output variables
255 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
257 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
259 const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
260 src << "\t" << tname << " " << output->name << ";\n";
// 32-bit "packed_" shadow variable that receives the repacked value.
261 const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
262 src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
265 src << "\t" << glu::declare(output->varType, output->name) << ";\n";
268 // Operation - indented to correct level.
270 std::istringstream opSrc (shaderSpec.source);
273 while (std::getline(opSrc, line))
274 src << "\t" << line << "\n";
277 if (shaderSpec.packFloat16Bit)
278 packFloat16Bit(src, shaderSpec.outputs);
280 // Assignments to outputs.
281 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
283 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
285 src << "\t" << outputPrefix << output->name << " = packed_" << output->name << ";\n";
289 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
291 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
292 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
294 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
297 src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
// Describes how output symbols map onto fragment-shader color locations;
// a multi-location symbol (e.g. a matrix) appears once per location it uses.
306 struct FragmentOutputLayout
308 std::vector<const Symbol*> locationSymbols; //! Symbols by location
309 std::map<std::string, int> locationMap; //! Map from symbol name to start location
// Emits the fragment-shader "out" declarations for every output symbol:
// floats become highp uint/uvec when useIntOutputs (results read back as raw
// bits), bools become int/ivec, and matrices are split into one uvec output
// per column at consecutive locations ("<name>_<col>").
// NOTE(review): loop/if braces are on elided lines.
312 static void generateFragShaderOutputDecl (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& outputPrefix)
314 for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
316 const Symbol& output = shaderSpec.outputs[outNdx];
317 const int location = de::lookup(outLocationMap, output.name);
318 const std::string outVarName = outputPrefix + output.name;
319 glu::VariableDeclaration decl (output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));
321 TCU_CHECK_INTERNAL(output.varType.isBasicType());
323 if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
325 const int vecSize = glu::getDataTypeScalarSize(output.varType.getBasicType());
326 const glu::DataType uintBasicType = vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
327 const glu::VarType uintType (uintBasicType, glu::PRECISION_HIGHP);
329 decl.varType = uintType;
330 src << decl << ";\n";
332 else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
334 const int vecSize = glu::getDataTypeScalarSize(output.varType.getBasicType());
335 const glu::DataType intBasicType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
336 const glu::VarType intType (intBasicType, glu::PRECISION_HIGHP);
338 decl.varType = intType;
339 src << decl << ";\n";
341 else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
// One uvec output per matrix column, vector width = number of rows.
343 const int vecSize = glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
344 const int numVecs = glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
345 const glu::DataType uintBasicType = glu::getDataTypeUintVec(vecSize);
346 const glu::VarType uintType (uintBasicType, glu::PRECISION_HIGHP);
348 decl.varType = uintType;
349 for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
351 decl.name = outVarName + "_" + de::toString(vecNdx);
352 decl.layout.location = location + vecNdx;
353 src << decl << ";\n";
// Fallback: declare the output with its original type.
357 src << decl << ";\n";
// Emits the assignments from computed values to the fragment outputs declared
// by generateFragShaderOutputDecl: floats bit-cast to uint when useIntOutputs,
// matrices written column-by-column, bools cast to int/ivec.
// NOTE(review): the guard around the packFloat16Bit() call (presumably
// `if (isInput16Bit)`) and the matrix-branch useIntOutputs if/else are on
// elided lines — confirm against the full file.
361 static void generateFragShaderOutAssign (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::string& valuePrefix, const std::string& outputPrefix, const bool isInput16Bit = false)
364 packFloat16Bit(src, shaderSpec.outputs);
366 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
// 16-bit mode reads from the "packed_" shadow variable instead.
368 const std::string packPrefix = (isInput16Bit && glu::isDataTypeFloatType(output->varType.getBasicType())) ? "packed_" : "";
370 if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
// NOTE(review): hard-codes the "o_" prefix here rather than using
// outputPrefix as the other branches do — verify this is intentional.
371 src << " o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
372 else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
374 const int numVecs = glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
376 for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
378 src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix << output->name << "[" << vecNdx << "]);\n";
380 src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << packPrefix << valuePrefix << output->name << "[" << vecNdx << "];\n";
382 else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
384 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
385 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
387 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << valuePrefix << output->name << ");\n";
390 src << "\t" << outputPrefix << output->name << " = " << packPrefix << valuePrefix << output->name << ";\n";
// Generates a GLSL 450 fragment shader that simply copies the flat-interpolated
// varyings (produced by an earlier stage that ran the test) into the color
// outputs, using the shared decl/assign helpers.
// NOTE(review): the closing of main() and return of src.str() are on elided lines.
394 static std::string generatePassthroughFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
396 std::ostringstream src;
398 src <<"#version 450\n";
400 if (!shaderSpec.globalDeclarations.empty())
401 src << shaderSpec.globalDeclarations << "\n";
// Inputs mirror the previous stage's outputs: bool/bvec arrive as int/ivec.
403 int locationNumber = 0;
404 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
406 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
408 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
409 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
410 const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP);
412 src << "layout(location = " << locationNumber << ") flat in " << glu::declare(intType, inputPrefix + output->name) << ";\n";
415 src << "layout(location = " << locationNumber << ") flat in " << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
418 generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
420 src << "\nvoid main (void)\n{\n";
422 generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);
// Generates a points-in/points-out geometry shader that executes
// shaderSpec.source once per input point and forwards results through flat
// varyings (bool lowered to int, as in the other stages). gl_PointSize is only
// forwarded when the implementation supports it in geometry shaders.
// NOTE(review): braces and the final return of src.str() are on elided lines.
429 static std::string generateGeometryShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix, const bool pointSizeSupported)
431 DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
433 std::ostringstream src;
435 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
// GLSL ES 3.1 requires the geometry-shader extension to be enabled explicitly.
437 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
438 src << "#extension GL_EXT_geometry_shader : require\n";
440 if (!shaderSpec.globalDeclarations.empty())
441 src << shaderSpec.globalDeclarations << "\n";
443 src << "layout(points) in;\n"
444 << "layout(points, max_vertices = 1) out;\n";
446 int locationNumber = 0;
447 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
448 src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";
451 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
453 DE_ASSERT(output->varType.isBasicType());
455 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
457 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
458 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
459 const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP);
461 src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
464 src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
468 << "void main (void)\n"
470 << " gl_Position = gl_in[0].gl_Position;\n"
471 << (pointSizeSupported ? " gl_PointSize = gl_in[0].gl_PointSize;\n\n" : "");
473 // Fetch input variables
474 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
475 src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";
477 // Declare local output variables.
478 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
479 src << "\t" << glu::declare(output->varType, output->name) << ";\n";
483 // Operation - indented to correct level.
485 std::istringstream opSrc (shaderSpec.source);
488 while (std::getline(opSrc, line))
489 src << "\t" << line << "\n";
492 // Assignments to outputs.
493 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
495 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
497 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
498 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
500 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
503 src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
// Emit exactly one point per invocation.
506 src << " EmitVertex();\n"
507 << " EndPrimitive();\n"
// Generates the fragment shader that executes shaderSpec.source directly in
// the fragment stage: flat inputs from the vertex stage, outputs declared and
// assigned via the shared helpers. In packFloat16Bit mode inputs/outputs are
// evaluated in float16 scalar types with 32-bit "packed_" shadow outputs.
// NOTE(review): braces and the final return of src.str() are on elided lines.
513 static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
515 std::ostringstream src;
516 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
517 if (!shaderSpec.globalDeclarations.empty())
518 src << shaderSpec.globalDeclarations << "\n";
520 int locationNumber = 0;
521 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
523 src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
526 generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
528 src << "\nvoid main (void)\n{\n";
530 // Declare & fetch local input variables
531 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
533 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
535 const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
536 src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
539 src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
542 // Declare output variables
543 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
545 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
547 const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
548 src << "\t" << tname << " " << output->name << ";\n";
// 32-bit shadow variable written by packFloat16Bit() in the assign helper.
549 const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
550 src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
553 src << "\t" << glu::declare(output->varType, output->name) << ";\n";
556 // Operation - indented to correct level.
558 std::istringstream opSrc (shaderSpec.source);
561 while (std::getline(opSrc, line))
562 src << "\t" << line << "\n";
// Values live in locals here, so valuePrefix is empty.
565 generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix, shaderSpec.packFloat16Bit);
572 // FragmentOutExecutor
// Base executor that runs the test expression via a graphics pipeline and
// reads results back from color attachments; each test value is shaded by one
// 1px point. Derived classes select which shader stage hosts the expression.
574 class FragmentOutExecutor : public ShaderExecutor
577 FragmentOutExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
578 virtual ~FragmentOutExecutor (void);
// Renders numValues points, one input/output slot per value.
580 virtual void execute (int numValues,
581 const void* const* inputs,
582 void* const* outputs,
583 VkDescriptorSet extraResources);
586 const glu::ShaderType m_shaderType;
587 const FragmentOutputLayout m_outputLayout;
// Uploads each input symbol as a vertex buffer + attribute description.
590 void bindAttributes (int numValues,
591 const void* const* inputs);
593 void addAttribute (deUint32 bindingLocation,
595 deUint32 sizePerElement,
597 const void* dataPtr);
598 // reinit render data members
599 virtual void clearRenderData (void);
// Layout of caller-provided extra resources (set 1); not owned here.
601 const VkDescriptorSetLayout m_extraResourcesLayout;
// Per-execute vertex input state; rebuilt for every run.
603 std::vector<VkVertexInputBindingDescription> m_vertexBindingDescriptions;
604 std::vector<VkVertexInputAttributeDescription> m_vertexAttributeDescriptions;
605 std::vector<VkBufferSp> m_vertexBuffers;
606 std::vector<AllocationSp> m_vertexBufferAllocs;
// Assigns consecutive color locations to output symbols; a symbol taking
// several locations (e.g. a matrix) is recorded once per location in
// locationSymbols, while locationMap stores its starting location.
// NOTE(review): the declaration of `location` and the return statement are on
// elided lines.
609 static FragmentOutputLayout computeFragmentOutputLayout (const std::vector<Symbol>& symbols)
611 FragmentOutputLayout ret;
614 for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
616 const int numLocations = glu::getDataTypeNumLocations(it->varType.getBasicType());
// Duplicate symbol names would corrupt the map — treat as internal error.
618 TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
619 de::insert(ret.locationMap, it->name, location);
620 location += numLocations;
622 for (int ndx = 0; ndx < numLocations; ++ndx)
623 ret.locationSymbols.push_back(&*it);
// Constructor: precomputes the output-location layout and verifies up front
// that every input's attribute format is usable as a vertex buffer format on
// this device, throwing NotSupportedError otherwise.
629 FragmentOutExecutor::FragmentOutExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
630 : ShaderExecutor (context, shaderSpec)
631 , m_shaderType (shaderType)
632 , m_outputLayout (computeFragmentOutputLayout(m_shaderSpec.outputs))
633 , m_extraResourcesLayout (extraResourcesLayout)
635 const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
636 const InstanceInterface& vki = m_context.getInstanceInterface();
639 for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
641 const Symbol& symbol = m_shaderSpec.inputs[inputNdx];
642 const glu::DataType basicType = symbol.varType.getBasicType();
643 const VkFormat format = getAttributeFormat(basicType);
644 const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
645 if ((formatProperties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0)
646 TCU_THROW(NotSupportedError, "format not supported by device as vertex buffer attribute format");
// Destructor: all resources are RAII-managed members, so the body is empty.
650 FragmentOutExecutor::~FragmentOutExecutor (void)
// Computes one clip-space point position per value, centering each point on a
// distinct pixel of the renderSize grid (row-major order) so that every value
// maps to exactly one fragment.
// NOTE(review): the return of `positions` is on an elided line.
654 static std::vector<tcu::Vec2> computeVertexPositions (int numValues, const tcu::IVec2& renderSize)
656 std::vector<tcu::Vec2> positions(numValues);
657 for (int valNdx = 0; valNdx < numValues; valNdx++)
659 const int ix = valNdx % renderSize.x();
660 const int iy = valNdx / renderSize.x();
// +0.5 centers the point on the pixel; map [0,size) to [-1,1).
661 const float fx = -1.0f + 2.0f*((float(ix) + 0.5f) / float(renderSize.x()));
662 const float fy = -1.0f + 2.0f*((float(iy) + 0.5f) / float(renderSize.y()));
664 positions[valNdx] = tcu::Vec2(fx, fy);
// Chooses the tcu::TextureFormat used to read back an output of the given
// type: channel order by component count (3-component promoted to RGBA since
// no RGB variants are available), channel type by scalar type. With
// useIntOutputs, float results are read back as raw 32-bit uint bits.
670 static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
672 const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
674 tcu::TextureFormat::R,
675 tcu::TextureFormat::RG,
676 tcu::TextureFormat::RGBA, // No RGB variants available.
677 tcu::TextureFormat::RGBA
680 const glu::DataType basicType = outputType.getBasicType();
681 const int numComps = glu::getDataTypeNumComponents(basicType);
682 tcu::TextureFormat::ChannelType channelType;
684 switch (glu::getDataTypeScalarType(basicType))
686 case glu::TYPE_UINT: channelType = tcu::TextureFormat::UNSIGNED_INT32; break;
687 case glu::TYPE_INT: channelType = tcu::TextureFormat::SIGNED_INT32; break;
// Booleans were lowered to ints in the shader interface.
688 case glu::TYPE_BOOL: channelType = tcu::TextureFormat::SIGNED_INT32; break;
689 case glu::TYPE_FLOAT: channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT; break;
690 case glu::TYPE_FLOAT16: channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::HALF_FLOAT; break;
692 throw tcu::InternalError("Invalid output type");
695 DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
697 return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
// Maps a GLU basic type to the VkFormat used for its vertex attribute.
// Matrices return the format of a single COLUMN — callers bind one attribute
// per column (see bindAttributes). Unknown types fall through to
// VK_FORMAT_UNDEFINED.
// NOTE(review): the `switch (dataType)` header and default label are on
// elided lines.
700 static VkFormat getAttributeFormat (const glu::DataType dataType)
704 case glu::TYPE_FLOAT16: return VK_FORMAT_R16_SFLOAT;
705 case glu::TYPE_FLOAT16_VEC2: return VK_FORMAT_R16G16_SFLOAT;
706 case glu::TYPE_FLOAT16_VEC3: return VK_FORMAT_R16G16B16_SFLOAT;
707 case glu::TYPE_FLOAT16_VEC4: return VK_FORMAT_R16G16B16A16_SFLOAT;
709 case glu::TYPE_FLOAT: return VK_FORMAT_R32_SFLOAT;
710 case glu::TYPE_FLOAT_VEC2: return VK_FORMAT_R32G32_SFLOAT;
711 case glu::TYPE_FLOAT_VEC3: return VK_FORMAT_R32G32B32_SFLOAT;
712 case glu::TYPE_FLOAT_VEC4: return VK_FORMAT_R32G32B32A32_SFLOAT;
714 case glu::TYPE_INT: return VK_FORMAT_R32_SINT;
715 case glu::TYPE_INT_VEC2: return VK_FORMAT_R32G32_SINT;
716 case glu::TYPE_INT_VEC3: return VK_FORMAT_R32G32B32_SINT;
717 case glu::TYPE_INT_VEC4: return VK_FORMAT_R32G32B32A32_SINT;
719 case glu::TYPE_UINT: return VK_FORMAT_R32_UINT;
720 case glu::TYPE_UINT_VEC2: return VK_FORMAT_R32G32_UINT;
721 case glu::TYPE_UINT_VEC3: return VK_FORMAT_R32G32B32_UINT;
722 case glu::TYPE_UINT_VEC4: return VK_FORMAT_R32G32B32A32_UINT;
// Per-column formats: MATCxR maps to an R-component float vector.
724 case glu::TYPE_FLOAT_MAT2: return VK_FORMAT_R32G32_SFLOAT;
725 case glu::TYPE_FLOAT_MAT2X3: return VK_FORMAT_R32G32B32_SFLOAT;
726 case glu::TYPE_FLOAT_MAT2X4: return VK_FORMAT_R32G32B32A32_SFLOAT;
727 case glu::TYPE_FLOAT_MAT3X2: return VK_FORMAT_R32G32_SFLOAT;
728 case glu::TYPE_FLOAT_MAT3: return VK_FORMAT_R32G32B32_SFLOAT;
729 case glu::TYPE_FLOAT_MAT3X4: return VK_FORMAT_R32G32B32A32_SFLOAT;
730 case glu::TYPE_FLOAT_MAT4X2: return VK_FORMAT_R32G32_SFLOAT;
731 case glu::TYPE_FLOAT_MAT4X3: return VK_FORMAT_R32G32B32_SFLOAT;
732 case glu::TYPE_FLOAT_MAT4: return VK_FORMAT_R32G32B32A32_SFLOAT;
735 return VK_FORMAT_UNDEFINED;
// Creates one vertex binding + attribute at bindingLocation, allocates a
// host-visible vertex buffer of sizePerElement*count bytes, copies dataPtr
// into it and flushes. Buffer and allocation are retained in member vectors so
// they outlive command-buffer execution.
// NOTE(review): some aggregate-initializer fields (binding index, stride) of
// bindingDescription are on elided lines.
739 void FragmentOutExecutor::addAttribute (deUint32 bindingLocation, VkFormat format, deUint32 sizePerElement, deUint32 count, const void* dataPtr)
741 // Add binding specification
742 const deUint32 binding = (deUint32)m_vertexBindingDescriptions.size();
743 const VkVertexInputBindingDescription bindingDescription =
747 VK_VERTEX_INPUT_RATE_VERTEX
750 m_vertexBindingDescriptions.push_back(bindingDescription);
752 // Add location and format specification
753 const VkVertexInputAttributeDescription attributeDescription =
755 bindingLocation, // deUint32 location;
756 binding, // deUint32 binding;
757 format, // VkFormat format;
758 0u, // deUint32 offsetInBytes;
761 m_vertexAttributeDescriptions.push_back(attributeDescription);
763 // Upload data to buffer
764 const VkDevice vkDevice = m_context.getDevice();
765 const DeviceInterface& vk = m_context.getDeviceInterface();
766 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
768 const VkDeviceSize inputSize = sizePerElement * count;
769 const VkBufferCreateInfo vertexBufferParams =
771 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
772 DE_NULL, // const void* pNext;
773 0u, // VkBufferCreateFlags flags;
774 inputSize, // VkDeviceSize size;
775 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, // VkBufferUsageFlags usage;
776 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
777 1u, // deUint32 queueFamilyCount;
778 &queueFamilyIndex // const deUint32* pQueueFamilyIndices;
781 Move<VkBuffer> buffer = createBuffer(vk, vkDevice, &vertexBufferParams);
782 de::MovePtr<Allocation> alloc = m_context.getDefaultAllocator().allocate(getBufferMemoryRequirements(vk, vkDevice, *buffer), MemoryRequirement::HostVisible);
784 VK_CHECK(vk.bindBufferMemory(vkDevice, *buffer, alloc->getMemory(), alloc->getOffset()));
// Host-visible memory may be non-coherent: flush after the copy.
786 deMemcpy(alloc->getHostPtr(), dataPtr, (size_t)inputSize);
787 flushAlloc(vk, vkDevice, *alloc);
// Keep ownership; cleared again by clearRenderData().
789 m_vertexBuffers.push_back(de::SharedPtr<Unique<VkBuffer> >(new Unique<VkBuffer>(buffer)));
790 m_vertexBufferAllocs.push_back(AllocationSp(alloc.release()));
// Uploads every input symbol's value array as vertex attributes. Scalar and
// vector types use one attribute; matrices are bound as one attribute per
// column (see getAttributeFormat). Element size is derived from the scalar
// type.
// NOTE(review): the declaration of `elementSize` and the unsupported-type
// fallback branch are on elided lines.
793 void FragmentOutExecutor::bindAttributes (int numValues, const void* const* inputs)
796 for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
798 const Symbol& symbol = m_shaderSpec.inputs[inputNdx];
799 const void* ptr = inputs[inputNdx];
800 const glu::DataType basicType = symbol.varType.getBasicType();
801 const int vecSize = glu::getDataTypeScalarSize(basicType);
802 const VkFormat format = getAttributeFormat(basicType);
804 int numAttrsToAdd = 1;
806 if (glu::isDataTypeFloatOrVec(basicType))
807 elementSize = sizeof(float);
808 else if (glu::isDataTypeFloat16OrVec(basicType))
809 elementSize = sizeof(deUint16);
810 else if (glu::isDataTypeIntOrIVec(basicType))
811 elementSize = sizeof(int);
812 else if (glu::isDataTypeUintOrUVec(basicType))
813 elementSize = sizeof(deUint32);
814 else if (glu::isDataTypeMatrix(basicType))
// For matrices elementSize covers the whole matrix; one attribute per column.
816 int numRows = glu::getDataTypeMatrixNumRows(basicType);
817 int numCols = glu::getDataTypeMatrixNumColumns(basicType);
819 elementSize = numRows * numCols * (int)sizeof(float);
820 numAttrsToAdd = numCols;
825 // add attributes, in case of matrix every column is binded as an attribute
826 for (int attrNdx = 0; attrNdx < numAttrsToAdd; attrNdx++)
828 addAttribute((deUint32)m_vertexBindingDescriptions.size(), format, elementSize * vecSize, numValues, ptr);
// Drops all per-execute vertex input state (descriptions, buffers,
// allocations) so the next execute() starts from a clean slate; clearing the
// SharedPtr vectors releases the Vulkan buffers and memory.
833 void FragmentOutExecutor::clearRenderData (void)
835 m_vertexBindingDescriptions.clear();
836 m_vertexAttributeDescriptions.clear();
837 m_vertexBuffers.clear();
838 m_vertexBufferAllocs.clear();
// Creates a descriptor set layout with zero bindings — used as a placeholder
// for set 0 so the caller's extra-resources layout can occupy set 1.
// NOTE(review): the binding-count/pBindings initializer fields are on elided
// lines.
841 static Move<VkDescriptorSetLayout> createEmptyDescriptorSetLayout (const DeviceInterface& vkd, VkDevice device)
843 const VkDescriptorSetLayoutCreateInfo createInfo =
845 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
847 (VkDescriptorSetLayoutCreateFlags)0,
851 return createDescriptorSetLayout(vkd, device, &createInfo);
// Creates a minimal descriptor pool (FREE_DESCRIPTOR_SET enabled) that only
// needs to back the single empty descriptor set allocated below.
// NOTE(review): maxSets and the pool-size wiring are on elided lines.
854 static Move<VkDescriptorPool> createDummyDescriptorPool (const DeviceInterface& vkd, VkDevice device)
856 const VkDescriptorPoolSize dummySize =
858 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
861 const VkDescriptorPoolCreateInfo createInfo =
863 VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
865 (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
870 return createDescriptorPool(vkd, device, &createInfo);
// Allocates exactly one descriptor set of the given layout from the pool.
// NOTE(review): the pool/count/layout initializer fields are on elided lines.
873 static Move<VkDescriptorSet> allocateSingleDescriptorSet (const DeviceInterface& vkd, VkDevice device, VkDescriptorPool pool, VkDescriptorSetLayout layout)
875 const VkDescriptorSetAllocateInfo allocInfo =
877 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
883 return allocateDescriptorSet(vkd, device, &allocInfo);
886 void FragmentOutExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
888 const VkDevice vkDevice = m_context.getDevice();
889 const DeviceInterface& vk = m_context.getDeviceInterface();
890 const VkQueue queue = m_context.getUniversalQueue();
891 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
892 Allocator& memAlloc = m_context.getDefaultAllocator();
894 const deUint32 renderSizeX = de::min(static_cast<deUint32>(DEFAULT_RENDER_WIDTH), (deUint32)numValues);
895 const deUint32 renderSizeY = ((deUint32)numValues / renderSizeX) + (((deUint32)numValues % renderSizeX != 0) ? 1u : 0u);
896 const tcu::UVec2 renderSize (renderSizeX, renderSizeY);
897 std::vector<tcu::Vec2> positions;
899 const bool useGeometryShader = m_shaderType == glu::SHADERTYPE_GEOMETRY;
901 std::vector<VkImageSp> colorImages;
902 std::vector<VkImageMemoryBarrier> colorImagePreRenderBarriers;
903 std::vector<VkImageMemoryBarrier> colorImagePostRenderBarriers;
904 std::vector<AllocationSp> colorImageAllocs;
905 std::vector<VkAttachmentDescription> attachments;
906 std::vector<VkClearValue> attachmentClearValues;
907 std::vector<VkImageViewSp> colorImageViews;
909 std::vector<VkPipelineColorBlendAttachmentState> colorBlendAttachmentStates;
910 std::vector<VkAttachmentReference> colorAttachmentReferences;
912 Move<VkRenderPass> renderPass;
913 Move<VkFramebuffer> framebuffer;
914 Move<VkPipelineLayout> pipelineLayout;
915 Move<VkPipeline> graphicsPipeline;
917 Move<VkShaderModule> vertexShaderModule;
918 Move<VkShaderModule> geometryShaderModule;
919 Move<VkShaderModule> fragmentShaderModule;
921 Move<VkCommandPool> cmdPool;
922 Move<VkCommandBuffer> cmdBuffer;
924 Unique<VkDescriptorSetLayout> emptyDescriptorSetLayout (createEmptyDescriptorSetLayout(vk, vkDevice));
925 Unique<VkDescriptorPool> dummyDescriptorPool (createDummyDescriptorPool(vk, vkDevice));
926 Unique<VkDescriptorSet> emptyDescriptorSet (allocateSingleDescriptorSet(vk, vkDevice, *dummyDescriptorPool, *emptyDescriptorSetLayout));
930 // Compute positions - 1px points are used to drive fragment shading.
931 positions = computeVertexPositions(numValues, renderSize.cast<int>());
934 addAttribute(0u, VK_FORMAT_R32G32_SFLOAT, sizeof(tcu::Vec2), (deUint32)positions.size(), &positions[0]);
935 bindAttributes(numValues, inputs);
937 // Create color images
939 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
941 VK_FALSE, // VkBool32 blendEnable;
942 VK_BLEND_FACTOR_ONE, // VkBlendFactor srcColorBlendFactor;
943 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor;
944 VK_BLEND_OP_ADD, // VkBlendOp blendOpColor;
945 VK_BLEND_FACTOR_ONE, // VkBlendFactor srcAlphaBlendFactor;
946 VK_BLEND_FACTOR_ZERO, // VkBlendFactor destAlphaBlendFactor;
947 VK_BLEND_OP_ADD, // VkBlendOp blendOpAlpha;
948 (VK_COLOR_COMPONENT_R_BIT |
949 VK_COLOR_COMPONENT_G_BIT |
950 VK_COLOR_COMPONENT_B_BIT |
951 VK_COLOR_COMPONENT_A_BIT) // VkColorComponentFlags colorWriteMask;
954 for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
956 const bool isFloat = isDataTypeFloatOrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
957 const bool isFloat16b = glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
958 const bool isSigned = isDataTypeIntOrIVec (m_shaderSpec.outputs[outNdx].varType.getBasicType());
959 const bool isBool = isDataTypeBoolOrBVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
960 const VkFormat colorFormat = isFloat16b ? VK_FORMAT_R16G16B16A16_SFLOAT : (isFloat ? VK_FORMAT_R32G32B32A32_SFLOAT : (isSigned || isBool ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32G32B32A32_UINT));
963 const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), colorFormat);
964 if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)
965 TCU_THROW(NotSupportedError, "Image format doesn't support COLOR_ATTACHMENT_BIT");
968 const VkImageCreateInfo colorImageParams =
970 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
971 DE_NULL, // const void* pNext;
972 0u, // VkImageCreateFlags flags;
973 VK_IMAGE_TYPE_2D, // VkImageType imageType;
974 colorFormat, // VkFormat format;
975 { renderSize.x(), renderSize.y(), 1u }, // VkExtent3D extent;
976 1u, // deUint32 mipLevels;
977 1u, // deUint32 arraySize;
978 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
979 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
980 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
981 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
982 1u, // deUint32 queueFamilyCount;
983 &queueFamilyIndex, // const deUint32* pQueueFamilyIndices;
984 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
987 const VkAttachmentDescription colorAttachmentDescription =
989 0u, // VkAttachmentDescriptorFlags flags;
990 colorFormat, // VkFormat format;
991 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
992 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp;
993 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp;
994 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp;
995 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp;
996 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout initialLayout;
997 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout finalLayout;
1000 Move<VkImage> colorImage = createImage(vk, vkDevice, &colorImageParams);
1001 colorImages.push_back(de::SharedPtr<Unique<VkImage> >(new Unique<VkImage>(colorImage)));
1002 attachmentClearValues.push_back(getDefaultClearColor());
1004 // Allocate and bind color image memory
1006 de::MovePtr<Allocation> colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *((const VkImage*) colorImages.back().get())), MemoryRequirement::Any);
1007 VK_CHECK(vk.bindImageMemory(vkDevice, colorImages.back().get()->get(), colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
1008 colorImageAllocs.push_back(de::SharedPtr<Allocation>(colorImageAlloc.release()));
1010 attachments.push_back(colorAttachmentDescription);
1011 colorBlendAttachmentStates.push_back(colorBlendAttachmentState);
1013 const VkAttachmentReference colorAttachmentReference =
1015 (deUint32) (colorImages.size() - 1), // deUint32 attachment;
1016 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
1019 colorAttachmentReferences.push_back(colorAttachmentReference);
1022 // Create color attachment view
1024 const VkImageViewCreateInfo colorImageViewParams =
1026 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
1027 DE_NULL, // const void* pNext;
1028 0u, // VkImageViewCreateFlags flags;
1029 colorImages.back().get()->get(), // VkImage image;
1030 VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
1031 colorFormat, // VkFormat format;
1033 VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
1034 VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
1035 VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
1036 VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
1037 }, // VkComponentMapping components;
1039 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
1040 0u, // deUint32 baseMipLevel;
1041 1u, // deUint32 mipLevels;
1042 0u, // deUint32 baseArraySlice;
1043 1u // deUint32 arraySize;
1044 } // VkImageSubresourceRange subresourceRange;
1047 Move<VkImageView> colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
1048 colorImageViews.push_back(de::SharedPtr<Unique<VkImageView> >(new Unique<VkImageView>(colorImageView)));
1050 const VkImageMemoryBarrier colorImagePreRenderBarrier =
1052 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
1054 0u, // srcAccessMask
1055 (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1056 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT), // dstAccessMask
1057 VK_IMAGE_LAYOUT_UNDEFINED, // oldLayout
1058 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // newLayout
1059 VK_QUEUE_FAMILY_IGNORED, // srcQueueFamilyIndex
1060 VK_QUEUE_FAMILY_IGNORED, // dstQueueFamilyIndex
1061 colorImages.back().get()->get(), // image
1063 VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
1066 0u, // baseArrayLayer
1068 } // subresourceRange
1070 colorImagePreRenderBarriers.push_back(colorImagePreRenderBarrier);
1072 const VkImageMemoryBarrier colorImagePostRenderBarrier =
1074 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
1076 (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1077 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT), // srcAccessMask
1078 VK_ACCESS_TRANSFER_READ_BIT, // dstAccessMask
1079 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // oldLayout
1080 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // newLayout
1081 VK_QUEUE_FAMILY_IGNORED, // srcQueueFamilyIndex
1082 VK_QUEUE_FAMILY_IGNORED, // dstQueueFamilyIndex
1083 colorImages.back().get()->get(), // image
1085 VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
1088 0u, // baseArrayLayer
1090 } // subresourceRange
1092 colorImagePostRenderBarriers.push_back(colorImagePostRenderBarrier);
1097 // Create render pass
1099 const VkSubpassDescription subpassDescription =
1101 0u, // VkSubpassDescriptionFlags flags;
1102 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
1103 0u, // deUint32 inputCount;
1104 DE_NULL, // const VkAttachmentReference* pInputAttachments;
1105 (deUint32)colorImages.size(), // deUint32 colorCount;
1106 &colorAttachmentReferences[0], // const VkAttachmentReference* colorAttachments;
1107 DE_NULL, // const VkAttachmentReference* resolveAttachments;
1108 DE_NULL, // VkAttachmentReference depthStencilAttachment;
1109 0u, // deUint32 preserveCount;
1110 DE_NULL // const VkAttachmentReference* pPreserveAttachments;
1113 const VkRenderPassCreateInfo renderPassParams =
1115 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
1116 DE_NULL, // const void* pNext;
1117 (VkRenderPassCreateFlags)0, // VkRenderPassCreateFlags flags;
1118 (deUint32)attachments.size(), // deUint32 attachmentCount;
1119 &attachments[0], // const VkAttachmentDescription* pAttachments;
1120 1u, // deUint32 subpassCount;
1121 &subpassDescription, // const VkSubpassDescription* pSubpasses;
1122 0u, // deUint32 dependencyCount;
1123 DE_NULL // const VkSubpassDependency* pDependencies;
1126 renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
1129 // Create framebuffer
1131 std::vector<VkImageView> views(colorImageViews.size());
1132 for (size_t i = 0; i < colorImageViews.size(); i++)
1134 views[i] = colorImageViews[i].get()->get();
1137 const VkFramebufferCreateInfo framebufferParams =
1139 VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
1140 DE_NULL, // const void* pNext;
1141 0u, // VkFramebufferCreateFlags flags;
1142 *renderPass, // VkRenderPass renderPass;
1143 (deUint32)views.size(), // deUint32 attachmentCount;
1144 &views[0], // const VkImageView* pAttachments;
1145 (deUint32)renderSize.x(), // deUint32 width;
1146 (deUint32)renderSize.y(), // deUint32 height;
1147 1u // deUint32 layers;
1150 framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
1153 // Create pipeline layout
1155 const VkDescriptorSetLayout setLayouts[] =
1157 *emptyDescriptorSetLayout,
1158 m_extraResourcesLayout
1160 const VkPipelineLayoutCreateInfo pipelineLayoutParams =
1162 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
1163 DE_NULL, // const void* pNext;
1164 (VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
1165 (m_extraResourcesLayout != 0 ? 2u : 0u), // deUint32 descriptorSetCount;
1166 setLayouts, // const VkDescriptorSetLayout* pSetLayouts;
1167 0u, // deUint32 pushConstantRangeCount;
1168 DE_NULL // const VkPushConstantRange* pPushConstantRanges;
1171 pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
1176 vertexShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
1177 fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
1179 if (useGeometryShader)
1181 if (m_context.getDeviceFeatures().shaderTessellationAndGeometryPointSize)
1182 geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom_point_size"), 0);
1184 geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom"), 0);
1190 const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
1192 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
1193 DE_NULL, // const void* pNext;
1194 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
1195 (deUint32)m_vertexBindingDescriptions.size(), // deUint32 bindingCount;
1196 &m_vertexBindingDescriptions[0], // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
1197 (deUint32)m_vertexAttributeDescriptions.size(), // deUint32 attributeCount;
1198 &m_vertexAttributeDescriptions[0], // const VkVertexInputAttributeDescription* pvertexAttributeDescriptions;
1201 const std::vector<VkViewport> viewports (1, makeViewport(renderSize));
1202 const std::vector<VkRect2D> scissors (1, makeRect2D(renderSize));
1204 const VkPipelineColorBlendStateCreateInfo colorBlendStateParams =
1206 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
1207 DE_NULL, // const void* pNext;
1208 (VkPipelineColorBlendStateCreateFlags)0, // VkPipelineColorBlendStateCreateFlags flags;
1209 VK_FALSE, // VkBool32 logicOpEnable;
1210 VK_LOGIC_OP_COPY, // VkLogicOp logicOp;
1211 (deUint32)colorBlendAttachmentStates.size(), // deUint32 attachmentCount;
1212 &colorBlendAttachmentStates[0], // const VkPipelineColorBlendAttachmentState* pAttachments;
1213 { 0.0f, 0.0f, 0.0f, 0.0f } // float blendConst[4];
1216 graphicsPipeline = makeGraphicsPipeline(vk, // const DeviceInterface& vk
1217 vkDevice, // const VkDevice device
1218 *pipelineLayout, // const VkPipelineLayout pipelineLayout
1219 *vertexShaderModule, // const VkShaderModule vertexShaderModule
1220 DE_NULL, // const VkShaderModule tessellationControlShaderModule
1221 DE_NULL, // const VkShaderModule tessellationEvalShaderModule
1222 useGeometryShader ? *geometryShaderModule : DE_NULL, // const VkShaderModule geometryShaderModule
1223 *fragmentShaderModule, // const VkShaderModule fragmentShaderModule
1224 *renderPass, // const VkRenderPass renderPass
1225 viewports, // const std::vector<VkViewport>& viewports
1226 scissors, // const std::vector<VkRect2D>& scissors
1227 VK_PRIMITIVE_TOPOLOGY_POINT_LIST, // const VkPrimitiveTopology topology
1228 0u, // const deUint32 subpass
1229 0u, // const deUint32 patchControlPoints
1230 &vertexInputStateParams, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
1231 DE_NULL, // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
1232 DE_NULL, // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
1233 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
1234 &colorBlendStateParams); // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
1237 // Create command pool
1238 cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1240 // Create command buffer
1242 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1244 beginCommandBuffer(vk, *cmdBuffer);
1246 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0,
1247 0, (const VkMemoryBarrier*)DE_NULL,
1248 0, (const VkBufferMemoryBarrier*)DE_NULL,
1249 (deUint32)colorImagePreRenderBarriers.size(), colorImagePreRenderBarriers.empty() ? DE_NULL : &colorImagePreRenderBarriers[0]);
1250 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), (deUint32)attachmentClearValues.size(), &attachmentClearValues[0]);
1252 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
1254 if (m_extraResourcesLayout != 0)
1256 DE_ASSERT(extraResources != 0);
1257 const VkDescriptorSet descriptorSets[] = { *emptyDescriptorSet, extraResources };
1258 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, DE_LENGTH_OF_ARRAY(descriptorSets), descriptorSets, 0u, DE_NULL);
1261 DE_ASSERT(extraResources == 0);
1263 const deUint32 numberOfVertexAttributes = (deUint32)m_vertexBuffers.size();
1265 std::vector<VkDeviceSize> offsets(numberOfVertexAttributes, 0);
1267 std::vector<VkBuffer> buffers(numberOfVertexAttributes);
1268 for (size_t i = 0; i < numberOfVertexAttributes; i++)
1270 buffers[i] = m_vertexBuffers[i].get()->get();
1273 vk.cmdBindVertexBuffers(*cmdBuffer, 0, numberOfVertexAttributes, &buffers[0], &offsets[0]);
1274 vk.cmdDraw(*cmdBuffer, (deUint32)positions.size(), 1u, 0u, 0u);
1276 endRenderPass(vk, *cmdBuffer);
1277 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0,
1278 0, (const VkMemoryBarrier*)DE_NULL,
1279 0, (const VkBufferMemoryBarrier*)DE_NULL,
1280 (deUint32)colorImagePostRenderBarriers.size(), colorImagePostRenderBarriers.empty() ? DE_NULL : &colorImagePostRenderBarriers[0]);
1282 endCommandBuffer(vk, *cmdBuffer);
1286 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
1288 // Read back result and output
1290 const VkDeviceSize imageSizeBytes = (VkDeviceSize)(4 * sizeof(deUint32) * renderSize.x() * renderSize.y());
1291 const VkBufferCreateInfo readImageBufferParams =
1293 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1294 DE_NULL, // const void* pNext;
1295 0u, // VkBufferCreateFlags flags;
1296 imageSizeBytes, // VkDeviceSize size;
1297 VK_BUFFER_USAGE_TRANSFER_DST_BIT, // VkBufferUsageFlags usage;
1298 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1299 1u, // deUint32 queueFamilyCount;
1300 &queueFamilyIndex, // const deUint32* pQueueFamilyIndices;
1303 // constants for image copy
1304 Move<VkCommandPool> copyCmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1306 const VkBufferImageCopy copyParams =
1308 0u, // VkDeviceSize bufferOffset;
1309 (deUint32)renderSize.x(), // deUint32 bufferRowLength;
1310 (deUint32)renderSize.y(), // deUint32 bufferImageHeight;
1312 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspect aspect;
1313 0u, // deUint32 mipLevel;
1314 0u, // deUint32 arraySlice;
1315 1u, // deUint32 arraySize;
1316 }, // VkImageSubresource imageSubresource;
1317 { 0u, 0u, 0u }, // VkOffset3D imageOffset;
1318 { renderSize.x(), renderSize.y(), 1u } // VkExtent3D imageExtent;
1321 // Read back pixels.
1322 for (int outNdx = 0; outNdx < (int)m_shaderSpec.outputs.size(); ++outNdx)
1324 const Symbol& output = m_shaderSpec.outputs[outNdx];
1325 const int outSize = output.varType.getScalarSize();
1326 const int outVecSize = glu::getDataTypeNumComponents(output.varType.getBasicType());
1327 const int outNumLocs = glu::getDataTypeNumLocations(output.varType.getBasicType());
1328 const int outLocation = de::lookup(m_outputLayout.locationMap, output.name);
1330 for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
1332 tcu::TextureLevel tmpBuf;
1333 const tcu::TextureFormat format = getRenderbufferFormatForOutput(output.varType, false);
1334 const tcu::TextureFormat readFormat (tcu::TextureFormat::RGBA, format.type);
1335 const Unique<VkBuffer> readImageBuffer(createBuffer(vk, vkDevice, &readImageBufferParams));
1336 const de::UniquePtr<Allocation> readImageBufferMemory(memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));
1338 VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(), readImageBufferMemory->getOffset()));
1340 // Copy image to buffer
1343 Move<VkCommandBuffer> copyCmdBuffer = allocateCommandBuffer(vk, vkDevice, *copyCmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1345 beginCommandBuffer(vk, *copyCmdBuffer);
1346 vk.cmdCopyImageToBuffer(*copyCmdBuffer, colorImages[outLocation + locNdx].get()->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, ©Params);
1348 // Insert a barrier so data written by the transfer is available to the host
1350 const VkBufferMemoryBarrier barrier =
1352 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
1353 DE_NULL, // const void* pNext;
1354 VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags srcAccessMask;
1355 VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
1356 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
1357 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
1358 *readImageBuffer, // VkBuffer buffer;
1359 0, // VkDeviceSize offset;
1360 VK_WHOLE_SIZE, // VkDeviceSize size;
1363 vk.cmdPipelineBarrier(*copyCmdBuffer, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
1364 0, (const VkMemoryBarrier*)DE_NULL,
1366 0, (const VkImageMemoryBarrier*)DE_NULL);
1369 endCommandBuffer(vk, *copyCmdBuffer);
1371 submitCommandsAndWait(vk, vkDevice, queue, copyCmdBuffer.get());
1374 invalidateAlloc(vk, vkDevice, *readImageBufferMemory);
1376 tmpBuf.setStorage(readFormat, renderSize.x(), renderSize.y());
1378 const tcu::TextureFormat resultFormat(tcu::TextureFormat::RGBA, format.type);
1379 const tcu::ConstPixelBufferAccess resultAccess(resultFormat, renderSize.x(), renderSize.y(), 1, readImageBufferMemory->getHostPtr());
1381 tcu::copy(tmpBuf.getAccess(), resultAccess);
1383 if (isOutput16Bit(static_cast<size_t>(outNdx)))
1385 deUint16* dstPtrBase = static_cast<deUint16*>(outputs[outNdx]);
1386 if (outSize == 4 && outNumLocs == 1)
1387 deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint16));
1390 for (int valNdx = 0; valNdx < numValues; valNdx++)
1392 const deUint16* srcPtr = (const deUint16*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1393 deUint16* dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1394 deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint16));
1400 deUint32* dstPtrBase = static_cast<deUint32*>(outputs[outNdx]);
1401 if (outSize == 4 && outNumLocs == 1)
1402 deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint32));
1405 for (int valNdx = 0; valNdx < numValues; valNdx++)
1407 const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1408 deUint32* dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1409 deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint32));
1418 // VertexShaderExecutor
// Executor that runs the operation under test in the vertex stage; rendering,
// attribute binding and readback are inherited from FragmentOutExecutor.
class VertexShaderExecutor : public FragmentOutExecutor
	VertexShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual ~VertexShaderExecutor (void);
	// Registers "vert" and "frag" GLSL sources for the given spec into 'dst'.
	static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& dst);
// Delegates all setup to FragmentOutExecutor, selecting the vertex stage.
VertexShaderExecutor::VertexShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_VERTEX, shaderSpec, extraResourcesLayout)
1434 VertexShaderExecutor::~VertexShaderExecutor (void)
1438 void VertexShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1440 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1442 programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1443 /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1444 programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1447 // GeometryShaderExecutor
// Executor that runs the operation under test in the geometry stage; requires
// the geometryShader device feature (checked in the constructor).
class GeometryShaderExecutor : public FragmentOutExecutor
	GeometryShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual ~GeometryShaderExecutor (void);
	// Registers "vert", "geom", "geom_point_size" and "frag" GLSL sources for the spec.
	static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
1459 GeometryShaderExecutor::GeometryShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1460 : FragmentOutExecutor(context, glu::SHADERTYPE_GEOMETRY, shaderSpec, extraResourcesLayout)
1462 const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
1464 if (!features.geometryShader)
1465 TCU_THROW(NotSupportedError, "Geometry shader type not supported by device");
1468 GeometryShaderExecutor::~GeometryShaderExecutor (void)
1472 void GeometryShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1474 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1476 programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1478 programCollection.glslSources.add("geom") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", false)) << shaderSpec.buildOptions;
1479 programCollection.glslSources.add("geom_point_size") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", true)) << shaderSpec.buildOptions;
1481 /* \todo [2015-09-18 rsipka] set useIntOutputs parameter if needed. */
1482 programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "geom_out_", "o_")) << shaderSpec.buildOptions;
1486 // FragmentShaderExecutor
// Executor that runs the operation under test directly in the fragment stage.
class FragmentShaderExecutor : public FragmentOutExecutor
	FragmentShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual ~FragmentShaderExecutor (void);
	// Registers "vert" and "frag" GLSL sources for the given spec.
	static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
// Delegates all setup to FragmentOutExecutor, selecting the fragment stage.
FragmentShaderExecutor::FragmentShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_FRAGMENT, shaderSpec, extraResourcesLayout)
1503 FragmentShaderExecutor::~FragmentShaderExecutor (void)
1507 void FragmentShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1509 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1511 programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1512 /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1513 programCollection.glslSources.add("frag") << glu::FragmentSource(generateFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1516 // Shared utilities for compute and tess executors
// Returns the std430 base alignment, in bytes, of a scalar/vector data type.
// 16-bit float types align to componentCount * 2 bytes, with vec3 rounding up
// to the vec4 slot (8 bytes).
static deUint32 getVecStd430ByteAlignment (glu::DataType type)
	case glu::TYPE_FLOAT16: return 2u;
	case glu::TYPE_FLOAT16_VEC2: return 4u;
	case glu::TYPE_FLOAT16_VEC3: return 8u;
	case glu::TYPE_FLOAT16_VEC4: return 8u;
	// Non-16-bit types: alignment derived from component count.
	// NOTE(review): the case table for this switch is not visible in this view;
	// presumably 1->4, 2->8, 3/4->16 per std430 rules — confirm against full source.
	switch (glu::getDataTypeScalarSize(type))
// Base class for executors that pass inputs/outputs through SSBOs (compute and
// tessellation variants): inputs land in a std430 "Inputs" array at binding 0,
// outputs in an "Outputs" array at binding 1.
class BufferIoExecutor : public ShaderExecutor
	BufferIoExecutor (Context& context, const ShaderSpec& shaderSpec);
	virtual ~BufferIoExecutor (void);
	// Fixed descriptor bindings for the two storage buffers.
	INPUT_BUFFER_BINDING = 0,
	OUTPUT_BUFFER_BINDING = 1,
	// Creates/binds input and output buffers sized for 'numValues' elements.
	void initBuffers (int numValues);
	VkBuffer getInputBuffer (void) const { return *m_inputBuffer; }
	VkBuffer getOutputBuffer (void) const { return *m_outputBuffer; }
	// Per-element stride of the respective std430 layout.
	deUint32 getInputStride (void) const { return getLayoutStride(m_inputLayout); }
	deUint32 getOutputStride (void) const { return getLayoutStride(m_outputLayout); }
	// Host <-> buffer transfer of the packed values.
	void uploadInputBuffer (const void* const* inputPtrs, int numValues);
	void readOutputBuffer (void* const* outputPtrs, int numValues);
	// GLSL source generation helpers shared by the subclasses.
	static void declareBufferBlocks (std::ostream& src, const ShaderSpec& spec);
	static void generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);
	Move<VkBuffer> m_inputBuffer;
	Move<VkBuffer> m_outputBuffer;
	// matrixStride is the column stride for matrix types (0 for scalars/vectors).
	deUint32 matrixStride;
	VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
	// Computes std430-style offsets/strides for each symbol.
	static void computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
	static deUint32 getLayoutStride (const vector<VarLayout>& layout);
	// Pack/unpack a single variable between tightly packed host arrays and buffer layout.
	static void copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
	static void copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
	de::MovePtr<Allocation> m_inputAlloc;
	de::MovePtr<Allocation> m_outputAlloc;
	vector<VarLayout> m_inputLayout;
	vector<VarLayout> m_outputLayout;
// Pre-computes the std430 buffer layouts for the spec's inputs and outputs.
BufferIoExecutor::BufferIoExecutor (Context& context, const ShaderSpec& shaderSpec)
	: ShaderExecutor(context, shaderSpec)
	computeVarLayout(m_shaderSpec.inputs, &m_inputLayout);
	computeVarLayout(m_shaderSpec.outputs, &m_outputLayout);
1600 BufferIoExecutor::~BufferIoExecutor (void)
1604 inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
1606 return layout.empty() ? 0 : layout[0].stride;
// Computes a std430-style layout (offset, stride, matrixStride) for each symbol.
// All entries share a common stride equal to the aligned total size of one element.
void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
	deUint32 maxAlignment = 0;
	deUint32 curOffset = 0;
	DE_ASSERT(layout != DE_NULL);
	DE_ASSERT(layout->empty());
	layout->resize(symbols.size());
	for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
		const Symbol& symbol = symbols[varNdx];
		const glu::DataType basicType = symbol.varType.getBasicType();
		VarLayout& layoutEntry = (*layout)[varNdx];
		if (glu::isDataTypeScalarOrVector(basicType))
			// Scalars/vectors: align to the std430 vec alignment; 16-bit types use 2-byte components.
			const deUint32 alignment = getVecStd430ByteAlignment(basicType);
			const deUint32 size = (deUint32)glu::getDataTypeScalarSize(basicType) * (isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
			curOffset = (deUint32)deAlign32((int)curOffset, (int)alignment);
			maxAlignment = de::max(maxAlignment, alignment);
			layoutEntry.offset = curOffset;
			layoutEntry.matrixStride = 0;
			// NOTE(review): the advance of curOffset by 'size' is not visible in this view — confirm.
		else if (glu::isDataTypeMatrix(basicType))
			// Matrices: laid out column-major as numVecs column vectors with stride vecAlignment.
			const int numVecs = glu::getDataTypeMatrixNumColumns(basicType);
			const glu::DataType vecType = glu::getDataTypeFloatVec(glu::getDataTypeMatrixNumRows(basicType));
			const deUint32 vecAlignment = isDataTypeFloat16OrVec(basicType) ? getVecStd430ByteAlignment(vecType)/2 : getVecStd430ByteAlignment(vecType);
			curOffset = (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
			maxAlignment = de::max(maxAlignment, vecAlignment);
			layoutEntry.offset = curOffset;
			layoutEntry.matrixStride = vecAlignment;
			curOffset += vecAlignment*numVecs;
	// Round the total size up to the largest member alignment; this becomes the shared stride.
	const deUint32 totalSize = (deUint32)deAlign32(curOffset, maxAlignment);
	for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
		varIter->stride = totalSize;
// Emits GLSL declarations for the Inputs/Outputs structs and the std430 SSBO
// blocks (set 0, bindings INPUT_BUFFER_BINDING / OUTPUT_BUFFER_BINDING) that
// hold one struct per invocation.
void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
	if (!spec.inputs.empty())
		glu::StructType inputStruct("Inputs");
		for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
			inputStruct.addMember(symIter->name.c_str(), symIter->varType);
		src << glu::declare(&inputStruct) << ";\n";
	// Outputs struct mirrors the spec's output symbols.
	glu::StructType outputStruct("Outputs");
	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
		outputStruct.addMember(symIter->name.c_str(), symIter->varType);
	src << glu::declare(&outputStruct) << ";\n";
	if (!spec.inputs.empty())
		src << "layout(set = 0, binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
			<< "	Inputs inputs[];\n"
	src << "layout(set = 0, binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
		<< "	Outputs outputs[];\n"
// Emits the per-invocation GLSL body: reads each input from inputs[invocationNdx]
// into a local (converted to float16 types when packFloat16Bit is set), declares
// output locals, pastes the user-supplied operation source, and writes the
// results back to outputs[invocationNdx].
void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
	for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
		// 16-bit tests read into a float16 local; otherwise the symbol's own type is used.
		tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
		tname = glu::getDataTypeName(symIter->varType.getBasicType());
		src << "\t" << tname << " "<< symIter->name << " = " << tname << "(inputs[" << invocationNdxName << "]." << symIter->name << ");\n";
	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
		tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
		tname = glu::getDataTypeName(symIter->varType.getBasicType());
		src << "\t" << tname << " " << symIter->name << ";\n";
		// 16-bit outputs also get a "packed_" shadow in the symbol's declared type.
		const char* ttname = glu::getDataTypeName(symIter->varType.getBasicType());
		src << "\t" << ttname << " " << "packed_" << symIter->name << ";\n";
	// Paste the operation source line by line, indented one tab.
	std::istringstream opSrc (spec.source);
	while (std::getline(opSrc, line))
		src << "\t" << line << "\n";
	if (spec.packFloat16Bit)
		packFloat16Bit (src, spec.outputs);
	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
		// 16-bit outputs store the packed shadow; others store the local directly.
		src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = packed_" << symIter->name << ";\n";
		src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
// Copies numValues elements of a tightly-packed host array (srcBasePtr) into an
// std430-laid-out buffer (dstBasePtr), honoring the symbol's offset/stride and,
// for matrices, the per-column matrixStride. Only basic (non-struct, non-array)
// types are supported; anything else throws InternalError.
1759 void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1761 if (varType.isBasicType())
1763 const glu::DataType basicType = varType.getBasicType();
1764 const bool isMatrix = glu::isDataTypeMatrix(basicType);
1765 const int scalarSize = glu::getDataTypeScalarSize(basicType);
// Matrices are copied column by column; non-matrices are a single "vector".
1766 const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1767 const int numComps = scalarSize / numVecs;
1769 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1771 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
// Scalar byte size: 2 for float16 types, 4 for everything else.
1773 const int size = (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
// Source is tightly packed; destination uses the computed buffer layout.
1774 const int srcOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
1775 const int dstOffset = layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1776 const deUint8* srcPtr = (const deUint8*)srcBasePtr + srcOffset;
1777 deUint8* dstPtr = (deUint8*)dstBasePtr + dstOffset;
1779 deMemcpy(dstPtr, srcPtr, size * numComps);
1784 throw tcu::InternalError("Unsupported type");
// Inverse of copyToBuffer: copies numValues elements out of an std430-laid-out
// buffer (srcBasePtr) into a tightly-packed host array (dstBasePtr). Only basic
// types are supported; anything else throws InternalError.
1787 void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1789 if (varType.isBasicType())
1791 const glu::DataType basicType = varType.getBasicType();
1792 const bool isMatrix = glu::isDataTypeMatrix(basicType);
1793 const int scalarSize = glu::getDataTypeScalarSize(basicType);
1794 const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1795 const int numComps = scalarSize / numVecs;
1797 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1799 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
// Scalar byte size: 2 for float16 types, 4 for everything else.
1801 const int size = (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
// Source uses the buffer layout; destination is tightly packed (mirror of
// the offset math in copyToBuffer with src/dst swapped).
1802 const int srcOffset = layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1803 const int dstOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
1804 const deUint8* srcPtr = (const deUint8*)srcBasePtr + srcOffset;
1805 deUint8* dstPtr = (deUint8*)dstBasePtr + dstOffset;
1807 deMemcpy(dstPtr, srcPtr, size * numComps);
1812 throw tcu::InternalError("Unsupported type");
// Copies all input symbol arrays (one host pointer per symbol) into the mapped
// input SSBO allocation and flushes it so the device sees the data. A zero
// total input size (no inputs) is a no-op.
1815 void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues)
1817 const VkDevice vkDevice = m_context.getDevice();
1818 const DeviceInterface& vk = m_context.getDeviceInterface();
1820 const deUint32 inputStride = getLayoutStride(m_inputLayout);
1821 const int inputBufferSize = inputStride * numValues;
1823 if (inputBufferSize == 0)
1824 return; // No inputs
// One copyToBuffer per input symbol; each writes its own interleaved slot
// (per-symbol offset within the shared stride) in the same allocation.
1826 DE_ASSERT(m_shaderSpec.inputs.size() == m_inputLayout.size());
1827 for (size_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
1829 const glu::VarType& varType = m_shaderSpec.inputs[inputNdx].varType;
1830 const VarLayout& layout = m_inputLayout[inputNdx];
1832 copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], m_inputAlloc->getHostPtr());
// Make host writes visible to the device (non-coherent memory safe).
1835 flushAlloc(vk, vkDevice, *m_inputAlloc);
// Invalidates the mapped output SSBO allocation and unpacks each output symbol
// from it into the caller-provided host arrays (one pointer per symbol).
1838 void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1840 const VkDevice vkDevice = m_context.getDevice();
1841 const DeviceInterface& vk = m_context.getDeviceInterface();
1843 DE_ASSERT(numValues > 0); // At least some outputs are required.
// Make device writes visible to the host before reading.
1845 invalidateAlloc(vk, vkDevice, *m_outputAlloc);
1847 DE_ASSERT(m_shaderSpec.outputs.size() == m_outputLayout.size())
1848 for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
1850 const glu::VarType& varType = m_shaderSpec.outputs[outputNdx].varType;
1851 const VarLayout& layout = m_outputLayout[outputNdx];
1853 copyFromBuffer(varType, layout, numValues, m_outputAlloc->getHostPtr(), outputPtrs[outputNdx]);
// Creates the input and output storage buffers sized for numValues elements,
// allocates host-visible memory for each and binds it.
1857 void BufferIoExecutor::initBuffers (int numValues)
1859 const deUint32 inputStride = getLayoutStride(m_inputLayout);
1860 const deUint32 outputStride = getLayoutStride(m_outputLayout);
1861 // Avoid creating zero-sized buffer/memory
1862 const size_t inputBufferSize = de::max(numValues * inputStride, 1u)
// NOTE(review): unlike inputBufferSize above, this is not clamped to >= 1; it
// relies on every spec having at least one output (cf. the numValues assert in
// readOutputBuffer) — confirm before reusing with output-less specs.
1863 const size_t outputBufferSize = numValues * outputStride;
1865 // Upload data to buffer
1866 const VkDevice vkDevice = m_context.getDevice();
1867 const DeviceInterface& vk = m_context.getDeviceInterface();
1868 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1869 Allocator& memAlloc = m_context.getDefaultAllocator();
// Input SSBO: exclusive-use storage buffer on the universal queue family.
1871 const VkBufferCreateInfo inputBufferParams =
1873 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1874 DE_NULL, // const void* pNext;
1875 0u, // VkBufferCreateFlags flags;
1876 inputBufferSize, // VkDeviceSize size;
1877 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // VkBufferUsageFlags usage;
1878 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1879 1u, // deUint32 queueFamilyCount;
1880 &queueFamilyIndex // const deUint32* pQueueFamilyIndices;
1883 m_inputBuffer = createBuffer(vk, vkDevice, &inputBufferParams);
// Host-visible so uploadInputBuffer can memcpy directly into the mapping.
1884 m_inputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_inputBuffer), MemoryRequirement::HostVisible);
1886 VK_CHECK(vk.bindBufferMemory(vkDevice, *m_inputBuffer, m_inputAlloc->getMemory(), m_inputAlloc->getOffset()));
// Output SSBO: identical creation parameters except for its size.
1888 const VkBufferCreateInfo outputBufferParams =
1890 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1891 DE_NULL, // const void* pNext;
1892 0u, // VkBufferCreateFlags flags;
1893 outputBufferSize, // VkDeviceSize size;
1894 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // VkBufferUsageFlags usage;
1895 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1896 1u, // deUint32 queueFamilyCount;
1897 &queueFamilyIndex // const deUint32* pQueueFamilyIndices;
1900 m_outputBuffer = createBuffer(vk, vkDevice, &outputBufferParams);
1901 m_outputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_outputBuffer), MemoryRequirement::HostVisible);
1903 VK_CHECK(vk.bindBufferMemory(vkDevice, *m_outputBuffer, m_outputAlloc->getMemory(), m_outputAlloc->getOffset()));
1906 // ComputeShaderExecutor
// Shader executor that runs the test expression in a compute shader, using the
// SSBO input/output machinery inherited from BufferIoExecutor. One invocation
// (workgroup of size 1) is dispatched per input value.
1908 class ComputeShaderExecutor : public BufferIoExecutor
// extraResourcesLayout is an optional second descriptor set layout (may be 0)
// for test-supplied resources bound at set 1 during execute().
1911 ComputeShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1912 virtual ~ComputeShaderExecutor (void);
// Registers the compute program (SPIR-V asm or GLSL, depending on the spec).
1914 static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
// Uploads inputs, dispatches the compute shader over numValues, reads outputs.
1916 virtual void execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);
// Builds the shader text: hand-written SPIR-V asm when spec.spirVShader is set,
// otherwise generated GLSL.
1919 static std::string generateComputeShader (const ShaderSpec& spec);
// Not owned; provided by the caller and used only to size the pipeline layout.
1922 const VkDescriptorSetLayout m_extraResourcesLayout;
// Trivial constructor: forwards to BufferIoExecutor and stores the (possibly
// null) extra-resources descriptor set layout for later pipeline-layout setup.
1925 ComputeShaderExecutor::ComputeShaderExecutor(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1926 : BufferIoExecutor (context, shaderSpec)
1927 , m_extraResourcesLayout (extraResourcesLayout)
// Trivial destructor; all Vulkan objects are RAII-managed (Move<> / MovePtr).
1931 ComputeShaderExecutor::~ComputeShaderExecutor (void)
// Maps a GLSL basic data type to the SPIR-V type-id name ("%f16", "%v2f32",
// "%i32", ...) declared by generateSpirv() below. Covers float16/float/int
// scalars and vectors only; the per-case return statements are not visible in
// this extraction — presumably unsupported types hit an assert/default; confirm
// against the full file.
1935 std::string getTypeSpirv(const glu::DataType type)
1939 case glu::TYPE_FLOAT16:
1941 case glu::TYPE_FLOAT16_VEC2:
1943 case glu::TYPE_FLOAT16_VEC3:
1945 case glu::TYPE_FLOAT16_VEC4:
1947 case glu::TYPE_FLOAT:
1949 case glu::TYPE_FLOAT_VEC2:
1951 case glu::TYPE_FLOAT_VEC3:
1953 case glu::TYPE_FLOAT_VEC4:
1957 case glu::TYPE_INT_VEC2:
1959 case glu::TYPE_INT_VEC3:
1961 case glu::TYPE_INT_VEC4:
// Emits SPIR-V assembly that left-shifts the i32 counter stored in
// |variableName| by one bit (load, OpShiftLeftLogical by %c_i32_1, store back).
// Used between comparison operations in generateSpirv() so each operation
// contributes a distinct bit to the accumulated result.
//   variableName - SPIR-V id of an i32 Function-storage variable (e.g. "%operation")
//   operationNdx - index used to make the generated result ids unique
// Returns the assembly snippet, one instruction per line, '\n'-terminated.
// Note: parameter taken by const reference (the original copied the string by value).
std::string moveBitOperation (const std::string& variableName, const int operationNdx)
{
	std::ostringstream src;
	src
		<< "%operation_move_" << operationNdx << " = OpLoad %i32 " << variableName << "\n"
		<< "%move1_" << operationNdx << " = OpShiftLeftLogical %i32 %operation_move_" << operationNdx << " %c_i32_1\n"
		<< "OpStore " << variableName << " %move1_" << operationNdx << "\n";
	return src.str();
}
// Emits SPIR-V assembly for one comparison operation ("opeartion" (sic), e.g.
// OpFOrdLessThan) applied to %in0_val/%in1_val, accumulating the current
// operation bit (%operation) into %out for each component that compares true.
// Scalars use a conditional branch; vectors use OpSelect + component-wise
// integer math. (Function/parameter names keep the file's existing spellings.)
//   opeartion    - SPIR-V comparison opcode name
//   operationNdx - index making generated ids unique per operation
//   type         - input data type (selects scalar vs. vecN path)
//   outputType   - SPIR-V id of the integer output type (e.g. "%v2i32")
//   scalarSize   - component count of the output
1980 std::string sclarComparison(const std::string opeartion, const int operationNdx, const glu::DataType type, const std::string& outputType, const int scalarSize)
1982 std::ostringstream src;
1983 std::string boolType;
// Scalar path: branch on the single bool result and add the operation bit to
// %out only when the comparison holds.
1987 case glu::TYPE_FLOAT16:
1988 case glu::TYPE_FLOAT:
1990 << "%operation_result_" << operationNdx << " = " << opeartion << " %bool %in0_val %in1_val\n"
1991 << "OpSelectionMerge %IF_" << operationNdx << " None\n"
1992 << "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_" << operationNdx << "\n"
1993 << "%label_IF_" << operationNdx << " = OpLabel\n"
1994 << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n"
1995 << "%out_val_" << operationNdx << " = OpLoad %i32 %out\n"
1996 << "%add_if_" << operationNdx << " = OpIAdd %i32 %out_val_" << operationNdx << " %operation_val_" << operationNdx << "\n"
1997 << "OpStore %out %add_if_" << operationNdx << "\n"
1998 << "OpBranch %IF_" << operationNdx << "\n"
1999 << "%IF_" << operationNdx << " = OpLabel\n";
// Vector path: pick the bool vector type matching the component count...
2001 case glu::TYPE_FLOAT16_VEC2:
2002 case glu::TYPE_FLOAT_VEC2:
2003 boolType = "%v2bool";
2005 case glu::TYPE_FLOAT16_VEC3:
2006 case glu::TYPE_FLOAT_VEC3:
2007 boolType = "%v3bool";
2009 case glu::TYPE_FLOAT16_VEC4:
2010 case glu::TYPE_FLOAT_VEC4:
2011 boolType = "%v4bool";
// ...select a 0/1 integer vector from the comparison result (outputType[1]
// onward names the matching constant, e.g. "%c_v2i32_1"), splat the current
// operation bit, and add mask*bit into %out component-wise.
2020 << "%operation_result_" << operationNdx << " = " << opeartion << " " << boolType << " %in0_val %in1_val\n"
2021 << "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
2022 << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";
2024 src << "%operation_vec_" << operationNdx << " = OpCompositeConstruct " << outputType;
2025 for(int ndx = 0; ndx < scalarSize; ++ndx)
2026 src << " %operation_val_" << operationNdx;
2029 src << "%toAdd" << operationNdx << " = OpIMul "<< outputType << " %ivec_result_" << operationNdx << " %operation_vec_" << operationNdx <<"\n"
2030 << "%out_val_" << operationNdx << " = OpLoad "<< outputType << " %out\n"
2032 << "%add_if_" << operationNdx << " = OpIAdd " << outputType << " %out_val_" << operationNdx << " %toAdd" << operationNdx << "\n"
2033 << "OpStore %out %add_if_" << operationNdx << "\n";
// Builds a complete hand-written SPIR-V assembly compute shader that applies
// ten float ordered/unordered comparison opcodes to the two input values of
// each invocation and accumulates one bit per operation into the integer
// output SSBO. Assumes exactly two inputs and one output in |spec|.
//   are16Bit  - inputs are stored as 16-bit floats in the SSBO
//   isMediump - inputs are mediump -> RelaxedPrecision decorations are emitted
2038 std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const bool isMediump)
2040 const int operationAmount = 10;
2042 const std::string inputType1 = getTypeSpirv(spec.inputs[0].varType.getBasicType());
2043 const std::string inputType2 = getTypeSpirv(spec.inputs[1].varType.getBasicType());
2044 const std::string outputType = getTypeSpirv(spec.outputs[0].varType.getBasicType());
// packType is the f16 equivalent of the input type, used when the test packs
// 32-bit storage values into 16-bit working variables.
2045 const std::string packType = spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
// "opeartions" (sic) keeps the file's existing spelling.
2047 std::string opeartions[operationAmount] =
2050 "OpFOrdGreaterThan",
2052 "OpFOrdGreaterThanEqual",
2053 "OpFOrdLessThanEqual",
2055 "OpFUnordGreaterThan",
2057 "OpFUnordGreaterThanEqual",
2058 "OpFUnordLessThanEqual"
2061 std::ostringstream src;
// Header: capabilities/extensions depend on whether f16 arithmetic and/or
// 16-bit storage access are required.
2064 "; Generator: Khronos Glslang Reference Front End; 4\n"
2067 "OpCapability Shader\n";
2069 if (spec.packFloat16Bit || are16Bit)
2070 src << "OpCapability Float16\n";
2073 src << "OpCapability StorageBuffer16BitAccess\n"
2074 "OpCapability UniformAndStorageBuffer16BitAccess\n";
2077 src << "OpExtension \"SPV_KHR_16bit_storage\"\n";
2079 src << "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2080 "OpMemoryModel Logical GLSL450\n"
2081 "OpEntryPoint GLCompute %BP_main \"main\" %BP_id3uNum %BP_id3uID\n"
2082 "OpExecutionMode %BP_main LocalSize 1 1 1\n"
2083 "OpDecorate %BP_id3uNum BuiltIn NumWorkgroups\n"
2084 "OpDecorate %BP_id3uID BuiltIn WorkgroupId\n";
// Member offsets / array stride for the input struct: vec3 padded to 4
// scalars, scalar size 2 bytes for f16 storage, otherwise 4 bytes.
2090 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2092 src << "OpMemberDecorate %SSB0_IN "<< ndx <<" Offset " << offset << "\n";
2094 offset += (symIter->varType.getScalarSize() == 3 ? 4 : symIter->varType.getScalarSize()) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
2096 src << "OpDecorate %up_SSB0_IN ArrayStride "<< offset << "\n";
2099 src << "OpMemberDecorate %ssboIN 0 Offset 0\n"
2100 "OpDecorate %ssboIN BufferBlock\n"
2101 "OpDecorate %ssbo_src DescriptorSet 0\n"
2102 "OpDecorate %ssbo_src Binding 0\n"
// Mediump path: decorate inputs and intermediates RelaxedPrecision (duplicate
// decorations below appear redundant but are harmless in SPIR-V).
2107 src << "OpMemberDecorate %SSB0_IN 1 RelaxedPrecision\n"
2108 "OpDecorate %in0 RelaxedPrecision\n"
2109 "OpMemberDecorate %SSB0_IN 0 RelaxedPrecision\n"
2110 "OpDecorate %src_val_0_0 RelaxedPrecision\n"
2111 "OpDecorate %src_val_0_0 RelaxedPrecision\n"
2112 "OpDecorate %in1 RelaxedPrecision\n"
2113 "OpDecorate %src_val_0_1 RelaxedPrecision\n"
2114 "OpDecorate %src_val_0_1 RelaxedPrecision\n"
2115 "OpDecorate %in0_val RelaxedPrecision\n"
2116 "OpDecorate %in1_val RelaxedPrecision\n"
2117 "OpDecorate %in0_val RelaxedPrecision\n"
2118 "OpDecorate %in1_val RelaxedPrecision\n"
2119 "OpMemberDecorate %SSB0_OUT 0 RelaxedPrecision\n";
// Same offset/stride computation for the output struct.
2126 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
2128 src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
2130 offset += (symIter->varType.getScalarSize() == 3 ? 4 : symIter->varType.getScalarSize()) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
2132 src << "OpDecorate %up_SSB0_OUT ArrayStride " << offset << "\n";
2135 src << "OpMemberDecorate %ssboOUT 0 Offset 0\n"
2136 "OpDecorate %ssboOUT BufferBlock\n"
2137 "OpDecorate %ssbo_dst DescriptorSet 0\n"
2138 "OpDecorate %ssbo_dst Binding 1\n"
// Type declarations referenced by getTypeSpirv() and sclarComparison().
2140 "%void = OpTypeVoid\n"
2141 "%bool = OpTypeBool\n"
2142 "%v2bool = OpTypeVector %bool 2\n"
2143 "%v3bool = OpTypeVector %bool 3\n"
2144 "%v4bool = OpTypeVector %bool 4\n"
2145 "%u32 = OpTypeInt 32 0\n";
2147 if (!are16Bit) //f32 is not needed when shader operates only on f16
2148 src << "%f32 = OpTypeFloat 32\n"
2149 "%v2f32 = OpTypeVector %f32 2\n"
2150 "%v3f32 = OpTypeVector %f32 3\n"
2151 "%v4f32 = OpTypeVector %f32 4\n";
2153 if (spec.packFloat16Bit || are16Bit)
2154 src << "%f16 = OpTypeFloat 16\n"
2155 "%v2f16 = OpTypeVector %f16 2\n"
2156 "%v3f16 = OpTypeVector %f16 3\n"
2157 "%v4f16 = OpTypeVector %f16 4\n";
2159 src << "%i32 = OpTypeInt 32 1\n"
2160 "%v2i32 = OpTypeVector %i32 2\n"
2161 "%v3i32 = OpTypeVector %i32 3\n"
2162 "%v4i32 = OpTypeVector %i32 4\n"
2163 "%v3u32 = OpTypeVector %u32 3\n"
2165 "%ip_u32 = OpTypePointer Input %u32\n"
2166 "%ip_v3u32 = OpTypePointer Input %v3u32\n"
2167 "%up_float = OpTypePointer Uniform " << inputType1 << "\n"
2169 "%fun = OpTypeFunction %void\n"
2170 "%fp_u32 = OpTypePointer Function %u32\n"
2171 "%fp_i32 = OpTypePointer Function " << outputType << "\n"
2172 "%fp_f32 = OpTypePointer Function " << inputType1 << "\n"
2173 "%fp_operation = OpTypePointer Function %i32\n";
2175 if (spec.packFloat16Bit)
2176 src << "%fp_f16 = OpTypePointer Function " << packType << "\n";
// Builtin inputs and the integer constants used for addressing and for the
// per-component 0/1 masks in sclarComparison().
2178 src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
2179 "%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
2180 "%up_i32 = OpTypePointer Uniform " << outputType << "\n"
2182 "%c_u32_0 = OpConstant %u32 0\n"
2183 "%c_u32_1 = OpConstant %u32 1\n"
2184 "%c_u32_2 = OpConstant %u32 2\n"
2185 "%c_i32_0 = OpConstant %i32 0\n"
2186 "%c_i32_1 = OpConstant %i32 1\n"
2187 "%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
2188 "%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
2189 "%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
2190 "%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
2191 "%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
2192 "%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
// Input/output SSBO struct + runtime-array declarations (BufferBlock style).
2194 "%SSB0_IN = OpTypeStruct " << inputType1 << " " << inputType2 << "\n"
2195 "%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
2196 "%ssboIN = OpTypeStruct %up_SSB0_IN\n"
2197 "%up_ssboIN = OpTypePointer Uniform %ssboIN\n"
2198 "%ssbo_src = OpVariable %up_ssboIN Uniform\n"
2200 "%SSB0_OUT = OpTypeStruct " << outputType << "\n"
2201 "%up_SSB0_OUT = OpTypeRuntimeArray %SSB0_OUT\n"
2202 "%ssboOUT = OpTypeStruct %up_SSB0_OUT\n"
2203 "%up_ssboOUT = OpTypePointer Uniform %ssboOUT\n"
2204 "%ssbo_dst = OpVariable %up_ssboOUT Uniform\n"
2206 "%BP_main = OpFunction %void None %fun\n"
2207 "%BP_label = OpLabel\n"
2208 "%invocationNdx = OpVariable %fp_u32 Function\n";
// Working variables use f16 pointers when packing, f32 otherwise.
2210 if (spec.packFloat16Bit)
2211 src << "%in0 = OpVariable %fp_f16 Function\n"
2212 "%in1 = OpVariable %fp_f16 Function\n";
2214 src << "%in0 = OpVariable %fp_f32 Function\n"
2215 "%in1 = OpVariable %fp_f32 Function\n";
// invocationNdx = numWG.x*numWG.y*id.z + numWG.x*id.y + id.x — the same
// flattening the GLSL generator emits in generateComputeShader().
2217 src << "%operation = OpVariable %fp_operation Function\n"
2218 "%out = OpVariable %fp_i32 Function\n"
2219 "%BP_id_0_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
2220 "%BP_id_1_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
2221 "%BP_id_2_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
2222 "%BP_num_0_ptr = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_0\n"
2223 "%BP_num_1_ptr = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_1\n"
2224 "%BP_id_0_val = OpLoad %u32 %BP_id_0_ptr\n"
2225 "%BP_id_1_val = OpLoad %u32 %BP_id_1_ptr\n"
2226 "%BP_id_2_val = OpLoad %u32 %BP_id_2_ptr\n"
2227 "%BP_num_0_val = OpLoad %u32 %BP_num_0_ptr\n"
2228 "%BP_num_1_val = OpLoad %u32 %BP_num_1_ptr\n"
2230 "%mul_1 = OpIMul %u32 %BP_num_0_val %BP_num_1_val\n"
2231 "%mul_2 = OpIMul %u32 %mul_1 %BP_id_2_val\n"
2232 "%mul_3 = OpIMul %u32 %BP_num_0_val %BP_id_1_val\n"
2233 "%add_1 = OpIAdd %u32 %mul_2 %mul_3\n"
2234 "%add_2 = OpIAdd %u32 %add_1 %BP_id_0_val\n"
2235 "OpStore %invocationNdx %add_2\n"
2236 "%invocationNdx_val = OpLoad %u32 %invocationNdx\n"
// Load input 0 (member 0), converting to f16 when packing.
2238 "%src_ptr_0_0 = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_0\n"
2239 "%src_val_0_0 = OpLoad " << inputType1 << " %src_ptr_0_0\n";
2241 if(spec.packFloat16Bit)
2242 src << "%val_f16_0_0 = OpFConvert " << packType <<" %src_val_0_0\n"
2243 "OpStore %in0 %val_f16_0_0\n";
2245 src << "OpStore %in0 %src_val_0_0\n";
// Load input 1 (member 1), same conversion rule.
2248 "%src_ptr_0_1 = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_1\n"
2249 "%src_val_0_1 = OpLoad " << inputType2 << " %src_ptr_0_1\n";
2251 if (spec.packFloat16Bit)
2252 src << "%val_f16_0_1 = OpFConvert " << packType << " %src_val_0_1\n"
2253 "OpStore %in1 %val_f16_0_1\n";
2255 src << "OpStore %in1 %src_val_0_1\n";
// %operation starts at bit 0 (value 1); %out starts at the matching zero
// constant ("%c_<type>_0", derived from outputType without its '%').
2258 "OpStore %operation %c_i32_1\n"
2259 "OpStore %out %c_" << &outputType[1] << "_0\n"
2262 if (spec.packFloat16Bit)
2263 src << "%in0_val = OpLoad " << packType << " %in0\n"
2264 "%in1_val = OpLoad " << packType << " %in1\n";
2266 src << "%in0_val = OpLoad " << inputType1 << " %in0\n"
2267 "%in1_val = OpLoad " << inputType2 << " %in1\n";
// Apply each comparison, then shift the operation bit left for the next one.
2270 for(int operationNdx = 0; operationNdx < operationAmount; ++operationNdx)
2272 src << sclarComparison (opeartions[operationNdx], operationNdx,
2273 spec.inputs[0].varType.getBasicType(),
2275 spec.outputs[0].varType.getScalarSize());
2276 src << moveBitOperation("%operation", moveBitNdx);
// Store the accumulated bitmask to the output SSBO and end the function.
2281 "%out_val_final = OpLoad " << outputType << " %out\n"
2282 "%ssbo_dst_ptr = OpAccessChain %up_i32 %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_0\n"
2283 "OpStore %ssbo_dst_ptr %out_val_final\n"
// Builds the compute shader text for |spec|: hand-written SPIR-V assembly when
// spec.spirVShader is set (comparison-operator tests), otherwise generated GLSL
// using the inherited SSBO I/O helpers.
2291 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
2293 if(spec.spirVShader)
// Scan inputs to decide the SPIR-V flavor: 16-bit storage and/or
// RelaxedPrecision (mediump) decorations.
2295 bool are16Bit = false;
2296 bool isMediump = false;
2297 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2299 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
2302 if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
// Both flags found — no further inputs can change the outcome.
2305 if(isMediump && are16Bit)
2309 return generateSpirv(spec, are16Bit, isMediump);
// GLSL path: version pragma, user globals, one-invocation workgroups, SSBO
// block declarations, then a main() that flattens the workgroup id into
// "invocationNdx" and runs the generated buffer I/O body.
2313 std::ostringstream src;
2314 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n";
2316 if (!spec.globalDeclarations.empty())
2317 src << spec.globalDeclarations << "\n";
2319 src << "layout(local_size_x = 1) in;\n"
2322 declareBufferBlocks(src, spec);
2324 src << "void main (void)\n"
2326 << "	uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2327 << "	                   + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2329 generateExecBufferIo(src, spec, "invocationNdx");
// Registers the "compute" program in the collection: as SPIR-V 1.3 assembly
// when the spec carries a hand-written SPIR-V shader, otherwise as GLSL with
// the spec's build options.
2337 void ComputeShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
2339 if(shaderSpec.spirVShader)
2340 programCollection.spirvAsmSources.add("compute") << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3) << generateComputeShader(shaderSpec)
2342 programCollection.glslSources.add("compute") << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
// Runs the compute shader over numValues inputs: uploads inputs to the SSBO,
// builds descriptor set / pipeline layout / pipeline, then dispatches in
// batches (rebinding buffer sub-ranges per batch) and reads back the outputs.
// |extraResources| must be a valid set iff an extra-resources layout was given
// at construction; it is bound at set index 1.
2345 void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
2347 const VkDevice vkDevice = m_context.getDevice();
2348 const DeviceInterface& vk = m_context.getDeviceInterface();
2349 const VkQueue queue = m_context.getUniversalQueue();
2350 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2352 DescriptorPoolBuilder descriptorPoolBuilder;
2353 DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
2355 Move<VkShaderModule> computeShaderModule;
2356 Move<VkPipeline> computePipeline;
2357 Move<VkPipelineLayout> pipelineLayout;
2358 Move<VkCommandPool> cmdPool;
2359 Move<VkDescriptorPool> descriptorPool;
2360 Move<VkDescriptorSetLayout> descriptorSetLayout;
2361 Move<VkDescriptorSet> descriptorSet;
2362 const deUint32 numDescriptorSets = (m_extraResourcesLayout != 0) ? 2u : 1u;
// Extra layout and extra set must be provided (or omitted) together.
2364 DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));
2366 initBuffers(numValues);
2368 // Setup input buffer & copy data
2369 uploadInputBuffer(inputs, numValues);
2371 // Create command pool
2372 cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
2374 // Create command buffer
// Two storage-buffer bindings: binding 0 = input SSBO, binding 1 = output SSBO
// (matching INPUT_BUFFER_BINDING / OUTPUT_BUFFER_BINDING in the shader).
2376 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
2377 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2378 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
2379 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2381 descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
2382 descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2384 const VkDescriptorSetAllocateInfo allocInfo =
2386 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2390 &*descriptorSetLayout
2393 descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
2395 // Create pipeline layout
// Set 0 = I/O buffers; set 1 = optional extra resources (only the first
// numDescriptorSets entries of this array are used).
2397 const VkDescriptorSetLayout descriptorSetLayouts[] =
2399 *descriptorSetLayout,
2400 m_extraResourcesLayout
2402 const VkPipelineLayoutCreateInfo pipelineLayoutParams =
2404 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
2405 DE_NULL, // const void* pNext;
2406 (VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
2407 numDescriptorSets, // deUint32 CdescriptorSetCount;
2408 descriptorSetLayouts, // const VkDescriptorSetLayout* pSetLayouts;
2409 0u, // deUint32 pushConstantRangeCount;
2410 DE_NULL // const VkPushConstantRange* pPushConstantRanges;
2413 pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
// Shader module comes from the "compute" binary built by generateSources().
2418 computeShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("compute"), 0);
2423 const VkPipelineShaderStageCreateInfo shaderStageParams[1] =
2426 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
2427 DE_NULL, // const void* pNext;
2428 (VkPipelineShaderStageCreateFlags)0u, // VkPipelineShaderStageCreateFlags flags;
2429 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagsBit stage;
2430 *computeShaderModule, // VkShaderModule shader;
2431 "main", // const char* pName;
2432 DE_NULL // const VkSpecializationInfo* pSpecializationInfo;
2436 const VkComputePipelineCreateInfo computePipelineParams =
2438 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
2439 DE_NULL, // const void* pNext;
2440 (VkPipelineCreateFlags)0, // VkPipelineCreateFlags flags;
2441 *shaderStageParams, // VkPipelineShaderStageCreateInfo cs;
2442 *pipelineLayout, // VkPipelineLayout layout;
2443 0u, // VkPipeline basePipelineHandle;
2444 0u, // int32_t basePipelineIndex;
2447 computePipeline = createComputePipeline(vk, vkDevice, DE_NULL, &computePipelineParams);
// Batch size cap. NOTE(review): taken from maxComputeWorkGroupSize[0], but the
// dispatch below launches numToExec work *groups* of local size 1, so the
// governing limit is arguably maxComputeWorkGroupCount[0] — confirm intent.
2450 const int maxValuesPerInvocation = m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
2452 const deUint32 inputStride = getInputStride();
2453 const deUint32 outputStride = getOutputStride();
// Process values in batches, pointing the descriptors at this batch's
// sub-range of the big input/output buffers each iteration.
2455 while (curOffset < numValues)
2457 Move<VkCommandBuffer> cmdBuffer;
2458 const int numToExec = de::min(maxValuesPerInvocation, numValues-curOffset);
2460 // Update descriptors
2462 DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
2464 const VkDescriptorBufferInfo outputDescriptorBufferInfo =
2466 *m_outputBuffer, // VkBuffer buffer;
2467 curOffset * outputStride, // VkDeviceSize offset;
2468 numToExec * outputStride // VkDeviceSize range;
2471 descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
2475 const VkDescriptorBufferInfo inputDescriptorBufferInfo =
2477 *m_inputBuffer, // VkBuffer buffer;
2478 curOffset * inputStride, // VkDeviceSize offset;
2479 numToExec * inputStride // VkDeviceSize range;
2482 descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
2485 descriptorSetUpdateBuilder.update(vk, vkDevice);
// Record one command buffer per batch: bind, dispatch one workgroup per value.
2488 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2489 beginCommandBuffer(vk, *cmdBuffer);
2490 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
2493 const VkDescriptorSet descriptorSets[] = { *descriptorSet, extraResources };
2494 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
2497 vk.cmdDispatch(*cmdBuffer, numToExec, 1, 1);
2499 // Insert a barrier so data written by the shader is available to the host
2501 const VkBufferMemoryBarrier bufferBarrier =
2503 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
2504 DE_NULL, // const void* pNext;
2505 VK_ACCESS_SHADER_WRITE_BIT, // VkAccessFlags srcAccessMask;
2506 VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
2507 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
2508 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
2509 *m_outputBuffer, // VkBuffer buffer;
2510 0, // VkDeviceSize offset;
2511 VK_WHOLE_SIZE, // VkDeviceSize size;
2514 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
2515 0, (const VkMemoryBarrier*)DE_NULL,
2517 0, (const VkImageMemoryBarrier*)DE_NULL);
2520 endCommandBuffer(vk, *cmdBuffer);
2522 curOffset += numToExec;
// Synchronous submit: each batch completes before the next is recorded.
2525 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
// All batches done — invalidate and unpack results for the caller.
2529 readOutputBuffer(outputs, numValues);
2532 // Tessellation utils
// Builds the trivial pass-through vertex shader used by the tessellation
// executors: derives a clip-space position from gl_VertexIndex alone, so no
// vertex buffers are needed.
2534 static std::string generateVertexShaderForTess (void)
2536 std::ostringstream src;
2537 src << "#version 450\n"
2538 << "void main (void)\n{\n"
2539 << "	gl_Position = vec4(gl_VertexIndex/2, gl_VertexIndex%2, 0.0, 1.0);\n"
// Base class for executors that run the test expression in a tessellation
// stage; provides the common render pass via renderTess(). Construction throws
// NotSupportedError if the device lacks tessellation support.
2545 class TessellationExecutor : public BufferIoExecutor
// extraResourcesLayout is an optional second descriptor set layout (may be 0)
// for test-supplied resources.
2548 TessellationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2549 virtual ~TessellationExecutor (void);
// Renders vertexCount vertices with the given patch size, binding
// extraResources alongside the I/O buffers; numValues sizes the SSBOs.
2551 void renderTess (deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources);
// Not owned; provided by the caller.
2554 const VkDescriptorSetLayout m_extraResourcesLayout;
// Stores the extra-resources layout and rejects devices without the
// tessellationShader feature (NotSupportedError).
2557 TessellationExecutor::TessellationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
2558 : BufferIoExecutor (context, shaderSpec)
2559 , m_extraResourcesLayout (extraResourcesLayout)
2561 const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
2563 if (!features.tessellationShader)
2564 TCU_THROW(NotSupportedError, "Tessellation shader is not supported by device");
// Trivial destructor; all Vulkan objects are RAII-managed.
2567 TessellationExecutor::~TessellationExecutor (void)
2571 void TessellationExecutor::renderTess (deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources)
2573 const size_t inputBufferSize = numValues * getInputStride();
2574 const VkDevice vkDevice = m_context.getDevice();
2575 const DeviceInterface& vk = m_context.getDeviceInterface();
2576 const VkQueue queue = m_context.getUniversalQueue();
2577 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2578 Allocator& memAlloc = m_context.getDefaultAllocator();
2580 const tcu::UVec2 renderSize (DEFAULT_RENDER_WIDTH, DEFAULT_RENDER_HEIGHT);
2582 Move<VkImage> colorImage;
2583 de::MovePtr<Allocation> colorImageAlloc;
2584 VkFormat colorFormat = VK_FORMAT_R8G8B8A8_UNORM;
2585 Move<VkImageView> colorImageView;
2587 Move<VkRenderPass> renderPass;
2588 Move<VkFramebuffer> framebuffer;
2589 Move<VkPipelineLayout> pipelineLayout;
2590 Move<VkPipeline> graphicsPipeline;
2592 Move<VkShaderModule> vertexShaderModule;
2593 Move<VkShaderModule> tessControlShaderModule;
2594 Move<VkShaderModule> tessEvalShaderModule;
2595 Move<VkShaderModule> fragmentShaderModule;
2597 Move<VkCommandPool> cmdPool;
2598 Move<VkCommandBuffer> cmdBuffer;
2600 Move<VkDescriptorPool> descriptorPool;
2601 Move<VkDescriptorSetLayout> descriptorSetLayout;
2602 Move<VkDescriptorSet> descriptorSet;
2603 const deUint32 numDescriptorSets = (m_extraResourcesLayout != 0) ? 2u : 1u;
2605 DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));
2607 // Create color image
2609 const VkImageCreateInfo colorImageParams =
2611 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
2612 DE_NULL, // const void* pNext;
2613 0u, // VkImageCreateFlags flags;
2614 VK_IMAGE_TYPE_2D, // VkImageType imageType;
2615 colorFormat, // VkFormat format;
2616 { renderSize.x(), renderSize.y(), 1u }, // VkExtent3D extent;
2617 1u, // deUint32 mipLevels;
2618 1u, // deUint32 arraySize;
2619 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
2620 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
2621 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
2622 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
2623 1u, // deUint32 queueFamilyCount;
2624 &queueFamilyIndex, // const deUint32* pQueueFamilyIndices;
2625 VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
2628 colorImage = createImage(vk, vkDevice, &colorImageParams);
2630 // Allocate and bind color image memory
2631 colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *colorImage), MemoryRequirement::Any);
2632 VK_CHECK(vk.bindImageMemory(vkDevice, *colorImage, colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
2635 // Create color attachment view
2637 const VkImageViewCreateInfo colorImageViewParams =
2639 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
2640 DE_NULL, // const void* pNext;
2641 0u, // VkImageViewCreateFlags flags;
2642 *colorImage, // VkImage image;
2643 VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
2644 colorFormat, // VkFormat format;
2646 VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
2647 VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
2648 VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
2649 VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
2650 }, // VkComponentsMapping components;
2652 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
2653 0u, // deUint32 baseMipLevel;
2654 1u, // deUint32 mipLevels;
2655 0u, // deUint32 baseArraylayer;
2656 1u // deUint32 layerCount;
2657 } // VkImageSubresourceRange subresourceRange;
2660 colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
2663 // Create render pass
2665 const VkAttachmentDescription colorAttachmentDescription =
2667 0u, // VkAttachmentDescriptorFlags flags;
2668 colorFormat, // VkFormat format;
2669 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
2670 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp;
2671 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp;
2672 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp;
2673 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp;
2674 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
2675 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout finalLayout
2678 const VkAttachmentDescription attachments[1] =
2680 colorAttachmentDescription
2683 const VkAttachmentReference colorAttachmentReference =
2685 0u, // deUint32 attachment;
2686 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
2689 const VkSubpassDescription subpassDescription =
2691 0u, // VkSubpassDescriptionFlags flags;
2692 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
2693 0u, // deUint32 inputCount;
2694 DE_NULL, // const VkAttachmentReference* pInputAttachments;
2695 1u, // deUint32 colorCount;
2696 &colorAttachmentReference, // const VkAttachmentReference* pColorAttachments;
2697 DE_NULL, // const VkAttachmentReference* pResolveAttachments;
2698 DE_NULL, // VkAttachmentReference depthStencilAttachment;
2699 0u, // deUint32 preserveCount;
2700 DE_NULL // const VkAttachmentReference* pPreserveAttachments;
2703 const VkRenderPassCreateInfo renderPassParams =
2705 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
2706 DE_NULL, // const void* pNext;
2707 0u, // VkRenderPassCreateFlags flags;
2708 1u, // deUint32 attachmentCount;
2709 attachments, // const VkAttachmentDescription* pAttachments;
2710 1u, // deUint32 subpassCount;
2711 &subpassDescription, // const VkSubpassDescription* pSubpasses;
2712 0u, // deUint32 dependencyCount;
2713 DE_NULL // const VkSubpassDependency* pDependencies;
2716 renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
2719 // Create framebuffer
2721 const VkFramebufferCreateInfo framebufferParams =
2723 VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
2724 DE_NULL, // const void* pNext;
2725 0u, // VkFramebufferCreateFlags flags;
2726 *renderPass, // VkRenderPass renderPass;
2727 1u, // deUint32 attachmentCount;
2728 &*colorImageView, // const VkAttachmentBindInfo* pAttachments;
2729 (deUint32)renderSize.x(), // deUint32 width;
2730 (deUint32)renderSize.y(), // deUint32 height;
2731 1u // deUint32 layers;
2734 framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
2737 // Create descriptors
2739 DescriptorPoolBuilder descriptorPoolBuilder;
2740 DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
2742 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
2743 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2744 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
2745 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2747 descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
2748 descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2750 const VkDescriptorSetAllocateInfo allocInfo =
2752 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2756 &*descriptorSetLayout
2759 descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
2760 // Update descriptors
2762 DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
2763 const VkDescriptorBufferInfo outputDescriptorBufferInfo =
2765 *m_outputBuffer, // VkBuffer buffer;
2766 0u, // VkDeviceSize offset;
2767 VK_WHOLE_SIZE // VkDeviceSize range;
2770 descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
2772 VkDescriptorBufferInfo inputDescriptorBufferInfo =
2774 0, // VkBuffer buffer;
2775 0u, // VkDeviceSize offset;
2776 VK_WHOLE_SIZE // VkDeviceSize range;
2779 if (inputBufferSize > 0)
2781 inputDescriptorBufferInfo.buffer = *m_inputBuffer;
2783 descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
2786 descriptorSetUpdateBuilder.update(vk, vkDevice);
2790 // Create pipeline layout
2792 const VkDescriptorSetLayout descriptorSetLayouts[] =
2794 *descriptorSetLayout,
2795 m_extraResourcesLayout
2797 const VkPipelineLayoutCreateInfo pipelineLayoutParams =
2799 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
2800 DE_NULL, // const void* pNext;
2801 (VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
2802 numDescriptorSets, // deUint32 descriptorSetCount;
2803 descriptorSetLayouts, // const VkDescriptorSetLayout* pSetLayouts;
2804 0u, // deUint32 pushConstantRangeCount;
2805 DE_NULL // const VkPushConstantRange* pPushConstantRanges;
2808 pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
2811 // Create shader modules
2813 vertexShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
2814 tessControlShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_control"), 0);
2815 tessEvalShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_eval"), 0);
2816 fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
2821 const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
2823 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
2824 DE_NULL, // const void* pNext;
2825 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
2826 0u, // deUint32 bindingCount;
2827 DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
2828 0u, // deUint32 attributeCount;
2829 DE_NULL, // const VkVertexInputAttributeDescription* pvertexAttributeDescriptions;
2832 const std::vector<VkViewport> viewports (1, makeViewport(renderSize));
2833 const std::vector<VkRect2D> scissors (1, makeRect2D(renderSize));
2835 graphicsPipeline = makeGraphicsPipeline(vk, // const DeviceInterface& vk
2836 vkDevice, // const VkDevice device
2837 *pipelineLayout, // const VkPipelineLayout pipelineLayout
2838 *vertexShaderModule, // const VkShaderModule vertexShaderModule
2839 *tessControlShaderModule, // const VkShaderModule tessellationControlShaderModule
2840 *tessEvalShaderModule, // const VkShaderModule tessellationEvalShaderModule
2841 DE_NULL, // const VkShaderModule geometryShaderModule
2842 *fragmentShaderModule, // const VkShaderModule fragmentShaderModule
2843 *renderPass, // const VkRenderPass renderPass
2844 viewports, // const std::vector<VkViewport>& viewports
2845 scissors, // const std::vector<VkRect2D>& scissors
2846 VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, // const VkPrimitiveTopology topology
2847 0u, // const deUint32 subpass
2848 patchControlPoints, // const deUint32 patchControlPoints
2849 &vertexInputStateParams); // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
2852 // Create command pool
2853 cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
2855 // Create command buffer
2857 const VkClearValue clearValue = getDefaultClearColor();
2859 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2861 beginCommandBuffer(vk, *cmdBuffer);
2863 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), clearValue);
2865 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
2868 const VkDescriptorSet descriptorSets[] = { *descriptorSet, extraResources };
2869 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
2872 vk.cmdDraw(*cmdBuffer, vertexCount, 1, 0, 0);
2874 endRenderPass(vk, *cmdBuffer);
2876 // Insert a barrier so data written by the shader is available to the host
2878 const VkBufferMemoryBarrier bufferBarrier =
2880 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
2881 DE_NULL, // const void* pNext;
2882 VK_ACCESS_SHADER_WRITE_BIT, // VkAccessFlags srcAccessMask;
2883 VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
2884 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
2885 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
2886 *m_outputBuffer, // VkBuffer buffer;
2887 0, // VkDeviceSize offset;
2888 VK_WHOLE_SIZE, // VkDeviceSize size;
2891 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
2892 0, (const VkMemoryBarrier*)DE_NULL,
2894 0, (const VkImageMemoryBarrier*)DE_NULL);
2897 endCommandBuffer(vk, *cmdBuffer);
2901 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
2904 // TessControlExecutor
// Shader executor that evaluates the ShaderSpec inside a tessellation
// control shader stage.  Inputs/outputs travel through the SSBOs managed
// by the TessellationExecutor base class.
2906 class TessControlExecutor : public TessellationExecutor
2909 TessControlExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2910 virtual ~TessControlExecutor (void);
// Registers the "vert"/"tess_control"/"tess_eval"/"frag" program sources
// this executor expects to find in the binary collection at execute() time.
2912 static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
2914 virtual void execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);
// Builds the GLSL tessellation control shader that embeds the spec's shader body.
2917 static std::string generateTessControlShader (const ShaderSpec& shaderSpec);
// All state lives in the TessellationExecutor base; the constructor only forwards.
2920 TessControlExecutor::TessControlExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
2921 : TessellationExecutor(context, shaderSpec, extraResourcesLayout)
2925 TessControlExecutor::~TessControlExecutor (void)
// Generates the tessellation control shader source: a single-vertex patch
// whose main() sets all tessellation levels to 1.0 and then runs the
// spec's shader body, indexed by gl_PrimitiveID (one patch per value).
2929 std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
2931 std::ostringstream src;
2932 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
// GLSL ES 3.10 needs the tessellation extension enabled explicitly.
2934 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
2935 src << "#extension GL_EXT_tessellation_shader : require\n\n";
2937 if (!shaderSpec.globalDeclarations.empty())
2938 src << shaderSpec.globalDeclarations << "\n";
// One output vertex per patch: the TCS is used for computation, not geometry.
2940 src << "\nlayout(vertices = 1) out;\n\n";
// Input/output SSBO block declarations (provided by TessellationExecutor helpers).
2942 declareBufferBlocks(src, shaderSpec);
2944 src << "void main (void)\n{\n";
// Minimal tessellation: all inner/outer levels fixed at 1.0.
2946 for (int ndx = 0; ndx < 2; ndx++)
2947 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
2949 for (int ndx = 0; ndx < 4; ndx++)
2950 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
// The patch index selects which element of the I/O buffers this invocation handles.
2953 << "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
2955 generateExecBufferIo(src, shaderSpec, "invocationId");
// Minimal pass-through tessellation evaluation shader used alongside the
// TCS produced by generateTessControlShader(); it just forwards the
// tessellation coordinate as the vertex position.
2962 static std::string generateEmptyTessEvalShader ()
2964 std::ostringstream src;
2966 src << "#version 450\n"
2967 "#extension GL_EXT_tessellation_shader : require\n\n";
2969 src << "layout(triangles, ccw) in;\n";
2971 src << "\nvoid main (void)\n{\n"
2972 << "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n"
// Adds all four pipeline stages: a pass-through vertex shader, the TCS
// carrying the actual test computation, an empty TES and an empty fragment
// shader.  Names match what the renderer looks up from the binary collection.
2978 void TessControlExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
2980 programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
2981 programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) << shaderSpec.buildOptions;
2982 programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateEmptyTessEvalShader()) << shaderSpec.buildOptions;
2983 programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
// Runs numValues invocations of the spec: uploads inputs, draws one
// 3-control-point patch per value (vertexCount = 3 * numValues), then
// reads results back from the output SSBO.
2986 void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
// Three control points per patch; each patch maps to exactly one value.
2988 const deUint32 patchSize = 3;
2990 initBuffers(numValues);
2992 // Setup input buffer & copy data
2993 uploadInputBuffer(inputs, numValues);
2995 renderTess(numValues, patchSize * numValues, patchSize, extraResources);
2998 readOutputBuffer(outputs, numValues);
3001 // TessEvaluationExecutor
// Shader executor that evaluates the ShaderSpec inside a tessellation
// evaluation shader stage; the TCS is a fixed pass-through.
3003 class TessEvaluationExecutor : public TessellationExecutor
3006 TessEvaluationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
3007 virtual ~TessEvaluationExecutor (void);
// Registers the "vert"/"tess_control"/"tess_eval"/"frag" program sources.
3009 static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
3011 virtual void execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);
// Builds the GLSL tessellation evaluation shader embedding the spec's shader body.
3014 static std::string generateTessEvalShader (const ShaderSpec& shaderSpec);
// All state lives in the TessellationExecutor base; the constructor only forwards.
3017 TessEvaluationExecutor::TessEvaluationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
3018 : TessellationExecutor (context, shaderSpec, extraResourcesLayout)
3022 TessEvaluationExecutor::~TessEvaluationExecutor (void)
// Fixed tessellation control shader used with TessEvaluationExecutor: a
// single-vertex patch with all tessellation levels set to 1.0, doing no
// computation of its own.
3026 static std::string generatePassthroughTessControlShader (void)
3028 std::ostringstream src;
3030 src << "#version 450\n"
3031 "#extension GL_EXT_tessellation_shader : require\n\n";
3033 src << "layout(vertices = 1) out;\n\n";
3035 src << "void main (void)\n{\n";
// Minimal tessellation: all inner/outer levels fixed at 1.0.
3037 for (int ndx = 0; ndx < 2; ndx++)
3038 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
3040 for (int ndx = 0; ndx < 4; ndx++)
3041 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
// Generates the tessellation evaluation shader: isolines tessellation where
// each patch yields two invocations, distinguished by gl_TessCoord.x, so
// invocationId = primitiveID * 2 + (0 or 1).  This pairs with the
// patchSize of 2 used by execute().
3048 std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
3050 std::ostringstream src;
3052 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
// GLSL ES 3.10 needs the tessellation extension enabled explicitly.
3054 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3055 src << "#extension GL_EXT_tessellation_shader : require\n\n";
3057 if (!shaderSpec.globalDeclarations.empty())
3058 src << shaderSpec.globalDeclarations << "\n";
3062 src << "layout(isolines, equal_spacing) in;\n\n";
// Input/output SSBO block declarations (provided by TessellationExecutor helpers).
3064 declareBufferBlocks(src, shaderSpec);
3066 src << "void main (void)\n{\n"
3067 << "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
// Two values per isoline patch: tess coord 0.x selects the even/odd slot.
3068 << "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
3070 generateExecBufferIo(src, shaderSpec, "invocationId");
// Adds all four pipeline stages: a pass-through vertex shader, a fixed
// pass-through TCS, the TES carrying the actual test computation, and an
// empty fragment shader.
3077 void TessEvaluationExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
3079 programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
3080 programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generatePassthroughTessControlShader()) << shaderSpec.buildOptions;
3081 programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) << shaderSpec.buildOptions;
3082 programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
// Runs numValues invocations of the spec via the TES.  Since each isoline
// patch produces two invocations, the buffer/vertex counts are rounded up
// to a multiple of 2; only the first numValues results are read back.
3085 void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
// Two values per patch — matches the invocationId computation in the TES.
3087 const int patchSize = 2;
3088 const int alignedValues = deAlign32(numValues, patchSize);
3090 // Initialize buffers with aligned value count to make room for padding
3091 initBuffers(alignedValues);
3093 // Setup input buffer & copy data
3094 uploadInputBuffer(inputs, numValues);
3096 renderTess((deUint32)alignedValues, (deUint32)alignedValues, (deUint32)patchSize, extraResources);
3099 readOutputBuffer(outputs, numValues);
3106 ShaderExecutor::~ShaderExecutor (void)
// Scans the spec's input symbols for 16-bit float types.
// NOTE(review): the return statements are not visible in this view;
// presumably returns true on the first 16-bit input, false otherwise — confirm.
3110 bool ShaderExecutor::areInputs16Bit (void) const
3112 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3114 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
// Scans the spec's output symbols for 16-bit float types.
// NOTE(review): the return statements are not visible in this view;
// presumably returns true on the first 16-bit output, false otherwise — confirm.
3120 bool ShaderExecutor::areOutputs16Bit (void) const
3122 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3124 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
// Checks whether the output symbol at index ndx uses a 16-bit float type.
// NOTE(review): return statements elided from this view — confirm true/false paths.
3130 bool ShaderExecutor::isOutput16Bit (const size_t ndx) const
3132 if (glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[ndx].varType.getBasicType()))
// Dispatches program-source generation to the executor class matching the
// requested shader stage; throws for stages with no executor.
3139 void generateSources (glu::ShaderType shaderType, const ShaderSpec& shaderSpec, vk::SourceCollections& dst)
3143 case glu::SHADERTYPE_VERTEX: VertexShaderExecutor::generateSources (shaderSpec, dst); break;
3144 case glu::SHADERTYPE_TESSELLATION_CONTROL: TessControlExecutor::generateSources (shaderSpec, dst); break;
3145 case glu::SHADERTYPE_TESSELLATION_EVALUATION: TessEvaluationExecutor::generateSources (shaderSpec, dst); break;
3146 case glu::SHADERTYPE_GEOMETRY: GeometryShaderExecutor::generateSources (shaderSpec, dst); break;
3147 case glu::SHADERTYPE_FRAGMENT: FragmentShaderExecutor::generateSources (shaderSpec, dst); break;
3148 case glu::SHADERTYPE_COMPUTE: ComputeShaderExecutor::generateSources (shaderSpec, dst); break;
3150 TCU_THROW(InternalError, "Unsupported shader type");
// Factory: returns a heap-allocated executor for the given shader stage.
// Ownership transfers to the caller; throws for unsupported stages.
// Must stay in sync with the stage dispatch in generateSources() above.
3154 ShaderExecutor* createExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
3158 case glu::SHADERTYPE_VERTEX: return new VertexShaderExecutor (context, shaderSpec, extraResourcesLayout);
3159 case glu::SHADERTYPE_TESSELLATION_CONTROL: return new TessControlExecutor (context, shaderSpec, extraResourcesLayout);
3160 case glu::SHADERTYPE_TESSELLATION_EVALUATION: return new TessEvaluationExecutor (context, shaderSpec, extraResourcesLayout);
3161 case glu::SHADERTYPE_GEOMETRY: return new GeometryShaderExecutor (context, shaderSpec, extraResourcesLayout);
3162 case glu::SHADERTYPE_FRAGMENT: return new FragmentShaderExecutor (context, shaderSpec, extraResourcesLayout);
3163 case glu::SHADERTYPE_COMPUTE: return new ComputeShaderExecutor (context, shaderSpec, extraResourcesLayout);
3165 TCU_THROW(InternalError, "Unsupported shader type");