Add pipeline barriers flushing buffer data to host
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / shaderexecutor / vktShaderExecutor.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 The Khronos Group Inc.
6  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7  * Copyright (c) 2016 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Vulkan ShaderExecutor
24  *//*--------------------------------------------------------------------*/
25
26 #include "vktShaderExecutor.hpp"
27
28 #include "vkMemUtil.hpp"
29 #include "vkRef.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37
38 #include "gluShaderUtil.hpp"
39
40 #include "tcuVector.hpp"
41 #include "tcuTestLog.hpp"
42 #include "tcuTextureUtil.hpp"
43
44 #include "deUniquePtr.hpp"
45 #include "deStringUtil.hpp"
46 #include "deSharedPtr.hpp"
47
48 #include <map>
49 #include <sstream>
50 #include <iostream>
51
52 using std::vector;
53 using namespace vk;
54
55 namespace vkt
56 {
57 namespace shaderexecutor
58 {
59 namespace
60 {
61
// Default render dimensions used by this file.
// NOTE(review): presumably the framebuffer size in pixels - confirm against the users of these constants.
enum
{
	DEFAULT_RENDER_WIDTH	= 100,
	DEFAULT_RENDER_HEIGHT	= 100,
};
67
// Common typedefs
//
// Shared-pointer wrappers around uniquely-owned Vulkan handles and allocations,
// so that they can be stored in standard containers.

typedef de::SharedPtr<Unique<VkImage> >		VkImageSp;
typedef de::SharedPtr<Unique<VkImageView> >	VkImageViewSp;
typedef de::SharedPtr<Unique<VkBuffer> >	VkBufferSp;
typedef de::SharedPtr<Allocation>			AllocationSp;

// Forward declaration; the definition is not part of this section of the file.
static VkFormat getAttributeFormat(const glu::DataType dataType);
76
77 // Shader utilities
78
79 static VkClearValue     getDefaultClearColor (void)
80 {
81         return makeClearValueColorF32(0.125f, 0.25f, 0.5f, 1.0f);
82 }
83
// Returns the source of a minimal "#version 450" fragment shader whose only
// action is to write vec4(0.0) to the o_color output at location 0.
static std::string generateEmptyFragmentSource (void)
{
	std::ostringstream shader;

	shader << "#version 450\n"
		   << "layout(location=0) out highp vec4 o_color;\n"
		   << "void main (void)\n"
		   << "{\n"
		   << "        o_color = vec4(0.0);\n"
		   << "}\n";

	return shader.str();
}
97
98 void packFloat16Bit (std::ostream& src, const std::vector<Symbol>& outputs)
99 {
100         for (vector<Symbol>::const_iterator symIter = outputs.begin(); symIter != outputs.end(); ++symIter)
101         {
102                 if(glu::isDataTypeFloatType(symIter->varType.getBasicType()))
103                 {
104                         if(glu::isDataTypeVector(symIter->varType.getBasicType()))
105                         {
106                                 for(int i = 0; i < glu::getDataTypeScalarSize(symIter->varType.getBasicType()); i++)
107                                 {
108                                         src << "\tpacked_" << symIter->name << "[" << i << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "], -1.0)));\n";
109                                 }
110                         }
111                         else if (glu::isDataTypeMatrix(symIter->varType.getBasicType()))
112                         {
113                                 int maxRow = 0;
114                                 int maxCol = 0;
115                                 switch (symIter->varType.getBasicType())
116                                 {
117                                 case glu::TYPE_FLOAT_MAT2:
118                                         maxRow = maxCol = 2;
119                                         break;
120                                 case glu::TYPE_FLOAT_MAT2X3:
121                                         maxRow = 2;
122                                         maxCol = 3;
123                                         break;
124                                 case glu::TYPE_FLOAT_MAT2X4:
125                                         maxRow = 2;
126                                         maxCol = 4;
127                                         break;
128                                 case glu::TYPE_FLOAT_MAT3X2:
129                                         maxRow = 3;
130                                         maxCol = 2;
131                                         break;
132                                 case glu::TYPE_FLOAT_MAT3:
133                                         maxRow = maxCol = 3;
134                                         break;
135                                 case glu::TYPE_FLOAT_MAT3X4:
136                                         maxRow = 3;
137                                         maxCol = 4;
138                                         break;
139                                 case glu::TYPE_FLOAT_MAT4X2:
140                                         maxRow = 4;
141                                         maxCol = 2;
142                                         break;
143                                 case glu::TYPE_FLOAT_MAT4X3:
144                                         maxRow = 4;
145                                         maxCol = 3;
146                                         break;
147                                 case glu::TYPE_FLOAT_MAT4:
148                                         maxRow = maxCol = 4;
149                                         break;
150                                 default:
151                                         DE_ASSERT(false);
152                                         break;
153                                 }
154
155                                 for(int i = 0; i < maxRow; i++)
156                                 for(int j = 0; j < maxCol; j++)
157                                 {
158                                         src << "\tpacked_" << symIter->name << "[" << i << "][" << j << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "][" << j << "], -1.0)));\n";
159                                 }
160                         }
161                         else
162                         {
163                                         src << "\tpacked_" << symIter->name << " = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << ", -1.0)));\n";
164                         }
165                 }
166         }
167 }
168
169 static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const char* inputPrefix, const char* outputPrefix)
170 {
171         std::ostringstream      src;
172         int                                     location        = 0;
173
174         src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
175
176         if (!shaderSpec.globalDeclarations.empty())
177                 src << shaderSpec.globalDeclarations << "\n";
178
179         src << "layout(location = " << location << ") in highp vec4 a_position;\n";
180
181         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
182         {
183                 location++;
184                 src << "layout(location = "<< location << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
185                         << "layout(location = " << location - 1 << ") flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
186         }
187
188         src << "\nvoid main (void)\n{\n"
189                 << "    gl_Position = a_position;\n"
190                 << "    gl_PointSize = 1.0;\n";
191
192         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
193                 src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
194
195         src << "}\n";
196
197         return src.str();
198 }
199
200 static std::string generateVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
201 {
202         DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
203
204         std::ostringstream      src;
205
206         src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
207
208         if (!shaderSpec.globalDeclarations.empty())
209                 src << shaderSpec.globalDeclarations << "\n";
210
211         src << "layout(location = 0) in highp vec4 a_position;\n";
212
213         int                     locationNumber  = 1;
214         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
215         {
216                 src <<  "layout(location = " << locationNumber << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
217         }
218
219         locationNumber = 0;
220         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
221         {
222                 DE_ASSERT(output->varType.isBasicType());
223
224                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
225                 {
226                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
227                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
228                         const glu::VarType              intType         (intBaseType, glu::PRECISION_HIGHP);
229
230                         src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
231                 }
232                 else
233                         src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
234         }
235
236         src << "\n"
237                 << "void main (void)\n"
238                 << "{\n"
239                 << "    gl_Position = a_position;\n"
240                 << "    gl_PointSize = 1.0;\n";
241
242         // Declare & fetch local input variables
243         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
244         {
245                 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
246                 {
247                         const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
248                         src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
249                 }
250                 else
251                         src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
252         }
253
254         // Declare local output variables
255         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
256         {
257                 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
258                 {
259                         const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
260                         src << "\t" << tname << " " << output->name << ";\n";
261                         const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
262                         src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
263                 }
264                 else
265                         src << "\t" << glu::declare(output->varType, output->name) << ";\n";
266         }
267
268         // Operation - indented to correct level.
269         {
270                 std::istringstream      opSrc   (shaderSpec.source);
271                 std::string                     line;
272
273                 while (std::getline(opSrc, line))
274                         src << "\t" << line << "\n";
275         }
276
277         if (shaderSpec.packFloat16Bit)
278                 packFloat16Bit(src, shaderSpec.outputs);
279
280         // Assignments to outputs.
281         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
282         {
283                 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
284                 {
285                         src << "\t" << outputPrefix << output->name << " = packed_" << output->name << ";\n";
286                 }
287                 else
288                 {
289                         if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
290                         {
291                                 const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
292                                 const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
293
294                                 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
295                         }
296                         else
297                                 src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
298                 }
299         }
300
301         src << "}\n";
302
303         return src.str();
304 }
305
// Assignment of fragment shader output symbols to output locations.
struct FragmentOutputLayout
{
	std::vector<const Symbol*>		locationSymbols;		//!< Symbols by location
	std::map<std::string, int>		locationMap;			//!< Map from symbol name to start location
};
311
312 static void generateFragShaderOutputDecl (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& outputPrefix)
313 {
314         for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
315         {
316                 const Symbol&                           output          = shaderSpec.outputs[outNdx];
317                 const int                                       location        = de::lookup(outLocationMap, output.name);
318                 const std::string                       outVarName      = outputPrefix + output.name;
319                 glu::VariableDeclaration        decl            (output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));
320
321                 TCU_CHECK_INTERNAL(output.varType.isBasicType());
322
323                 if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
324                 {
325                         const int                       vecSize                 = glu::getDataTypeScalarSize(output.varType.getBasicType());
326                         const glu::DataType     uintBasicType   = vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
327                         const glu::VarType      uintType                (uintBasicType, glu::PRECISION_HIGHP);
328
329                         decl.varType = uintType;
330                         src << decl << ";\n";
331                 }
332                 else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
333                 {
334                         const int                       vecSize                 = glu::getDataTypeScalarSize(output.varType.getBasicType());
335                         const glu::DataType     intBasicType    = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
336                         const glu::VarType      intType                 (intBasicType, glu::PRECISION_HIGHP);
337
338                         decl.varType = intType;
339                         src << decl << ";\n";
340                 }
341                 else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
342                 {
343                         const int                       vecSize                 = glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
344                         const int                       numVecs                 = glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
345                         const glu::DataType     uintBasicType   = glu::getDataTypeUintVec(vecSize);
346                         const glu::VarType      uintType                (uintBasicType, glu::PRECISION_HIGHP);
347
348                         decl.varType = uintType;
349                         for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
350                         {
351                                 decl.name                               = outVarName + "_" + de::toString(vecNdx);
352                                 decl.layout.location    = location + vecNdx;
353                                 src << decl << ";\n";
354                         }
355                 }
356                 else
357                         src << decl << ";\n";
358         }
359 }
360
361 static void generateFragShaderOutAssign (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::string& valuePrefix, const std::string& outputPrefix, const bool isInput16Bit = false)
362 {
363         if (isInput16Bit)
364                 packFloat16Bit(src, shaderSpec.outputs);
365
366         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
367         {
368                 const std::string packPrefix = (isInput16Bit && glu::isDataTypeFloatType(output->varType.getBasicType())) ? "packed_" : "";
369
370                 if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
371                         src << "        o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
372                 else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
373                 {
374                         const int       numVecs         = glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
375
376                         for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
377                                 if (useIntOutputs)
378                                         src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix << output->name << "[" << vecNdx << "]);\n";
379                                 else
380                                         src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << packPrefix << valuePrefix << output->name << "[" << vecNdx << "];\n";
381                 }
382                 else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
383                 {
384                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
385                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
386
387                         src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << valuePrefix << output->name << ");\n";
388                 }
389                 else
390                         src << "\t" << outputPrefix << output->name << " = " << packPrefix << valuePrefix << output->name << ";\n";
391         }
392 }
393
394 static std::string generatePassthroughFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
395 {
396         std::ostringstream      src;
397
398         src <<"#version 450\n";
399
400         if (!shaderSpec.globalDeclarations.empty())
401                 src << shaderSpec.globalDeclarations << "\n";
402
403         int locationNumber = 0;
404         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
405         {
406                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
407                 {
408                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
409                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
410                         const glu::VarType              intType         (intBaseType, glu::PRECISION_HIGHP);
411
412                         src << "layout(location = " << locationNumber << ") flat in " << glu::declare(intType, inputPrefix + output->name) << ";\n";
413                 }
414                 else
415                         src << "layout(location = " << locationNumber << ") flat in " << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
416         }
417
418         generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
419
420         src << "\nvoid main (void)\n{\n";
421
422         generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);
423
424         src << "}\n";
425
426         return src.str();
427 }
428
429 static std::string generateGeometryShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix, const bool pointSizeSupported)
430 {
431         DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
432
433         std::ostringstream      src;
434
435         src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
436
437         if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
438                 src << "#extension GL_EXT_geometry_shader : require\n";
439
440         if (!shaderSpec.globalDeclarations.empty())
441                 src << shaderSpec.globalDeclarations << "\n";
442
443         src << "layout(points) in;\n"
444                 << "layout(points, max_vertices = 1) out;\n";
445
446         int locationNumber = 0;
447         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
448                 src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";
449
450         locationNumber = 0;
451         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
452         {
453                 DE_ASSERT(output->varType.isBasicType());
454
455                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
456                 {
457                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
458                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
459                         const glu::VarType              intType         (intBaseType, glu::PRECISION_HIGHP);
460
461                         src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
462                 }
463                 else
464                         src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
465         }
466
467         src << "\n"
468                 << "void main (void)\n"
469                 << "{\n"
470                 << "    gl_Position = gl_in[0].gl_Position;\n"
471                 << (pointSizeSupported ? "      gl_PointSize = gl_in[0].gl_PointSize;\n\n" : "");
472
473         // Fetch input variables
474         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
475                 src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";
476
477         // Declare local output variables.
478         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
479                 src << "\t" << glu::declare(output->varType, output->name) << ";\n";
480
481         src << "\n";
482
483         // Operation - indented to correct level.
484         {
485                 std::istringstream      opSrc   (shaderSpec.source);
486                 std::string                     line;
487
488                 while (std::getline(opSrc, line))
489                         src << "\t" << line << "\n";
490         }
491
492         // Assignments to outputs.
493         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
494         {
495                 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
496                 {
497                         const int                               vecSize         = glu::getDataTypeScalarSize(output->varType.getBasicType());
498                         const glu::DataType             intBaseType     = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
499
500                         src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
501                 }
502                 else
503                         src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
504         }
505
506         src << "        EmitVertex();\n"
507                 << "    EndPrimitive();\n"
508                 << "}\n";
509
510         return src.str();
511 }
512
513 static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
514 {
515         std::ostringstream src;
516         src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
517         if (!shaderSpec.globalDeclarations.empty())
518                 src << shaderSpec.globalDeclarations << "\n";
519
520         int                     locationNumber  = 0;
521         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
522         {
523                 src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
524         }
525
526         generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
527
528         src << "\nvoid main (void)\n{\n";
529
530         // Declare & fetch local input variables
531         for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
532         {
533                 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
534                 {
535                         const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
536                         src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
537                 }
538                 else
539                         src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
540         }
541
542         // Declare output variables
543         for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
544         {
545                 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
546                 {
547                         const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
548                         src << "\t" << tname << " " << output->name << ";\n";
549                         const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
550                         src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
551                 }
552                 else
553                         src << "\t" << glu::declare(output->varType, output->name) << ";\n";
554         }
555
556         // Operation - indented to correct level.
557         {
558                 std::istringstream      opSrc   (shaderSpec.source);
559                 std::string                     line;
560
561                 while (std::getline(opSrc, line))
562                         src << "\t" << line << "\n";
563         }
564
565         generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix, shaderSpec.packFloat16Bit);
566
567         src << "}\n";
568
569         return src.str();
570 }
571
572 // FragmentOutExecutor
573
// ShaderExecutor specialization that keeps a fragment output layout and a set of
// vertex input bindings/attributes with their backing buffers.
// NOTE(review): the member function definitions are outside this section of the file;
// the notes below are limited to what the declaration itself shows.
class FragmentOutExecutor : public ShaderExecutor
{
public:
																FragmentOutExecutor		(Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual														~FragmentOutExecutor	(void);

	// Takes numValues plus arrays of input and output pointers, and a descriptor set
	// with extra resources.
	// NOTE(review): presumably one pointer per spec input/output symbol - confirm against the definition.
	virtual void												execute					(int					numValues,
																						 const void* const*		inputs,
																						 void* const*			outputs,
																						 VkDescriptorSet		extraResources);

protected:
	const glu::ShaderType										m_shaderType;		// Shader type given at construction.
	const FragmentOutputLayout									m_outputLayout;		// Output symbol to location assignment.

private:
	void														bindAttributes			(int					numValues,
																						 const void* const*		inputs);

	void														addAttribute			(deUint32				bindingLocation,
																						 VkFormat				format,
																						 deUint32				sizePerElement,
																						 deUint32				count,
																						 const void*			dataPtr);
	// reinit render data members
	virtual void												clearRenderData			(void);

	const VkDescriptorSetLayout									m_extraResourcesLayout;

	// Vertex input state and the buffers/allocations that back it.
	std::vector<VkVertexInputBindingDescription>				m_vertexBindingDescriptions;
	std::vector<VkVertexInputAttributeDescription>				m_vertexAttributeDescriptions;
	std::vector<VkBufferSp>										m_vertexBuffers;
	std::vector<AllocationSp>									m_vertexBufferAllocs;
};
608
609 static FragmentOutputLayout computeFragmentOutputLayout (const std::vector<Symbol>& symbols)
610 {
611         FragmentOutputLayout    ret;
612         int                                             location        = 0;
613
614         for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
615         {
616                 const int       numLocations    = glu::getDataTypeNumLocations(it->varType.getBasicType());
617
618                 TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
619                 de::insert(ret.locationMap, it->name, location);
620                 location += numLocations;
621
622                 for (int ndx = 0; ndx < numLocations; ++ndx)
623                         ret.locationSymbols.push_back(&*it);
624         }
625
626         return ret;
627 }
628
629 FragmentOutExecutor::FragmentOutExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
630         : ShaderExecutor                        (context, shaderSpec)
631         , m_shaderType                          (shaderType)
632         , m_outputLayout                        (computeFragmentOutputLayout(m_shaderSpec.outputs))
633         , m_extraResourcesLayout        (extraResourcesLayout)
634 {
635         const VkPhysicalDevice          physicalDevice = m_context.getPhysicalDevice();
636         const InstanceInterface&        vki = m_context.getInstanceInterface();
637
638         // Input attributes
639         for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
640         {
641                 const Symbol&                           symbol = m_shaderSpec.inputs[inputNdx];
642                 const glu::DataType                     basicType = symbol.varType.getBasicType();
643                 const VkFormat                          format = getAttributeFormat(basicType);
644                 const VkFormatProperties        formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
645                 if ((formatProperties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0)
646                         TCU_THROW(NotSupportedError, "format not supported by device as vertex buffer attribute format");
647         }
648 }
649
650 FragmentOutExecutor::~FragmentOutExecutor (void)
651 {
652 }
653
654 static std::vector<tcu::Vec2> computeVertexPositions (int numValues, const tcu::IVec2& renderSize)
655 {
656         std::vector<tcu::Vec2> positions(numValues);
657         for (int valNdx = 0; valNdx < numValues; valNdx++)
658         {
659                 const int               ix              = valNdx % renderSize.x();
660                 const int               iy              = valNdx / renderSize.x();
661                 const float             fx              = -1.0f + 2.0f*((float(ix) + 0.5f) / float(renderSize.x()));
662                 const float             fy              = -1.0f + 2.0f*((float(iy) + 0.5f) / float(renderSize.y()));
663
664                 positions[valNdx] = tcu::Vec2(fx, fy);
665         }
666
667         return positions;
668 }
669
670 static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
671 {
672         const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
673         {
674                 tcu::TextureFormat::R,
675                 tcu::TextureFormat::RG,
676                 tcu::TextureFormat::RGBA,       // No RGB variants available.
677                 tcu::TextureFormat::RGBA
678         };
679
680         const glu::DataType                                     basicType               = outputType.getBasicType();
681         const int                                                       numComps                = glu::getDataTypeNumComponents(basicType);
682         tcu::TextureFormat::ChannelType         channelType;
683
684         switch (glu::getDataTypeScalarType(basicType))
685         {
686                 case glu::TYPE_UINT:    channelType = tcu::TextureFormat::UNSIGNED_INT32;                                                                                                               break;
687                 case glu::TYPE_INT:             channelType = tcu::TextureFormat::SIGNED_INT32;                                                                                                                 break;
688                 case glu::TYPE_BOOL:    channelType = tcu::TextureFormat::SIGNED_INT32;                                                                                                                 break;
689                 case glu::TYPE_FLOAT:   channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;                   break;
690                 case glu::TYPE_FLOAT16: channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::HALF_FLOAT;              break;
691                 default:
692                         throw tcu::InternalError("Invalid output type");
693         }
694
695         DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
696
697         return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
698 }
699
700 static VkFormat getAttributeFormat (const glu::DataType dataType)
701 {
702         switch (dataType)
703         {
704                 case glu::TYPE_FLOAT16:                 return VK_FORMAT_R16_SFLOAT;
705                 case glu::TYPE_FLOAT16_VEC2:    return VK_FORMAT_R16G16_SFLOAT;
706                 case glu::TYPE_FLOAT16_VEC3:    return VK_FORMAT_R16G16B16_SFLOAT;
707                 case glu::TYPE_FLOAT16_VEC4:    return VK_FORMAT_R16G16B16A16_SFLOAT;
708
709                 case glu::TYPE_FLOAT:                   return VK_FORMAT_R32_SFLOAT;
710                 case glu::TYPE_FLOAT_VEC2:              return VK_FORMAT_R32G32_SFLOAT;
711                 case glu::TYPE_FLOAT_VEC3:              return VK_FORMAT_R32G32B32_SFLOAT;
712                 case glu::TYPE_FLOAT_VEC4:              return VK_FORMAT_R32G32B32A32_SFLOAT;
713
714                 case glu::TYPE_INT:                             return VK_FORMAT_R32_SINT;
715                 case glu::TYPE_INT_VEC2:                return VK_FORMAT_R32G32_SINT;
716                 case glu::TYPE_INT_VEC3:                return VK_FORMAT_R32G32B32_SINT;
717                 case glu::TYPE_INT_VEC4:                return VK_FORMAT_R32G32B32A32_SINT;
718
719                 case glu::TYPE_UINT:                    return VK_FORMAT_R32_UINT;
720                 case glu::TYPE_UINT_VEC2:               return VK_FORMAT_R32G32_UINT;
721                 case glu::TYPE_UINT_VEC3:               return VK_FORMAT_R32G32B32_UINT;
722                 case glu::TYPE_UINT_VEC4:               return VK_FORMAT_R32G32B32A32_UINT;
723
724                 case glu::TYPE_FLOAT_MAT2:              return VK_FORMAT_R32G32_SFLOAT;
725                 case glu::TYPE_FLOAT_MAT2X3:    return VK_FORMAT_R32G32B32_SFLOAT;
726                 case glu::TYPE_FLOAT_MAT2X4:    return VK_FORMAT_R32G32B32A32_SFLOAT;
727                 case glu::TYPE_FLOAT_MAT3X2:    return VK_FORMAT_R32G32_SFLOAT;
728                 case glu::TYPE_FLOAT_MAT3:              return VK_FORMAT_R32G32B32_SFLOAT;
729                 case glu::TYPE_FLOAT_MAT3X4:    return VK_FORMAT_R32G32B32A32_SFLOAT;
730                 case glu::TYPE_FLOAT_MAT4X2:    return VK_FORMAT_R32G32_SFLOAT;
731                 case glu::TYPE_FLOAT_MAT4X3:    return VK_FORMAT_R32G32B32_SFLOAT;
732                 case glu::TYPE_FLOAT_MAT4:              return VK_FORMAT_R32G32B32A32_SFLOAT;
733                 default:
734                         DE_ASSERT(false);
735                         return VK_FORMAT_UNDEFINED;
736         }
737 }
738
739 void FragmentOutExecutor::addAttribute (deUint32 bindingLocation, VkFormat format, deUint32 sizePerElement, deUint32 count, const void* dataPtr)
740 {
741         // Add binding specification
742         const deUint32                                                  binding = (deUint32)m_vertexBindingDescriptions.size();
743         const VkVertexInputBindingDescription   bindingDescription =
744         {
745                 binding,
746                 sizePerElement,
747                 VK_VERTEX_INPUT_RATE_VERTEX
748         };
749
750         m_vertexBindingDescriptions.push_back(bindingDescription);
751
752         // Add location and format specification
753         const VkVertexInputAttributeDescription attributeDescription =
754         {
755                 bindingLocation,                        // deUint32     location;
756                 binding,                                        // deUint32     binding;
757                 format,                                         // VkFormat     format;
758                 0u,                                                     // deUint32     offsetInBytes;
759         };
760
761         m_vertexAttributeDescriptions.push_back(attributeDescription);
762
763         // Upload data to buffer
764         const VkDevice                          vkDevice                        = m_context.getDevice();
765         const DeviceInterface&          vk                                      = m_context.getDeviceInterface();
766         const deUint32                          queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
767
768         const VkDeviceSize                      inputSize                       = sizePerElement * count;
769         const VkBufferCreateInfo        vertexBufferParams      =
770         {
771                 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,           // VkStructureType              sType;
772                 DE_NULL,                                                                        // const void*                  pNext;
773                 0u,                                                                                     // VkBufferCreateFlags  flags;
774                 inputSize,                                                                      // VkDeviceSize                 size;
775                 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,                      // VkBufferUsageFlags   usage;
776                 VK_SHARING_MODE_EXCLUSIVE,                                      // VkSharingMode                sharingMode;
777                 1u,                                                                                     // deUint32                             queueFamilyCount;
778                 &queueFamilyIndex                                                       // const deUint32*              pQueueFamilyIndices;
779         };
780
781         Move<VkBuffer>                  buffer  = createBuffer(vk, vkDevice, &vertexBufferParams);
782         de::MovePtr<Allocation> alloc   = m_context.getDefaultAllocator().allocate(getBufferMemoryRequirements(vk, vkDevice, *buffer), MemoryRequirement::HostVisible);
783
784         VK_CHECK(vk.bindBufferMemory(vkDevice, *buffer, alloc->getMemory(), alloc->getOffset()));
785
786         deMemcpy(alloc->getHostPtr(), dataPtr, (size_t)inputSize);
787         flushAlloc(vk, vkDevice, *alloc);
788
789         m_vertexBuffers.push_back(de::SharedPtr<Unique<VkBuffer> >(new Unique<VkBuffer>(buffer)));
790         m_vertexBufferAllocs.push_back(AllocationSp(alloc.release()));
791 }
792
793 void FragmentOutExecutor::bindAttributes (int numValues, const void* const* inputs)
794 {
795         // Input attributes
796         for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
797         {
798                 const Symbol&           symbol                  = m_shaderSpec.inputs[inputNdx];
799                 const void*                     ptr                             = inputs[inputNdx];
800                 const glu::DataType     basicType               = symbol.varType.getBasicType();
801                 const int                       vecSize                 = glu::getDataTypeScalarSize(basicType);
802                 const VkFormat          format                  = getAttributeFormat(basicType);
803                 int                                     elementSize             = 0;
804                 int                                     numAttrsToAdd   = 1;
805
806                 if (glu::isDataTypeFloatOrVec(basicType))
807                         elementSize = sizeof(float);
808                 else if (glu::isDataTypeFloat16OrVec(basicType))
809                         elementSize = sizeof(deUint16);
810                 else if (glu::isDataTypeIntOrIVec(basicType))
811                         elementSize = sizeof(int);
812                 else if (glu::isDataTypeUintOrUVec(basicType))
813                         elementSize = sizeof(deUint32);
814                 else if (glu::isDataTypeMatrix(basicType))
815                 {
816                         int             numRows = glu::getDataTypeMatrixNumRows(basicType);
817                         int             numCols = glu::getDataTypeMatrixNumColumns(basicType);
818
819                         elementSize = numRows * numCols * (int)sizeof(float);
820                         numAttrsToAdd = numCols;
821                 }
822                 else
823                         DE_ASSERT(false);
824
825                 // add attributes, in case of matrix every column is binded as an attribute
826                 for (int attrNdx = 0; attrNdx < numAttrsToAdd; attrNdx++)
827                 {
828                         addAttribute((deUint32)m_vertexBindingDescriptions.size(), format, elementSize * vecSize, numValues, ptr);
829                 }
830         }
831 }
832
833 void FragmentOutExecutor::clearRenderData (void)
834 {
835         m_vertexBindingDescriptions.clear();
836         m_vertexAttributeDescriptions.clear();
837         m_vertexBuffers.clear();
838         m_vertexBufferAllocs.clear();
839 }
840
841 static Move<VkDescriptorSetLayout> createEmptyDescriptorSetLayout (const DeviceInterface& vkd, VkDevice device)
842 {
843         const VkDescriptorSetLayoutCreateInfo   createInfo      =
844         {
845                 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
846                 DE_NULL,
847                 (VkDescriptorSetLayoutCreateFlags)0,
848                 0u,
849                 DE_NULL,
850         };
851         return createDescriptorSetLayout(vkd, device, &createInfo);
852 }
853
854 static Move<VkDescriptorPool> createDummyDescriptorPool (const DeviceInterface& vkd, VkDevice device)
855 {
856         const VkDescriptorPoolSize                      dummySize       =
857         {
858                 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
859                 1u,
860         };
861         const VkDescriptorPoolCreateInfo        createInfo      =
862         {
863                 VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
864                 DE_NULL,
865                 (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
866                 1u,
867                 1u,
868                 &dummySize
869         };
870         return createDescriptorPool(vkd, device, &createInfo);
871 }
872
873 static Move<VkDescriptorSet> allocateSingleDescriptorSet (const DeviceInterface& vkd, VkDevice device, VkDescriptorPool pool, VkDescriptorSetLayout layout)
874 {
875         const VkDescriptorSetAllocateInfo       allocInfo       =
876         {
877                 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
878                 DE_NULL,
879                 pool,
880                 1u,
881                 &layout,
882         };
883         return allocateDescriptorSet(vkd, device, &allocInfo);
884 }
885
886 void FragmentOutExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
887 {
888         const VkDevice                                                                          vkDevice                                = m_context.getDevice();
889         const DeviceInterface&                                                          vk                                              = m_context.getDeviceInterface();
890         const VkQueue                                                                           queue                                   = m_context.getUniversalQueue();
891         const deUint32                                                                          queueFamilyIndex                = m_context.getUniversalQueueFamilyIndex();
892         Allocator&                                                                                      memAlloc                                = m_context.getDefaultAllocator();
893
894         const deUint32                                                                          renderSizeX                             = de::min(static_cast<deUint32>(DEFAULT_RENDER_WIDTH), (deUint32)numValues);
895         const deUint32                                                                          renderSizeY                             = ((deUint32)numValues / renderSizeX) + (((deUint32)numValues % renderSizeX != 0) ? 1u : 0u);
896         const tcu::UVec2                                                                        renderSize                              (renderSizeX, renderSizeY);
897         std::vector<tcu::Vec2>                                                          positions;
898
899         const bool                                                                                      useGeometryShader               = m_shaderType == glu::SHADERTYPE_GEOMETRY;
900
901         std::vector<VkImageSp>                                                          colorImages;
902         std::vector<VkImageMemoryBarrier>                                       colorImagePreRenderBarriers;
903         std::vector<VkImageMemoryBarrier>                                       colorImagePostRenderBarriers;
904         std::vector<AllocationSp>                                                       colorImageAllocs;
905         std::vector<VkAttachmentDescription>                            attachments;
906         std::vector<VkClearValue>                                                       attachmentClearValues;
907         std::vector<VkImageViewSp>                                                      colorImageViews;
908
909         std::vector<VkPipelineColorBlendAttachmentState>        colorBlendAttachmentStates;
910         std::vector<VkAttachmentReference>                                      colorAttachmentReferences;
911
912         Move<VkRenderPass>                                                                      renderPass;
913         Move<VkFramebuffer>                                                                     framebuffer;
914         Move<VkPipelineLayout>                                                          pipelineLayout;
915         Move<VkPipeline>                                                                        graphicsPipeline;
916
917         Move<VkShaderModule>                                                            vertexShaderModule;
918         Move<VkShaderModule>                                                            geometryShaderModule;
919         Move<VkShaderModule>                                                            fragmentShaderModule;
920
921         Move<VkCommandPool>                                                                     cmdPool;
922         Move<VkCommandBuffer>                                                           cmdBuffer;
923
924         Unique<VkDescriptorSetLayout>                                           emptyDescriptorSetLayout        (createEmptyDescriptorSetLayout(vk, vkDevice));
925         Unique<VkDescriptorPool>                                                        dummyDescriptorPool                     (createDummyDescriptorPool(vk, vkDevice));
926         Unique<VkDescriptorSet>                                                         emptyDescriptorSet                      (allocateSingleDescriptorSet(vk, vkDevice, *dummyDescriptorPool, *emptyDescriptorSetLayout));
927
928         clearRenderData();
929
930         // Compute positions - 1px points are used to drive fragment shading.
931         positions = computeVertexPositions(numValues, renderSize.cast<int>());
932
933         // Bind attributes
934         addAttribute(0u, VK_FORMAT_R32G32_SFLOAT, sizeof(tcu::Vec2), (deUint32)positions.size(), &positions[0]);
935         bindAttributes(numValues, inputs);
936
937         // Create color images
938         {
939                 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
940                 {
941                         VK_FALSE,                                                                                                                                       // VkBool32                                             blendEnable;
942                         VK_BLEND_FACTOR_ONE,                                                                                                            // VkBlendFactor                                srcColorBlendFactor;
943                         VK_BLEND_FACTOR_ZERO,                                                                                                           // VkBlendFactor                                dstColorBlendFactor;
944                         VK_BLEND_OP_ADD,                                                                                                                        // VkBlendOp                                    blendOpColor;
945                         VK_BLEND_FACTOR_ONE,                                                                                                            // VkBlendFactor                                srcAlphaBlendFactor;
946                         VK_BLEND_FACTOR_ZERO,                                                                                                           // VkBlendFactor                                destAlphaBlendFactor;
947                         VK_BLEND_OP_ADD,                                                                                                                        // VkBlendOp                                    blendOpAlpha;
948                         (VK_COLOR_COMPONENT_R_BIT |
949                          VK_COLOR_COMPONENT_G_BIT |
950                          VK_COLOR_COMPONENT_B_BIT |
951                          VK_COLOR_COMPONENT_A_BIT)                                                                                                      // VkColorComponentFlags                colorWriteMask;
952                 };
953
954                 for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
955                 {
956                         const bool              isFloat         = isDataTypeFloatOrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
957                         const bool              isFloat16b      = glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
958                         const bool              isSigned        = isDataTypeIntOrIVec (m_shaderSpec.outputs[outNdx].varType.getBasicType());
959                         const bool              isBool          = isDataTypeBoolOrBVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
960                         const VkFormat  colorFormat = isFloat16b ? VK_FORMAT_R16G16B16A16_SFLOAT : (isFloat ? VK_FORMAT_R32G32B32A32_SFLOAT : (isSigned || isBool ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32G32B32A32_UINT));
961
962                         {
963                                 const VkFormatProperties        formatProperties        = getPhysicalDeviceFormatProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), colorFormat);
964                                 if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)
965                                         TCU_THROW(NotSupportedError, "Image format doesn't support COLOR_ATTACHMENT_BIT");
966                         }
967
968                         const VkImageCreateInfo  colorImageParams =
969                         {
970                                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                                                                            // VkStructureType                              sType;
971                                 DE_NULL,                                                                                                                                        // const void*                                  pNext;
972                                 0u,                                                                                                                                                     // VkImageCreateFlags                   flags;
973                                 VK_IMAGE_TYPE_2D,                                                                                                                       // VkImageType                                  imageType;
974                                 colorFormat,                                                                                                                            // VkFormat                                             format;
975                                 { renderSize.x(), renderSize.y(), 1u },                                                                         // VkExtent3D                                   extent;
976                                 1u,                                                                                                                                                     // deUint32                                             mipLevels;
977                                 1u,                                                                                                                                                     // deUint32                                             arraySize;
978                                 VK_SAMPLE_COUNT_1_BIT,                                                                                                          // VkSampleCountFlagBits                samples;
979                                 VK_IMAGE_TILING_OPTIMAL,                                                                                                        // VkImageTiling                                tiling;
980                                 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT,          // VkImageUsageFlags                    usage;
981                                 VK_SHARING_MODE_EXCLUSIVE,                                                                                                      // VkSharingMode                                sharingMode;
982                                 1u,                                                                                                                                                     // deUint32                                             queueFamilyCount;
983                                 &queueFamilyIndex,                                                                                                                      // const deUint32*                              pQueueFamilyIndices;
984                                 VK_IMAGE_LAYOUT_UNDEFINED,                                                                                                      // VkImageLayout                                initialLayout;
985                         };
986
987                         const VkAttachmentDescription colorAttachmentDescription =
988                         {
989                                 0u,                                                                                                                                                     // VkAttachmentDescriptorFlags  flags;
990                                 colorFormat,                                                                                                                            // VkFormat                                             format;
991                                 VK_SAMPLE_COUNT_1_BIT,                                                                                                          // VkSampleCountFlagBits                samples;
992                                 VK_ATTACHMENT_LOAD_OP_CLEAR,                                                                                            // VkAttachmentLoadOp                   loadOp;
993                                 VK_ATTACHMENT_STORE_OP_STORE,                                                                                           // VkAttachmentStoreOp                  storeOp;
994                                 VK_ATTACHMENT_LOAD_OP_DONT_CARE,                                                                                        // VkAttachmentLoadOp                   stencilLoadOp;
995                                 VK_ATTACHMENT_STORE_OP_DONT_CARE,                                                                                       // VkAttachmentStoreOp                  stencilStoreOp;
996                                 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,                                                                       // VkImageLayout                                initialLayout;
997                                 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,                                                                       // VkImageLayout                                finalLayout;
998                         };
999
1000                         Move<VkImage> colorImage = createImage(vk, vkDevice, &colorImageParams);
1001                         colorImages.push_back(de::SharedPtr<Unique<VkImage> >(new Unique<VkImage>(colorImage)));
1002                         attachmentClearValues.push_back(getDefaultClearColor());
1003
1004                         // Allocate and bind color image memory
1005                         {
1006                                 de::MovePtr<Allocation> colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *((const VkImage*) colorImages.back().get())), MemoryRequirement::Any);
1007                                 VK_CHECK(vk.bindImageMemory(vkDevice, colorImages.back().get()->get(), colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
1008                                 colorImageAllocs.push_back(de::SharedPtr<Allocation>(colorImageAlloc.release()));
1009
1010                                 attachments.push_back(colorAttachmentDescription);
1011                                 colorBlendAttachmentStates.push_back(colorBlendAttachmentState);
1012
1013                                 const VkAttachmentReference colorAttachmentReference =
1014                                 {
1015                                         (deUint32) (colorImages.size() - 1),                    //      deUint32                attachment;
1016                                         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL                //      VkImageLayout   layout;
1017                                 };
1018
1019                                 colorAttachmentReferences.push_back(colorAttachmentReference);
1020                         }
1021
1022                         // Create color attachment view
1023                         {
1024                                 const VkImageViewCreateInfo colorImageViewParams =
1025                                 {
1026                                         VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,                       // VkStructureType                      sType;
1027                                         DE_NULL,                                                                                        // const void*                          pNext;
1028                                         0u,                                                                                                     // VkImageViewCreateFlags       flags;
1029                                         colorImages.back().get()->get(),                                        // VkImage                                      image;
1030                                         VK_IMAGE_VIEW_TYPE_2D,                                                          // VkImageViewType                      viewType;
1031                                         colorFormat,                                                                            // VkFormat                                     format;
1032                                         {
1033                                                 VK_COMPONENT_SWIZZLE_R,                                                 // VkComponentSwizzle           r;
1034                                                 VK_COMPONENT_SWIZZLE_G,                                                 // VkComponentSwizzle           g;
1035                                                 VK_COMPONENT_SWIZZLE_B,                                                 // VkComponentSwizzle           b;
1036                                                 VK_COMPONENT_SWIZZLE_A                                                  // VkComponentSwizzle           a;
1037                                         },                                                                                                      // VkComponentMapping           components;
1038                                         {
1039                                                 VK_IMAGE_ASPECT_COLOR_BIT,                                              // VkImageAspectFlags           aspectMask;
1040                                                 0u,                                                                                             // deUint32                                     baseMipLevel;
1041                                                 1u,                                                                                             // deUint32                                     mipLevels;
1042                                                 0u,                                                                                             // deUint32                                     baseArraySlice;
1043                                                 1u                                                                                              // deUint32                                     arraySize;
1044                                         }                                                                                                       // VkImageSubresourceRange      subresourceRange;
1045                                 };
1046
1047                                 Move<VkImageView> colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
1048                                 colorImageViews.push_back(de::SharedPtr<Unique<VkImageView> >(new Unique<VkImageView>(colorImageView)));
1049
1050                                 const VkImageMemoryBarrier      colorImagePreRenderBarrier =
1051                                 {
1052                                         VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,                                 // sType
1053                                         DE_NULL,                                                                                                // pNext
1054                                         0u,                                                                                                             // srcAccessMask
1055                                         (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1056                                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),                                  // dstAccessMask
1057                                         VK_IMAGE_LAYOUT_UNDEFINED,                                                              // oldLayout
1058                                         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,                               // newLayout
1059                                         VK_QUEUE_FAMILY_IGNORED,                                                                // srcQueueFamilyIndex
1060                                         VK_QUEUE_FAMILY_IGNORED,                                                                // dstQueueFamilyIndex
1061                                         colorImages.back().get()->get(),                                                // image
1062                                         {
1063                                                 VK_IMAGE_ASPECT_COLOR_BIT,                                                              // aspectMask
1064                                                 0u,                                                                                                             // baseMipLevel
1065                                                 1u,                                                                                                             // levelCount
1066                                                 0u,                                                                                                             // baseArrayLayer
1067                                                 1u,                                                                                                             // layerCount
1068                                         }                                                                                                               // subresourceRange
1069                                 };
1070                                 colorImagePreRenderBarriers.push_back(colorImagePreRenderBarrier);
1071
1072                                 const VkImageMemoryBarrier      colorImagePostRenderBarrier =
1073                                 {
1074                                         VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,                                 // sType
1075                                         DE_NULL,                                                                                                // pNext
1076                                         (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1077                                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),                                  // srcAccessMask
1078                                         VK_ACCESS_TRANSFER_READ_BIT,                                                    // dstAccessMask
1079                                         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,                               // oldLayout
1080                                         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,                                   // newLayout
1081                                         VK_QUEUE_FAMILY_IGNORED,                                                                // srcQueueFamilyIndex
1082                                         VK_QUEUE_FAMILY_IGNORED,                                                                // dstQueueFamilyIndex
1083                                         colorImages.back().get()->get(),                                                // image
1084                                         {
1085                                                 VK_IMAGE_ASPECT_COLOR_BIT,                                                              // aspectMask
1086                                                 0u,                                                                                                             // baseMipLevel
1087                                                 1u,                                                                                                             // levelCount
1088                                                 0u,                                                                                                             // baseArrayLayer
1089                                                 1u,                                                                                                             // layerCount
1090                                         }                                                                                                               // subresourceRange
1091                                 };
1092                                 colorImagePostRenderBarriers.push_back(colorImagePostRenderBarrier);
1093                         }
1094                 }
1095         }
1096
1097         // Create render pass
1098         {
1099                 const VkSubpassDescription subpassDescription =
1100                 {
1101                         0u,                                                                                                     // VkSubpassDescriptionFlags    flags;
1102                         VK_PIPELINE_BIND_POINT_GRAPHICS,                                        // VkPipelineBindPoint                  pipelineBindPoint;
1103                         0u,                                                                                                     // deUint32                                             inputCount;
1104                         DE_NULL,                                                                                        // const VkAttachmentReference* pInputAttachments;
1105                         (deUint32)colorImages.size(),                                           // deUint32                                             colorCount;
1106                         &colorAttachmentReferences[0],                                          // const VkAttachmentReference* colorAttachments;
1107                         DE_NULL,                                                                                        // const VkAttachmentReference* resolveAttachments;
1108                         DE_NULL,                                                                                        // VkAttachmentReference                depthStencilAttachment;
1109                         0u,                                                                                                     // deUint32                                             preserveCount;
1110                         DE_NULL                                                                                         // const VkAttachmentReference* pPreserveAttachments;
1111                 };
1112
1113                 const VkRenderPassCreateInfo renderPassParams =
1114                 {
1115                         VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,                      // VkStructureType                                      sType;
1116                         DE_NULL,                                                                                        // const void*                                          pNext;
1117                         (VkRenderPassCreateFlags)0,                                                     // VkRenderPassCreateFlags                      flags;
1118                         (deUint32)attachments.size(),                                           // deUint32                                                     attachmentCount;
1119                         &attachments[0],                                                                        // const VkAttachmentDescription*       pAttachments;
1120                         1u,                                                                                                     // deUint32                                                     subpassCount;
1121                         &subpassDescription,                                                            // const VkSubpassDescription*          pSubpasses;
1122                         0u,                                                                                                     // deUint32                                                     dependencyCount;
1123                         DE_NULL                                                                                         // const VkSubpassDependency*           pDependencies;
1124                 };
1125
1126                 renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
1127         }
1128
1129         // Create framebuffer
1130         {
1131                 std::vector<VkImageView> views(colorImageViews.size());
1132                 for (size_t i = 0; i < colorImageViews.size(); i++)
1133                 {
1134                         views[i] = colorImageViews[i].get()->get();
1135                 }
1136
1137                 const VkFramebufferCreateInfo framebufferParams =
1138                 {
1139                         VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,                      // VkStructureType                              sType;
1140                         DE_NULL,                                                                                        // const void*                                  pNext;
1141                         0u,                                                                                                     // VkFramebufferCreateFlags             flags;
1142                         *renderPass,                                                                            // VkRenderPass                                 renderPass;
1143                         (deUint32)views.size(),                                                         // deUint32                                             attachmentCount;
1144                         &views[0],                                                                                      // const VkImageView*                   pAttachments;
1145                         (deUint32)renderSize.x(),                                                       // deUint32                                             width;
1146                         (deUint32)renderSize.y(),                                                       // deUint32                                             height;
1147                         1u                                                                                                      // deUint32                                             layers;
1148                 };
1149
1150                 framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
1151         }
1152
1153         // Create pipeline layout
1154         {
1155                 const VkDescriptorSetLayout                     setLayouts[]                    =
1156                 {
1157                         *emptyDescriptorSetLayout,
1158                         m_extraResourcesLayout
1159                 };
1160                 const VkPipelineLayoutCreateInfo        pipelineLayoutParams    =
1161                 {
1162                         VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,          // VkStructureType                              sType;
1163                         DE_NULL,                                                                                        // const void*                                  pNext;
1164                         (VkPipelineLayoutCreateFlags)0,                                         // VkPipelineLayoutCreateFlags  flags;
1165                         (m_extraResourcesLayout != 0 ? 2u : 0u),                        // deUint32                                             descriptorSetCount;
1166                         setLayouts,                                                                                     // const VkDescriptorSetLayout* pSetLayouts;
1167                         0u,                                                                                                     // deUint32                                             pushConstantRangeCount;
1168                         DE_NULL                                                                                         // const VkPushConstantRange*   pPushConstantRanges;
1169                 };
1170
1171                 pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
1172         }
1173
1174         // Create shaders
1175         {
1176                 vertexShaderModule              = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
1177                 fragmentShaderModule    = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
1178
1179                 if (useGeometryShader)
1180                 {
1181                         if (m_context.getDeviceFeatures().shaderTessellationAndGeometryPointSize)
1182                                 geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom_point_size"), 0);
1183                         else
1184                                 geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom"), 0);
1185                 }
1186         }
1187
1188         // Create pipeline
1189         {
1190                 const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
1191                 {
1192                         VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,      // VkStructureType                                                              sType;
1193                         DE_NULL,                                                                                                        // const void*                                                                  pNext;
1194                         (VkPipelineVertexInputStateCreateFlags)0,                                       // VkPipelineVertexInputStateCreateFlags                flags;
1195                         (deUint32)m_vertexBindingDescriptions.size(),                           // deUint32                                                                             bindingCount;
1196                         &m_vertexBindingDescriptions[0],                                                        // const VkVertexInputBindingDescription*               pVertexBindingDescriptions;
1197                         (deUint32)m_vertexAttributeDescriptions.size(),                         // deUint32                                                                             attributeCount;
1198                         &m_vertexAttributeDescriptions[0],                                                      // const VkVertexInputAttributeDescription*             pvertexAttributeDescriptions;
1199                 };
1200
1201                 const std::vector<VkViewport>   viewports       (1, makeViewport(renderSize));
1202                 const std::vector<VkRect2D>             scissors        (1, makeRect2D(renderSize));
1203
1204                 const VkPipelineColorBlendStateCreateInfo colorBlendStateParams =
1205                 {
1206                         VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,               // VkStructureType                                                              sType;
1207                         DE_NULL,                                                                                                                // const void*                                                                  pNext;
1208                         (VkPipelineColorBlendStateCreateFlags)0,                                                // VkPipelineColorBlendStateCreateFlags                 flags;
1209                         VK_FALSE,                                                                                                               // VkBool32                                                                             logicOpEnable;
1210                         VK_LOGIC_OP_COPY,                                                                                               // VkLogicOp                                                                    logicOp;
1211                         (deUint32)colorBlendAttachmentStates.size(),                                    // deUint32                                                                             attachmentCount;
1212                         &colorBlendAttachmentStates[0],                                                                 // const VkPipelineColorBlendAttachmentState*   pAttachments;
1213                         { 0.0f, 0.0f, 0.0f, 0.0f }                                                                              // float                                                                                blendConst[4];
1214                 };
1215
1216                 graphicsPipeline = makeGraphicsPipeline(vk,                                                                                                             // const DeviceInterface&                        vk
1217                                                                                                 vkDevice,                                                                                               // const VkDevice                                device
1218                                                                                                 *pipelineLayout,                                                                                // const VkPipelineLayout                        pipelineLayout
1219                                                                                                 *vertexShaderModule,                                                                    // const VkShaderModule                          vertexShaderModule
1220                                                                                                 DE_NULL,                                                                                                // const VkShaderModule                          tessellationControlShaderModule
1221                                                                                                 DE_NULL,                                                                                                // const VkShaderModule                          tessellationEvalShaderModule
1222                                                                                                 useGeometryShader ? *geometryShaderModule : DE_NULL,    // const VkShaderModule                          geometryShaderModule
1223                                                                                                 *fragmentShaderModule,                                                                  // const VkShaderModule                          fragmentShaderModule
1224                                                                                                 *renderPass,                                                                                    // const VkRenderPass                            renderPass
1225                                                                                                 viewports,                                                                                              // const std::vector<VkViewport>&                viewports
1226                                                                                                 scissors,                                                                                               // const std::vector<VkRect2D>&                  scissors
1227                                                                                                 VK_PRIMITIVE_TOPOLOGY_POINT_LIST,                                               // const VkPrimitiveTopology                     topology
1228                                                                                                 0u,                                                                                                             // const deUint32                                subpass
1229                                                                                                 0u,                                                                                                             // const deUint32                                patchControlPoints
1230                                                                                                 &vertexInputStateParams,                                                                // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
1231                                                                                                 DE_NULL,                                                                                                // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
1232                                                                                                 DE_NULL,                                                                                                // const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
1233                                                                                                 DE_NULL,                                                                                                // const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
1234                                                                                                 &colorBlendStateParams);                                                                // const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
1235         }
1236
1237         // Create command pool
1238         cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1239
1240         // Create command buffer
1241         {
1242                 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1243
1244                 beginCommandBuffer(vk, *cmdBuffer);
1245
1246                 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0,
1247                                                           0, (const VkMemoryBarrier*)DE_NULL,
1248                                                           0, (const VkBufferMemoryBarrier*)DE_NULL,
1249                                                           (deUint32)colorImagePreRenderBarriers.size(), colorImagePreRenderBarriers.empty() ? DE_NULL : &colorImagePreRenderBarriers[0]);
1250                 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), (deUint32)attachmentClearValues.size(), &attachmentClearValues[0]);
1251
1252                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
1253
1254                 if (m_extraResourcesLayout != 0)
1255                 {
1256                         DE_ASSERT(extraResources != 0);
1257                         const VkDescriptorSet   descriptorSets[]        = { *emptyDescriptorSet, extraResources };
1258                         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, DE_LENGTH_OF_ARRAY(descriptorSets), descriptorSets, 0u, DE_NULL);
1259                 }
1260                 else
1261                         DE_ASSERT(extraResources == 0);
1262
1263                 const deUint32 numberOfVertexAttributes = (deUint32)m_vertexBuffers.size();
1264
1265                 std::vector<VkDeviceSize> offsets(numberOfVertexAttributes, 0);
1266
1267                 std::vector<VkBuffer> buffers(numberOfVertexAttributes);
1268                 for (size_t i = 0; i < numberOfVertexAttributes; i++)
1269                 {
1270                         buffers[i] = m_vertexBuffers[i].get()->get();
1271                 }
1272
1273                 vk.cmdBindVertexBuffers(*cmdBuffer, 0, numberOfVertexAttributes, &buffers[0], &offsets[0]);
1274                 vk.cmdDraw(*cmdBuffer, (deUint32)positions.size(), 1u, 0u, 0u);
1275
1276                 endRenderPass(vk, *cmdBuffer);
1277                 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0,
1278                                                           0, (const VkMemoryBarrier*)DE_NULL,
1279                                                           0, (const VkBufferMemoryBarrier*)DE_NULL,
1280                                                           (deUint32)colorImagePostRenderBarriers.size(), colorImagePostRenderBarriers.empty() ? DE_NULL : &colorImagePostRenderBarriers[0]);
1281
1282                 endCommandBuffer(vk, *cmdBuffer);
1283         }
1284
1285         // Execute Draw
1286         submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
1287
1288         // Read back result and output
1289         {
1290                 const VkDeviceSize imageSizeBytes = (VkDeviceSize)(4 * sizeof(deUint32) * renderSize.x() * renderSize.y());
1291                 const VkBufferCreateInfo readImageBufferParams =
1292                 {
1293                         VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,           // VkStructureType              sType;
1294                         DE_NULL,                                                                        // const void*                  pNext;
1295                         0u,                                                                                     // VkBufferCreateFlags  flags;
1296                         imageSizeBytes,                                                         // VkDeviceSize                 size;
1297                         VK_BUFFER_USAGE_TRANSFER_DST_BIT,                       // VkBufferUsageFlags   usage;
1298                         VK_SHARING_MODE_EXCLUSIVE,                                      // VkSharingMode                sharingMode;
1299                         1u,                                                                                     // deUint32                             queueFamilyCount;
1300                         &queueFamilyIndex,                                                      // const deUint32*              pQueueFamilyIndices;
1301                 };
1302
1303                 // constants for image copy
1304                 Move<VkCommandPool>     copyCmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1305
1306                 const VkBufferImageCopy copyParams =
1307                 {
1308                         0u,                                                                                     // VkDeviceSize                 bufferOffset;
1309                         (deUint32)renderSize.x(),                                       // deUint32                             bufferRowLength;
1310                         (deUint32)renderSize.y(),                                       // deUint32                             bufferImageHeight;
1311                         {
1312                                 VK_IMAGE_ASPECT_COLOR_BIT,                              // VkImageAspect                aspect;
1313                                 0u,                                                                             // deUint32                             mipLevel;
1314                                 0u,                                                                             // deUint32                             arraySlice;
1315                                 1u,                                                                             // deUint32                             arraySize;
1316                         },                                                                                      // VkImageSubresource   imageSubresource;
1317                         { 0u, 0u, 0u },                                                         // VkOffset3D                   imageOffset;
1318                         { renderSize.x(), renderSize.y(), 1u }          // VkExtent3D                   imageExtent;
1319                 };
1320
1321                 // Read back pixels.
1322                 for (int outNdx = 0; outNdx < (int)m_shaderSpec.outputs.size(); ++outNdx)
1323                 {
1324                         const Symbol&                           output                  = m_shaderSpec.outputs[outNdx];
1325                         const int                                       outSize                 = output.varType.getScalarSize();
1326                         const int                                       outVecSize              = glu::getDataTypeNumComponents(output.varType.getBasicType());
1327                         const int                                       outNumLocs              = glu::getDataTypeNumLocations(output.varType.getBasicType());
1328                         const int                                       outLocation             = de::lookup(m_outputLayout.locationMap, output.name);
1329
1330                         for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
1331                         {
1332                                 tcu::TextureLevel                       tmpBuf;
1333                                 const tcu::TextureFormat        format = getRenderbufferFormatForOutput(output.varType, false);
1334                                 const tcu::TextureFormat        readFormat (tcu::TextureFormat::RGBA, format.type);
1335                                 const Unique<VkBuffer>          readImageBuffer(createBuffer(vk, vkDevice, &readImageBufferParams));
1336                                 const de::UniquePtr<Allocation> readImageBufferMemory(memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));
1337
1338                                 VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(), readImageBufferMemory->getOffset()));
1339
1340                                 // Copy image to buffer
1341                                 {
1342
1343                                         Move<VkCommandBuffer> copyCmdBuffer = allocateCommandBuffer(vk, vkDevice, *copyCmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1344
1345                                         beginCommandBuffer(vk, *copyCmdBuffer);
1346                                         vk.cmdCopyImageToBuffer(*copyCmdBuffer, colorImages[outLocation + locNdx].get()->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, &copyParams);
1347
1348                                         // Insert a barrier so data written by the transfer is available to the host
1349                                         {
1350                                                 const VkBufferMemoryBarrier barrier =
1351                                                 {
1352                                                         VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,        // VkStructureType    sType;
1353                                                         DE_NULL,                                                                        // const void*        pNext;
1354                                                         VK_ACCESS_TRANSFER_WRITE_BIT,                           // VkAccessFlags      srcAccessMask;
1355                                                         VK_ACCESS_HOST_READ_BIT,                                        // VkAccessFlags      dstAccessMask;
1356                                                         VK_QUEUE_FAMILY_IGNORED,                                        // uint32_t           srcQueueFamilyIndex;
1357                                                         VK_QUEUE_FAMILY_IGNORED,                                        // uint32_t           dstQueueFamilyIndex;
1358                                                         *readImageBuffer,                                                       // VkBuffer           buffer;
1359                                                         0,                                                                                      // VkDeviceSize       offset;
1360                                                         VK_WHOLE_SIZE,                                                          // VkDeviceSize       size;
1361                                                 };
1362
1363                                                 vk.cmdPipelineBarrier(*copyCmdBuffer, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
1364                                                                                         0, (const VkMemoryBarrier*)DE_NULL,
1365                                                                                         1, &barrier,
1366                                                                                         0, (const VkImageMemoryBarrier*)DE_NULL);
1367                                         }
1368
1369                                         endCommandBuffer(vk, *copyCmdBuffer);
1370
1371                                         submitCommandsAndWait(vk, vkDevice, queue, copyCmdBuffer.get());
1372                                 }
1373
1374                                 invalidateAlloc(vk, vkDevice, *readImageBufferMemory);
1375
1376                                 tmpBuf.setStorage(readFormat, renderSize.x(), renderSize.y());
1377
1378                                 const tcu::TextureFormat resultFormat(tcu::TextureFormat::RGBA, format.type);
1379                                 const tcu::ConstPixelBufferAccess resultAccess(resultFormat, renderSize.x(), renderSize.y(), 1, readImageBufferMemory->getHostPtr());
1380
1381                                 tcu::copy(tmpBuf.getAccess(), resultAccess);
1382
1383                                 if (isOutput16Bit(static_cast<size_t>(outNdx)))
1384                                 {
1385                                         deUint16*       dstPtrBase = static_cast<deUint16*>(outputs[outNdx]);
1386                                         if (outSize == 4 && outNumLocs == 1)
1387                                                 deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint16));
1388                                         else
1389                                         {
1390                                                 for (int valNdx = 0; valNdx < numValues; valNdx++)
1391                                                 {
1392                                                         const deUint16* srcPtr = (const deUint16*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1393                                                         deUint16*               dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1394                                                         deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint16));
1395                                                 }
1396                                         }
1397                                 }
1398                                 else
1399                                 {
1400                                         deUint32*       dstPtrBase = static_cast<deUint32*>(outputs[outNdx]);
1401                                         if (outSize == 4 && outNumLocs == 1)
1402                                                 deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint32));
1403                                         else
1404                                         {
1405                                                 for (int valNdx = 0; valNdx < numValues; valNdx++)
1406                                                 {
1407                                                         const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1408                                                         deUint32*               dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1409                                                         deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint32));
1410                                                 }
1411                                         }
1412                                 }
1413                         }
1414                 }
1415         }
1416 }
1417
1418 // VertexShaderExecutor
1419
1420 class VertexShaderExecutor : public FragmentOutExecutor
1421 {
1422 public:
1423                                                                 VertexShaderExecutor    (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1424         virtual                                         ~VertexShaderExecutor   (void);
1425
1426         static void                                     generateSources                 (const ShaderSpec& shaderSpec, SourceCollections& dst);
1427 };
1428
1429 VertexShaderExecutor::VertexShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1430         : FragmentOutExecutor(context, glu::SHADERTYPE_VERTEX, shaderSpec, extraResourcesLayout)
1431 {
1432 }
1433
1434 VertexShaderExecutor::~VertexShaderExecutor (void)
1435 {
1436 }
1437
1438 void VertexShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1439 {
1440         const FragmentOutputLayout      outputLayout    (computeFragmentOutputLayout(shaderSpec.outputs));
1441
1442         programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1443         /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1444         programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1445 }
1446
1447 // GeometryShaderExecutor
1448
1449 class GeometryShaderExecutor : public FragmentOutExecutor
1450 {
1451 public:
1452                                                                 GeometryShaderExecutor  (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1453         virtual                                         ~GeometryShaderExecutor (void);
1454
1455         static void                                     generateSources                 (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
1456
1457 };
1458
1459 GeometryShaderExecutor::GeometryShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1460         : FragmentOutExecutor(context, glu::SHADERTYPE_GEOMETRY, shaderSpec, extraResourcesLayout)
1461 {
1462         const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
1463
1464         if (!features.geometryShader)
1465                 TCU_THROW(NotSupportedError, "Geometry shader type not supported by device");
1466 }
1467
1468 GeometryShaderExecutor::~GeometryShaderExecutor (void)
1469 {
1470 }
1471
1472 void GeometryShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1473 {
1474         const FragmentOutputLayout      outputLayout    (computeFragmentOutputLayout(shaderSpec.outputs));
1475
1476         programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1477
1478         programCollection.glslSources.add("geom") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", false)) << shaderSpec.buildOptions;
1479         programCollection.glslSources.add("geom_point_size") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", true)) << shaderSpec.buildOptions;
1480
1481         /* \todo [2015-09-18 rsipka] set useIntOutputs parameter if needed. */
1482         programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "geom_out_", "o_")) << shaderSpec.buildOptions;
1483
1484 }
1485
1486 // FragmentShaderExecutor
1487
1488 class FragmentShaderExecutor : public FragmentOutExecutor
1489 {
1490 public:
1491                                                                 FragmentShaderExecutor  (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1492         virtual                                         ~FragmentShaderExecutor (void);
1493
1494         static void                                     generateSources                 (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
1495
1496 };
1497
1498 FragmentShaderExecutor::FragmentShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1499         : FragmentOutExecutor(context, glu::SHADERTYPE_FRAGMENT, shaderSpec, extraResourcesLayout)
1500 {
1501 }
1502
1503 FragmentShaderExecutor::~FragmentShaderExecutor (void)
1504 {
1505 }
1506
1507 void FragmentShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1508 {
1509         const FragmentOutputLayout      outputLayout    (computeFragmentOutputLayout(shaderSpec.outputs));
1510
1511         programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1512         /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1513         programCollection.glslSources.add("frag") << glu::FragmentSource(generateFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1514 }
1515
1516 // Shared utilities for compute and tess executors
1517
1518 static deUint32 getVecStd430ByteAlignment (glu::DataType type)
1519 {
1520         switch (type)
1521         {
1522                 case glu::TYPE_FLOAT16:                 return 2u;
1523                 case glu::TYPE_FLOAT16_VEC2:    return 4u;
1524                 case glu::TYPE_FLOAT16_VEC3:    return 8u;
1525                 case glu::TYPE_FLOAT16_VEC4:    return 8u;
1526                 default: break;
1527         }
1528
1529         switch (glu::getDataTypeScalarSize(type))
1530         {
1531                 case 1:         return 4u;
1532                 case 2:         return 8u;
1533                 case 3:         return 16u;
1534                 case 4:         return 16u;
1535                 default:
1536                         DE_ASSERT(false);
1537                         return 0u;
1538         }
1539 }
1540
1541 class BufferIoExecutor : public ShaderExecutor
1542 {
1543 public:
1544                                                         BufferIoExecutor        (Context& context, const ShaderSpec& shaderSpec);
1545         virtual                                 ~BufferIoExecutor       (void);
1546
1547 protected:
1548         enum
1549         {
1550                 INPUT_BUFFER_BINDING    = 0,
1551                 OUTPUT_BUFFER_BINDING   = 1,
1552         };
1553
1554         void                                    initBuffers                     (int numValues);
1555         VkBuffer                                getInputBuffer          (void) const            { return *m_inputBuffer;                                        }
1556         VkBuffer                                getOutputBuffer         (void) const            { return *m_outputBuffer;                                       }
1557         deUint32                                getInputStride          (void) const            { return getLayoutStride(m_inputLayout);        }
1558         deUint32                                getOutputStride         (void) const            { return getLayoutStride(m_outputLayout);       }
1559
1560         void                                    uploadInputBuffer       (const void* const* inputPtrs, int numValues);
1561         void                                    readOutputBuffer        (void* const* outputPtrs, int numValues);
1562
1563         static void                             declareBufferBlocks     (std::ostream& src, const ShaderSpec& spec);
1564         static void                             generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);
1565
1566 protected:
1567         Move<VkBuffer>                  m_inputBuffer;
1568         Move<VkBuffer>                  m_outputBuffer;
1569
1570 private:
1571         struct VarLayout
1572         {
1573                 deUint32                offset;
1574                 deUint32                stride;
1575                 deUint32                matrixStride;
1576
1577                 VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
1578         };
1579
1580         static void                             computeVarLayout        (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
1581         static deUint32                 getLayoutStride         (const vector<VarLayout>& layout);
1582
1583         static void                             copyToBuffer            (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
1584         static void                             copyFromBuffer          (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
1585
1586         de::MovePtr<Allocation> m_inputAlloc;
1587         de::MovePtr<Allocation> m_outputAlloc;
1588
1589         vector<VarLayout>               m_inputLayout;
1590         vector<VarLayout>               m_outputLayout;
1591 };
1592
1593 BufferIoExecutor::BufferIoExecutor (Context& context, const ShaderSpec& shaderSpec)
1594         : ShaderExecutor(context, shaderSpec)
1595 {
1596         computeVarLayout(m_shaderSpec.inputs, &m_inputLayout);
1597         computeVarLayout(m_shaderSpec.outputs, &m_outputLayout);
1598 }
1599
1600 BufferIoExecutor::~BufferIoExecutor (void)
1601 {
1602 }
1603
1604 inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
1605 {
1606         return layout.empty() ? 0 : layout[0].stride;
1607 }
1608
1609 void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
1610 {
1611         deUint32        maxAlignment    = 0;
1612         deUint32        curOffset               = 0;
1613
1614         DE_ASSERT(layout != DE_NULL);
1615         DE_ASSERT(layout->empty());
1616         layout->resize(symbols.size());
1617
1618         for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
1619         {
1620                 const Symbol&           symbol          = symbols[varNdx];
1621                 const glu::DataType     basicType       = symbol.varType.getBasicType();
1622                 VarLayout&                      layoutEntry     = (*layout)[varNdx];
1623
1624                 if (glu::isDataTypeScalarOrVector(basicType))
1625                 {
1626                         const deUint32  alignment       = getVecStd430ByteAlignment(basicType);
1627                         const deUint32  size            = (deUint32)glu::getDataTypeScalarSize(basicType) * (isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
1628
1629                         curOffset               = (deUint32)deAlign32((int)curOffset, (int)alignment);
1630                         maxAlignment    = de::max(maxAlignment, alignment);
1631
1632                         layoutEntry.offset                      = curOffset;
1633                         layoutEntry.matrixStride        = 0;
1634
1635                         curOffset += size;
1636                 }
1637                 else if (glu::isDataTypeMatrix(basicType))
1638                 {
1639                         const int                               numVecs                 = glu::getDataTypeMatrixNumColumns(basicType);
1640                         const glu::DataType             vecType                 = glu::getDataTypeFloatVec(glu::getDataTypeMatrixNumRows(basicType));
1641                         const deUint32                  vecAlignment    = isDataTypeFloat16OrVec(basicType) ? getVecStd430ByteAlignment(vecType)/2 : getVecStd430ByteAlignment(vecType);
1642
1643                         curOffset               = (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
1644                         maxAlignment    = de::max(maxAlignment, vecAlignment);
1645
1646                         layoutEntry.offset                      = curOffset;
1647                         layoutEntry.matrixStride        = vecAlignment;
1648
1649                         curOffset += vecAlignment*numVecs;
1650                 }
1651                 else
1652                         DE_ASSERT(false);
1653         }
1654
1655         {
1656                 const deUint32  totalSize       = (deUint32)deAlign32(curOffset, maxAlignment);
1657
1658                 for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
1659                         varIter->stride = totalSize;
1660         }
1661 }
1662
1663 void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
1664 {
1665         // Input struct
1666         if (!spec.inputs.empty())
1667         {
1668                 glu::StructType inputStruct("Inputs");
1669                 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1670                         inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1671                 src << glu::declare(&inputStruct) << ";\n";
1672         }
1673
1674         // Output struct
1675         {
1676                 glu::StructType outputStruct("Outputs");
1677                 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1678                         outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1679                 src << glu::declare(&outputStruct) << ";\n";
1680         }
1681
1682         src << "\n";
1683
1684         if (!spec.inputs.empty())
1685         {
1686                 src     << "layout(set = 0, binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1687                         << "{\n"
1688                         << "    Inputs inputs[];\n"
1689                         << "};\n";
1690         }
1691
1692         src     << "layout(set = 0, binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1693                 << "{\n"
1694                 << "    Outputs outputs[];\n"
1695                 << "};\n"
1696                 << "\n";
1697 }
1698
1699 void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
1700 {
1701         std::string     tname;
1702         for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1703         {
1704                 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1705                 if (f16BitTest)
1706                 {
1707                         tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1708                 }
1709                 else
1710                 {
1711                         tname = glu::getDataTypeName(symIter->varType.getBasicType());
1712                 }
1713                 src << "\t" << tname << " "<< symIter->name << " = " << tname << "(inputs[" << invocationNdxName << "]." << symIter->name << ");\n";
1714         }
1715
1716         for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1717         {
1718                 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1719                 if (f16BitTest)
1720                 {
1721                         tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1722                 }
1723                 else
1724                 {
1725                         tname = glu::getDataTypeName(symIter->varType.getBasicType());
1726                 }
1727                 src << "\t" << tname << " " << symIter->name << ";\n";
1728                 if (f16BitTest)
1729                 {
1730                         const char* ttname = glu::getDataTypeName(symIter->varType.getBasicType());
1731                         src << "\t" << ttname << " " << "packed_" << symIter->name << ";\n";
1732                 }
1733         }
1734
1735         src << "\n";
1736
1737         {
1738                 std::istringstream      opSrc   (spec.source);
1739                 std::string                     line;
1740
1741                 while (std::getline(opSrc, line))
1742                         src << "\t" << line << "\n";
1743         }
1744
1745         if (spec.packFloat16Bit)
1746                 packFloat16Bit (src, spec.outputs);
1747
1748         src << "\n";
1749         for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1750         {
1751                 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1752                 if(f16BitTest)
1753                         src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = packed_" << symIter->name << ";\n";
1754                 else
1755                         src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1756         }
1757 }
1758
1759 void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1760 {
1761         if (varType.isBasicType())
1762         {
1763                 const glu::DataType             basicType               = varType.getBasicType();
1764                 const bool                              isMatrix                = glu::isDataTypeMatrix(basicType);
1765                 const int                               scalarSize              = glu::getDataTypeScalarSize(basicType);
1766                 const int                               numVecs                 = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1767                 const int                               numComps                = scalarSize / numVecs;
1768
1769                 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1770                 {
1771                         for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1772                         {
1773                                 const int               size                    = (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
1774                                 const int               srcOffset               = size * (elemNdx * scalarSize + vecNdx * numComps);
1775                                 const int               dstOffset               = layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1776                                 const deUint8*  srcPtr                  = (const deUint8*)srcBasePtr + srcOffset;
1777                                 deUint8*                dstPtr                  = (deUint8*)dstBasePtr + dstOffset;
1778
1779                                 deMemcpy(dstPtr, srcPtr, size * numComps);
1780                         }
1781                 }
1782         }
1783         else
1784                 throw tcu::InternalError("Unsupported type");
1785 }
1786
1787 void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1788 {
1789         if (varType.isBasicType())
1790         {
1791                 const glu::DataType             basicType               = varType.getBasicType();
1792                 const bool                              isMatrix                = glu::isDataTypeMatrix(basicType);
1793                 const int                               scalarSize              = glu::getDataTypeScalarSize(basicType);
1794                 const int                               numVecs                 = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1795                 const int                               numComps                = scalarSize / numVecs;
1796
1797                 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1798                 {
1799                         for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1800                         {
1801                                 const int               size                    = (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
1802                                 const int               srcOffset               = layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1803                                 const int               dstOffset               = size * (elemNdx * scalarSize + vecNdx * numComps);
1804                                 const deUint8*  srcPtr                  = (const deUint8*)srcBasePtr + srcOffset;
1805                                 deUint8*                dstPtr                  = (deUint8*)dstBasePtr + dstOffset;
1806
1807                                 deMemcpy(dstPtr, srcPtr, size * numComps);
1808                         }
1809                 }
1810         }
1811         else
1812                 throw tcu::InternalError("Unsupported type");
1813 }
1814
1815 void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues)
1816 {
1817         const VkDevice                  vkDevice                        = m_context.getDevice();
1818         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1819
1820         const deUint32                  inputStride                     = getLayoutStride(m_inputLayout);
1821         const int                               inputBufferSize         = inputStride * numValues;
1822
1823         if (inputBufferSize == 0)
1824                 return; // No inputs
1825
1826         DE_ASSERT(m_shaderSpec.inputs.size() == m_inputLayout.size());
1827         for (size_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
1828         {
1829                 const glu::VarType&             varType         = m_shaderSpec.inputs[inputNdx].varType;
1830                 const VarLayout&                layout          = m_inputLayout[inputNdx];
1831
1832                 copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], m_inputAlloc->getHostPtr());
1833         }
1834
1835         flushAlloc(vk, vkDevice, *m_inputAlloc);
1836 }
1837
1838 void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1839 {
1840         const VkDevice                  vkDevice                        = m_context.getDevice();
1841         const DeviceInterface&  vk                                      = m_context.getDeviceInterface();
1842
1843         DE_ASSERT(numValues > 0); // At least some outputs are required.
1844
1845         invalidateAlloc(vk, vkDevice, *m_outputAlloc);
1846
1847         DE_ASSERT(m_shaderSpec.outputs.size() == m_outputLayout.size());
1848         for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
1849         {
1850                 const glu::VarType&             varType         = m_shaderSpec.outputs[outputNdx].varType;
1851                 const VarLayout&                layout          = m_outputLayout[outputNdx];
1852
1853                 copyFromBuffer(varType, layout, numValues, m_outputAlloc->getHostPtr(), outputPtrs[outputNdx]);
1854         }
1855 }
1856
1857 void BufferIoExecutor::initBuffers (int numValues)
1858 {
1859         const deUint32                          inputStride                     = getLayoutStride(m_inputLayout);
1860         const deUint32                          outputStride            = getLayoutStride(m_outputLayout);
1861         // Avoid creating zero-sized buffer/memory
1862         const size_t                            inputBufferSize         = de::max(numValues * inputStride, 1u);
1863         const size_t                            outputBufferSize        = numValues * outputStride;
1864
1865         // Upload data to buffer
1866         const VkDevice                          vkDevice                        = m_context.getDevice();
1867         const DeviceInterface&          vk                                      = m_context.getDeviceInterface();
1868         const deUint32                          queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1869         Allocator&                                      memAlloc                        = m_context.getDefaultAllocator();
1870
1871         const VkBufferCreateInfo inputBufferParams =
1872         {
1873                 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,           // VkStructureType              sType;
1874                 DE_NULL,                                                                        // const void*                  pNext;
1875                 0u,                                                                                     // VkBufferCreateFlags  flags;
1876                 inputBufferSize,                                                        // VkDeviceSize                 size;
1877                 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,                     // VkBufferUsageFlags   usage;
1878                 VK_SHARING_MODE_EXCLUSIVE,                                      // VkSharingMode                sharingMode;
1879                 1u,                                                                                     // deUint32                             queueFamilyCount;
1880                 &queueFamilyIndex                                                       // const deUint32*              pQueueFamilyIndices;
1881         };
1882
1883         m_inputBuffer = createBuffer(vk, vkDevice, &inputBufferParams);
1884         m_inputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_inputBuffer), MemoryRequirement::HostVisible);
1885
1886         VK_CHECK(vk.bindBufferMemory(vkDevice, *m_inputBuffer, m_inputAlloc->getMemory(), m_inputAlloc->getOffset()));
1887
1888         const VkBufferCreateInfo outputBufferParams =
1889         {
1890                 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,           // VkStructureType              sType;
1891                 DE_NULL,                                                                        // const void*                  pNext;
1892                 0u,                                                                                     // VkBufferCreateFlags  flags;
1893                 outputBufferSize,                                                       // VkDeviceSize                 size;
1894                 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,                     // VkBufferUsageFlags   usage;
1895                 VK_SHARING_MODE_EXCLUSIVE,                                      // VkSharingMode                sharingMode;
1896                 1u,                                                                                     // deUint32                             queueFamilyCount;
1897                 &queueFamilyIndex                                                       // const deUint32*              pQueueFamilyIndices;
1898         };
1899
1900         m_outputBuffer = createBuffer(vk, vkDevice, &outputBufferParams);
1901         m_outputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_outputBuffer), MemoryRequirement::HostVisible);
1902
1903         VK_CHECK(vk.bindBufferMemory(vkDevice, *m_outputBuffer, m_outputAlloc->getMemory(), m_outputAlloc->getOffset()));
1904 }
1905
1906 // ComputeShaderExecutor
1907
1908 class ComputeShaderExecutor : public BufferIoExecutor
1909 {
1910 public:
1911                                                 ComputeShaderExecutor   (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1912         virtual                         ~ComputeShaderExecutor  (void);
1913
1914         static void                     generateSources                 (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
1915
1916         virtual void            execute                                 (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);
1917
1918 protected:
1919         static std::string      generateComputeShader   (const ShaderSpec& spec);
1920
1921 private:
1922         const VkDescriptorSetLayout                                     m_extraResourcesLayout;
1923 };
1924
1925 ComputeShaderExecutor::ComputeShaderExecutor(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1926         : BufferIoExecutor                      (context, shaderSpec)
1927         , m_extraResourcesLayout        (extraResourcesLayout)
1928 {
1929 }
1930
1931 ComputeShaderExecutor::~ComputeShaderExecutor   (void)
1932 {
1933 }
1934
1935 std::string getTypeSpirv(const glu::DataType type)
1936 {
1937         switch(type)
1938         {
1939         case glu::TYPE_FLOAT16:
1940                 return "%f16";
1941         case glu::TYPE_FLOAT16_VEC2:
1942                 return "%v2f16";
1943         case glu::TYPE_FLOAT16_VEC3:
1944                 return "%v3f16";
1945         case glu::TYPE_FLOAT16_VEC4:
1946                 return "%v4f16";
1947         case glu::TYPE_FLOAT:
1948                 return "%f32";
1949         case glu::TYPE_FLOAT_VEC2:
1950                 return "%v2f32";
1951         case glu::TYPE_FLOAT_VEC3:
1952                 return "%v3f32";
1953         case glu::TYPE_FLOAT_VEC4:
1954                 return "%v4f32";
1955         case glu::TYPE_INT:
1956                 return "%i32";
1957         case glu::TYPE_INT_VEC2:
1958                 return "%v2i32";
1959         case glu::TYPE_INT_VEC3:
1960                 return "%v3i32";
1961         case glu::TYPE_INT_VEC4:
1962                 return "%v4i32";
1963         default:
1964                 DE_ASSERT(0);
1965                 return "";
1966                 break;
1967         }
1968 }
1969
// Emits SPIR-V assembly that loads the %i32 value stored in variableName,
// shifts it left by one bit (%c_i32_1) and stores it back.
//
// operationNdx is appended to the generated result ids so that several
// invocations can be concatenated into one function body without id clashes.
// variableName is taken by const reference to avoid copying the string on
// every call.
std::string moveBitOperation (const std::string& variableName, const int operationNdx)
{
	std::ostringstream	src;

	src << "\n"
		<< "%operation_move_" << operationNdx << " = OpLoad %i32 " << variableName << "\n"
		<< "%move1_" << operationNdx << " = OpShiftLeftLogical %i32 %operation_move_" << operationNdx << " %c_i32_1\n"
		<< "OpStore " << variableName << " %move1_" << operationNdx << "\n";

	return src.str();
}
1979
1980 std::string sclarComparison(const std::string opeartion, const int operationNdx, const glu::DataType type, const std::string& outputType, const int scalarSize)
1981 {
1982         std::ostringstream      src;
1983         std::string                     boolType;
1984
1985         switch (type)
1986         {
1987         case glu::TYPE_FLOAT16:
1988         case glu::TYPE_FLOAT:
1989                 src << "\n"
1990                         << "%operation_result_" << operationNdx << " = " << opeartion << " %bool %in0_val %in1_val\n"
1991                         << "OpSelectionMerge %IF_" << operationNdx << " None\n"
1992                         << "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_" << operationNdx << "\n"
1993                         << "%label_IF_" << operationNdx << " = OpLabel\n"
1994                         << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n"
1995                         << "%out_val_" << operationNdx << " = OpLoad %i32 %out\n"
1996                         << "%add_if_" << operationNdx << " = OpIAdd %i32 %out_val_" << operationNdx << " %operation_val_" << operationNdx << "\n"
1997                         << "OpStore %out %add_if_" << operationNdx << "\n"
1998                         << "OpBranch %IF_" << operationNdx << "\n"
1999                         << "%IF_" << operationNdx << " = OpLabel\n";
2000                 return src.str();
2001         case glu::TYPE_FLOAT16_VEC2:
2002         case glu::TYPE_FLOAT_VEC2:
2003                 boolType = "%v2bool";
2004                 break;
2005         case glu::TYPE_FLOAT16_VEC3:
2006         case glu::TYPE_FLOAT_VEC3:
2007                 boolType = "%v3bool";
2008                 break;
2009         case glu::TYPE_FLOAT16_VEC4:
2010         case glu::TYPE_FLOAT_VEC4:
2011                 boolType = "%v4bool";
2012                 break;
2013         default:
2014                 DE_ASSERT(0);
2015                 return "";
2016                 break;
2017         }
2018
2019         src << "\n"
2020                 << "%operation_result_" << operationNdx << " = " << opeartion << " " << boolType << " %in0_val %in1_val\n"
2021                 << "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
2022                 << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";
2023
2024         src << "%operation_vec_" << operationNdx << " = OpCompositeConstruct " << outputType;
2025         for(int ndx = 0; ndx < scalarSize; ++ndx)
2026                 src << " %operation_val_" << operationNdx;
2027         src << "\n";
2028
2029         src << "%toAdd" << operationNdx << " = OpIMul "<< outputType << " %ivec_result_" << operationNdx << " %operation_vec_" << operationNdx <<"\n"
2030                 << "%out_val_" << operationNdx << " = OpLoad "<< outputType << " %out\n"
2031
2032                 << "%add_if_" << operationNdx << " = OpIAdd " << outputType << " %out_val_" << operationNdx << " %toAdd" << operationNdx << "\n"
2033                 << "OpStore %out %add_if_" << operationNdx << "\n";
2034
2035         return src.str();
2036 }
2037
2038 std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const bool isMediump)
2039 {
2040         const int                       operationAmount = 10;
2041         int                                     moveBitNdx              = 0;
2042         const std::string       inputType1              = getTypeSpirv(spec.inputs[0].varType.getBasicType());
2043         const std::string       inputType2              = getTypeSpirv(spec.inputs[1].varType.getBasicType());
2044         const std::string       outputType              = getTypeSpirv(spec.outputs[0].varType.getBasicType());
2045         const std::string       packType                = spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
2046
2047         std::string     opeartions[operationAmount]     =
2048         {
2049                 "OpFOrdEqual",
2050                 "OpFOrdGreaterThan",
2051                 "OpFOrdLessThan",
2052                 "OpFOrdGreaterThanEqual",
2053                 "OpFOrdLessThanEqual",
2054                 "OpFUnordEqual",
2055                 "OpFUnordGreaterThan",
2056                 "OpFUnordLessThan",
2057                 "OpFUnordGreaterThanEqual",
2058                 "OpFUnordLessThanEqual"
2059         };
2060
2061         std::ostringstream      src;
2062         src << "; SPIR-V\n"
2063                 "; Version: 1.0\n"
2064                 "; Generator: Khronos Glslang Reference Front End; 4\n"
2065                 "; Bound: 114\n"
2066                 "; Schema: 0\n"
2067                 "OpCapability Shader\n";
2068
2069         if (spec.packFloat16Bit || are16Bit)
2070                 src << "OpCapability Float16\n";
2071
2072         if (are16Bit)
2073                 src << "OpCapability StorageBuffer16BitAccess\n"
2074                         "OpCapability UniformAndStorageBuffer16BitAccess\n";
2075
2076         if (are16Bit)
2077                 src << "OpExtension \"SPV_KHR_16bit_storage\"\n";
2078
2079         src << "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2080                 "OpMemoryModel Logical GLSL450\n"
2081                 "OpEntryPoint GLCompute %BP_main \"main\" %BP_id3uNum %BP_id3uID\n"
2082                 "OpExecutionMode %BP_main LocalSize 1 1 1\n"
2083                 "OpDecorate %BP_id3uNum BuiltIn NumWorkgroups\n"
2084                 "OpDecorate %BP_id3uID BuiltIn WorkgroupId\n";
2085
2086         //input offset
2087         {
2088                 int offset = 0;
2089                 int ndx = 0;
2090                 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2091                 {
2092                         src << "OpMemberDecorate %SSB0_IN "<< ndx <<" Offset " << offset << "\n";
2093                         ++ndx;
2094                         offset += (symIter->varType.getScalarSize() == 3 ? 4 : symIter->varType.getScalarSize()) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
2095                 }
2096                 src << "OpDecorate %up_SSB0_IN ArrayStride "<< offset << "\n";
2097         }
2098
2099         src << "OpMemberDecorate %ssboIN 0 Offset 0\n"
2100                 "OpDecorate %ssboIN BufferBlock\n"
2101                 "OpDecorate %ssbo_src DescriptorSet 0\n"
2102                 "OpDecorate %ssbo_src Binding 0\n"
2103                 "\n";
2104
2105         if (isMediump)
2106         {
2107                 src << "OpMemberDecorate %SSB0_IN 1 RelaxedPrecision\n"
2108                         "OpDecorate %in0 RelaxedPrecision\n"
2109                         "OpMemberDecorate %SSB0_IN 0 RelaxedPrecision\n"
2110                         "OpDecorate %src_val_0_0 RelaxedPrecision\n"
2111                         "OpDecorate %src_val_0_0 RelaxedPrecision\n"
2112                         "OpDecorate %in1 RelaxedPrecision\n"
2113                         "OpDecorate %src_val_0_1 RelaxedPrecision\n"
2114                         "OpDecorate %src_val_0_1 RelaxedPrecision\n"
2115                         "OpDecorate %in0_val RelaxedPrecision\n"
2116                         "OpDecorate %in1_val RelaxedPrecision\n"
2117                         "OpDecorate %in0_val RelaxedPrecision\n"
2118                         "OpDecorate %in1_val RelaxedPrecision\n"
2119                         "OpMemberDecorate %SSB0_OUT 0 RelaxedPrecision\n";
2120         }
2121
2122         //output offset
2123         {
2124                 int offset = 0;
2125                 int ndx = 0;
2126                 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
2127                 {
2128                         src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
2129                         ++ndx;
2130                         offset += (symIter->varType.getScalarSize() == 3 ? 4 : symIter->varType.getScalarSize()) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
2131                 }
2132                 src << "OpDecorate %up_SSB0_OUT ArrayStride " << offset << "\n";
2133         }
2134
2135         src << "OpMemberDecorate %ssboOUT 0 Offset 0\n"
2136                 "OpDecorate %ssboOUT BufferBlock\n"
2137                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
2138                 "OpDecorate %ssbo_dst Binding 1\n"
2139                 "\n"
2140                 "%void  = OpTypeVoid\n"
2141                 "%bool  = OpTypeBool\n"
2142                 "%v2bool = OpTypeVector %bool 2\n"
2143                 "%v3bool = OpTypeVector %bool 3\n"
2144                 "%v4bool = OpTypeVector %bool 4\n"
2145                 "%u32   = OpTypeInt 32 0\n";
2146
2147         if (!are16Bit) //f32 is not needed when shader operates only on f16
2148                 src << "%f32   = OpTypeFloat 32\n"
2149                         "%v2f32 = OpTypeVector %f32 2\n"
2150                         "%v3f32 = OpTypeVector %f32 3\n"
2151                         "%v4f32 = OpTypeVector %f32 4\n";
2152
2153         if (spec.packFloat16Bit || are16Bit)
2154                 src << "%f16   = OpTypeFloat 16\n"
2155                         "%v2f16 = OpTypeVector %f16 2\n"
2156                         "%v3f16 = OpTypeVector %f16 3\n"
2157                         "%v4f16 = OpTypeVector %f16 4\n";
2158
2159         src << "%i32   = OpTypeInt 32 1\n"
2160                 "%v2i32 = OpTypeVector %i32 2\n"
2161                 "%v3i32 = OpTypeVector %i32 3\n"
2162                 "%v4i32 = OpTypeVector %i32 4\n"
2163                 "%v3u32 = OpTypeVector %u32 3\n"
2164                 "\n"
2165                 "%ip_u32   = OpTypePointer Input %u32\n"
2166                 "%ip_v3u32 = OpTypePointer Input %v3u32\n"
2167                 "%up_float   = OpTypePointer Uniform " << inputType1 << "\n"
2168                 "\n"
2169                 "%fun     = OpTypeFunction %void\n"
2170                 "%fp_u32  = OpTypePointer Function %u32\n"
2171                 "%fp_i32  = OpTypePointer Function " << outputType << "\n"
2172                 "%fp_f32  = OpTypePointer Function " << inputType1 << "\n"
2173                 "%fp_operation =  OpTypePointer Function %i32\n";
2174
2175         if (spec.packFloat16Bit)
2176                 src << "%fp_f16  = OpTypePointer Function " << packType << "\n";
2177
2178         src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
2179                 "%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
2180                 "%up_i32 = OpTypePointer Uniform " << outputType << "\n"
2181                 "\n"
2182                 "%c_u32_0 = OpConstant %u32 0\n"
2183                 "%c_u32_1 = OpConstant %u32 1\n"
2184                 "%c_u32_2 = OpConstant %u32 2\n"
2185                 "%c_i32_0 = OpConstant %i32 0\n"
2186                 "%c_i32_1 = OpConstant %i32 1\n"
2187                 "%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
2188                 "%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
2189                 "%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
2190                 "%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
2191                 "%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
2192                 "%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
2193                 "\n"
2194                 "%SSB0_IN    = OpTypeStruct " << inputType1 << " " << inputType2 << "\n"
2195                 "%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
2196                 "%ssboIN     = OpTypeStruct %up_SSB0_IN\n"
2197                 "%up_ssboIN  = OpTypePointer Uniform %ssboIN\n"
2198                 "%ssbo_src   = OpVariable %up_ssboIN Uniform\n"
2199                 "\n"
2200                 "%SSB0_OUT    = OpTypeStruct " << outputType << "\n"
2201                 "%up_SSB0_OUT = OpTypeRuntimeArray %SSB0_OUT\n"
2202                 "%ssboOUT     = OpTypeStruct %up_SSB0_OUT\n"
2203                 "%up_ssboOUT  = OpTypePointer Uniform %ssboOUT\n"
2204                 "%ssbo_dst    = OpVariable %up_ssboOUT Uniform\n"
2205                 "\n"
2206                 "%BP_main = OpFunction %void None %fun\n"
2207                 "%BP_label = OpLabel\n"
2208                 "%invocationNdx = OpVariable  %fp_u32 Function\n";
2209
2210         if (spec.packFloat16Bit)
2211                 src << "%in0 = OpVariable %fp_f16 Function\n"
2212                         "%in1 = OpVariable %fp_f16 Function\n";
2213         else
2214                 src << "%in0 = OpVariable %fp_f32 Function\n"
2215                         "%in1 = OpVariable %fp_f32 Function\n";
2216
2217         src << "%operation = OpVariable %fp_operation Function\n"
2218                 "%out = OpVariable %fp_i32 Function\n"
2219                 "%BP_id_0_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
2220                 "%BP_id_1_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
2221                 "%BP_id_2_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
2222                 "%BP_num_0_ptr  = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_0\n"
2223                 "%BP_num_1_ptr  = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_1\n"
2224                 "%BP_id_0_val = OpLoad %u32 %BP_id_0_ptr\n"
2225                 "%BP_id_1_val = OpLoad %u32 %BP_id_1_ptr\n"
2226                 "%BP_id_2_val = OpLoad %u32 %BP_id_2_ptr\n"
2227                 "%BP_num_0_val = OpLoad %u32 %BP_num_0_ptr\n"
2228                 "%BP_num_1_val = OpLoad %u32 %BP_num_1_ptr\n"
2229                 "\n"
2230                 "%mul_1 = OpIMul %u32 %BP_num_0_val %BP_num_1_val\n"
2231                 "%mul_2 = OpIMul %u32 %mul_1 %BP_id_2_val\n"
2232                 "%mul_3 = OpIMul %u32 %BP_num_0_val %BP_id_1_val\n"
2233                 "%add_1 = OpIAdd %u32 %mul_2 %mul_3\n"
2234                 "%add_2 = OpIAdd %u32 %add_1 %BP_id_0_val\n"
2235                 "OpStore %invocationNdx %add_2\n"
2236                 "%invocationNdx_val = OpLoad %u32 %invocationNdx\n"
2237                 "\n"
2238                 "%src_ptr_0_0 = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_0\n"
2239                 "%src_val_0_0 = OpLoad " << inputType1 << " %src_ptr_0_0\n";
2240
2241         if(spec.packFloat16Bit)
2242                 src << "%val_f16_0_0 = OpFConvert " << packType <<" %src_val_0_0\n"
2243                         "OpStore %in0 %val_f16_0_0\n";
2244         else
2245                 src << "OpStore %in0 %src_val_0_0\n";
2246
2247         src << "\n"
2248                 "%src_ptr_0_1 = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_1\n"
2249                 "%src_val_0_1 = OpLoad " << inputType2 << " %src_ptr_0_1\n";
2250
2251         if (spec.packFloat16Bit)
2252                 src << "%val_f16_0_1 = OpFConvert " << packType << " %src_val_0_1\n"
2253                         "OpStore %in1 %val_f16_0_1\n";
2254         else
2255                 src << "OpStore %in1 %src_val_0_1\n";
2256
2257         src << "\n"
2258                 "OpStore %operation %c_i32_1\n"
2259                 "OpStore %out %c_" << &outputType[1] << "_0\n"
2260                 "\n";
2261
2262         if (spec.packFloat16Bit)
2263                 src << "%in0_val = OpLoad " << packType << " %in0\n"
2264                         "%in1_val = OpLoad " << packType << " %in1\n";
2265         else
2266                 src << "%in0_val = OpLoad " << inputType1 << " %in0\n"
2267                         "%in1_val = OpLoad " << inputType2 << " %in1\n";
2268
2269         src << "\n";
2270         for(int operationNdx = 0; operationNdx < operationAmount; ++operationNdx)
2271         {
2272                 src << sclarComparison  (opeartions[operationNdx], operationNdx,
2273                                                                 spec.inputs[0].varType.getBasicType(),
2274                                                                 outputType,
2275                                                                 spec.outputs[0].varType.getScalarSize());
2276                 src << moveBitOperation("%operation", moveBitNdx);
2277                 ++moveBitNdx;
2278         }
2279
2280         src << "\n"
2281                 "%out_val_final = OpLoad " << outputType << " %out\n"
2282                 "%ssbo_dst_ptr = OpAccessChain %up_i32 %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_0\n"
2283                 "OpStore %ssbo_dst_ptr %out_val_final\n"
2284                 "\n"
2285                 "OpReturn\n"
2286                 "OpFunctionEnd\n";
2287         return src.str();
2288 }
2289
2290
2291 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
2292 {
2293         if(spec.spirVShader)
2294         {
2295                 bool    are16Bit        = false;
2296                 bool    isMediump       = false;
2297                 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2298                 {
2299                         if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
2300                                 are16Bit = true;
2301
2302                         if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
2303                                 isMediump = true;
2304
2305                         if(isMediump && are16Bit)
2306                                 break;
2307                 }
2308
2309                 return generateSpirv(spec, are16Bit, isMediump);
2310         }
2311         else
2312         {
2313                 std::ostringstream src;
2314                 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n";
2315
2316                 if (!spec.globalDeclarations.empty())
2317                         src << spec.globalDeclarations << "\n";
2318
2319                 src << "layout(local_size_x = 1) in;\n"
2320                         << "\n";
2321
2322                 declareBufferBlocks(src, spec);
2323
2324                 src << "void main (void)\n"
2325                         << "{\n"
2326                         << "    uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2327                         << "                       + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2328
2329                 generateExecBufferIo(src, spec, "invocationNdx");
2330
2331                 src << "}\n";
2332
2333                 return src.str();
2334         }
2335 }
2336
2337 void ComputeShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
2338 {
2339         if(shaderSpec.spirVShader)
2340                 programCollection.spirvAsmSources.add("compute") << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3) << generateComputeShader(shaderSpec);
2341         else
2342                 programCollection.glslSources.add("compute") << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
2343 }
2344
2345 void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
2346 {
2347         const VkDevice                                  vkDevice                                = m_context.getDevice();
2348         const DeviceInterface&                  vk                                              = m_context.getDeviceInterface();
2349         const VkQueue                                   queue                                   = m_context.getUniversalQueue();
2350         const deUint32                                  queueFamilyIndex                = m_context.getUniversalQueueFamilyIndex();
2351
2352         DescriptorPoolBuilder                   descriptorPoolBuilder;
2353         DescriptorSetLayoutBuilder              descriptorSetLayoutBuilder;
2354
2355         Move<VkShaderModule>                    computeShaderModule;
2356         Move<VkPipeline>                                computePipeline;
2357         Move<VkPipelineLayout>                  pipelineLayout;
2358         Move<VkCommandPool>                             cmdPool;
2359         Move<VkDescriptorPool>                  descriptorPool;
2360         Move<VkDescriptorSetLayout>             descriptorSetLayout;
2361         Move<VkDescriptorSet>                   descriptorSet;
2362         const deUint32                                  numDescriptorSets               = (m_extraResourcesLayout != 0) ? 2u : 1u;
2363
2364         DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));
2365
2366         initBuffers(numValues);
2367
2368         // Setup input buffer & copy data
2369         uploadInputBuffer(inputs, numValues);
2370
2371         // Create command pool
2372         cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
2373
2374         // Create command buffer
2375
2376         descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
2377         descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2378         descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
2379         descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2380
2381         descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
2382         descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2383
2384         const VkDescriptorSetAllocateInfo allocInfo =
2385         {
2386                 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2387                 DE_NULL,
2388                 *descriptorPool,
2389                 1u,
2390                 &*descriptorSetLayout
2391         };
2392
2393         descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
2394
2395         // Create pipeline layout
2396         {
2397                 const VkDescriptorSetLayout                     descriptorSetLayouts[]  =
2398                 {
2399                         *descriptorSetLayout,
2400                         m_extraResourcesLayout
2401                 };
2402                 const VkPipelineLayoutCreateInfo        pipelineLayoutParams    =
2403                 {
2404                         VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,          // VkStructureType                              sType;
2405                         DE_NULL,                                                                                        // const void*                                  pNext;
2406                         (VkPipelineLayoutCreateFlags)0,                                         // VkPipelineLayoutCreateFlags  flags;
2407                         numDescriptorSets,                                                                      // deUint32                                             CdescriptorSetCount;
2408                         descriptorSetLayouts,                                                           // const VkDescriptorSetLayout* pSetLayouts;
2409                         0u,                                                                                                     // deUint32                                             pushConstantRangeCount;
2410                         DE_NULL                                                                                         // const VkPushConstantRange*   pPushConstantRanges;
2411                 };
2412
2413                 pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
2414         }
2415
2416         // Create shaders
2417         {
2418                 computeShaderModule             = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("compute"), 0);
2419         }
2420
2421         // create pipeline
2422         {
2423                 const VkPipelineShaderStageCreateInfo shaderStageParams[1] =
2424                 {
2425                         {
2426                                 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,            // VkStructureType                                              sType;
2427                                 DE_NULL,                                                                                                        // const void*                                                  pNext;
2428                                 (VkPipelineShaderStageCreateFlags)0u,                                           // VkPipelineShaderStageCreateFlags             flags;
2429                                 VK_SHADER_STAGE_COMPUTE_BIT,                                                            // VkShaderStageFlagsBit                                stage;
2430                                 *computeShaderModule,                                                                           // VkShaderModule                                               shader;
2431                                 "main",                                                                                                         // const char*                                                  pName;
2432                                 DE_NULL                                                                                                         // const VkSpecializationInfo*                  pSpecializationInfo;
2433                         }
2434                 };
2435
2436                 const VkComputePipelineCreateInfo computePipelineParams =
2437                 {
2438                         VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,         // VkStructureType                                                                      sType;
2439                         DE_NULL,                                                                                        // const void*                                                                          pNext;
2440                         (VkPipelineCreateFlags)0,                                                       // VkPipelineCreateFlags                                                        flags;
2441                         *shaderStageParams,                                                                     // VkPipelineShaderStageCreateInfo                                      cs;
2442                         *pipelineLayout,                                                                        // VkPipelineLayout                                                                     layout;
2443                         0u,                                                                                                     // VkPipeline                                                                           basePipelineHandle;
2444                         0u,                                                                                                     // int32_t                                                                                      basePipelineIndex;
2445                 };
2446
2447                 computePipeline = createComputePipeline(vk, vkDevice, DE_NULL, &computePipelineParams);
2448         }
2449
2450         const int                       maxValuesPerInvocation  = m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
2451         int                                     curOffset                               = 0;
2452         const deUint32          inputStride                             = getInputStride();
2453         const deUint32          outputStride                    = getOutputStride();
2454
2455         while (curOffset < numValues)
2456         {
2457                 Move<VkCommandBuffer>   cmdBuffer;
2458                 const int                               numToExec       = de::min(maxValuesPerInvocation, numValues-curOffset);
2459
2460                 // Update descriptors
2461                 {
2462                         DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
2463
2464                         const VkDescriptorBufferInfo outputDescriptorBufferInfo =
2465                         {
2466                                 *m_outputBuffer,                                // VkBuffer                     buffer;
2467                                 curOffset * outputStride,               // VkDeviceSize         offset;
2468                                 numToExec * outputStride                // VkDeviceSize         range;
2469                         };
2470
2471                         descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
2472
2473                         if (inputStride)
2474                         {
2475                                 const VkDescriptorBufferInfo inputDescriptorBufferInfo =
2476                                 {
2477                                         *m_inputBuffer,                                 // VkBuffer                     buffer;
2478                                         curOffset * inputStride,                // VkDeviceSize         offset;
2479                                         numToExec * inputStride                 // VkDeviceSize         range;
2480                                 };
2481
2482                                 descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
2483                         }
2484
2485                         descriptorSetUpdateBuilder.update(vk, vkDevice);
2486                 }
2487
2488                 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2489                 beginCommandBuffer(vk, *cmdBuffer);
2490                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
2491
2492                 {
2493                         const VkDescriptorSet   descriptorSets[]        = { *descriptorSet, extraResources };
2494                         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
2495                 }
2496
2497                 vk.cmdDispatch(*cmdBuffer, numToExec, 1, 1);
2498
2499                 // Insert a barrier so data written by the shader is available to the host
2500                 {
2501                         const VkBufferMemoryBarrier bufferBarrier =
2502                         {
2503                                 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,        // VkStructureType    sType;
2504                                 DE_NULL,                                                                        // const void*        pNext;
2505                                 VK_ACCESS_SHADER_WRITE_BIT,                                     // VkAccessFlags      srcAccessMask;
2506                                 VK_ACCESS_HOST_READ_BIT,                                        // VkAccessFlags      dstAccessMask;
2507                                 VK_QUEUE_FAMILY_IGNORED,                                        // uint32_t           srcQueueFamilyIndex;
2508                                 VK_QUEUE_FAMILY_IGNORED,                                        // uint32_t           dstQueueFamilyIndex;
2509                                 *m_outputBuffer,                                                        // VkBuffer           buffer;
2510                                 0,                                                                                      // VkDeviceSize       offset;
2511                                 VK_WHOLE_SIZE,                                                          // VkDeviceSize       size;
2512                         };
2513
2514                         vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
2515                                                                 0, (const VkMemoryBarrier*)DE_NULL,
2516                                                                 1, &bufferBarrier,
2517                                                                 0, (const VkImageMemoryBarrier*)DE_NULL);
2518                 }
2519
2520                 endCommandBuffer(vk, *cmdBuffer);
2521
2522                 curOffset += numToExec;
2523
2524                 // Execute
2525                 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
2526         }
2527
2528         // Read back data
2529         readOutputBuffer(outputs, numValues);
2530 }
2531
2532 // Tessellation utils
2533
// Returns the GLSL (#version 450) source of the vertex shader used by the
// tessellation utilities. The shader has no inputs: it derives gl_Position
// solely from gl_VertexIndex, using index/2 for x and index%2 for y, with
// z = 0.0 and w = 1.0.
static std::string generateVertexShaderForTess (void)
{
        // The source is constant, so it is assembled from adjacent string
        // literals rather than streamed piece by piece.
        static const char* const        s_vertexSource =
                "#version 450\n"
                "void main (void)\n{\n"
                "    gl_Position = vec4(gl_VertexIndex/2, gl_VertexIndex%2, 0.0, 1.0);\n"
                "}\n";

        return std::string(s_vertexSource);
}
2544
2545 class TessellationExecutor : public BufferIoExecutor
2546 {
2547 public:
2548                                         TessellationExecutor            (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2549         virtual                 ~TessellationExecutor           (void);
2550
2551         void                    renderTess                                      (deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources);
2552
2553 private:
2554         const VkDescriptorSetLayout                                     m_extraResourcesLayout;
2555 };
2556
2557 TessellationExecutor::TessellationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
2558         : BufferIoExecutor                      (context, shaderSpec)
2559         , m_extraResourcesLayout        (extraResourcesLayout)
2560 {
2561         const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
2562
2563         if (!features.tessellationShader)
2564                 TCU_THROW(NotSupportedError, "Tessellation shader is not supported by device");
2565 }
2566
2567 TessellationExecutor::~TessellationExecutor (void)
2568 {
2569 }
2570
2571 void TessellationExecutor::renderTess (deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources)
2572 {
2573         const size_t                                            inputBufferSize                         = numValues * getInputStride();
2574         const VkDevice                                          vkDevice                                        = m_context.getDevice();
2575         const DeviceInterface&                          vk                                                      = m_context.getDeviceInterface();
2576         const VkQueue                                           queue                                           = m_context.getUniversalQueue();
2577         const deUint32                                          queueFamilyIndex                        = m_context.getUniversalQueueFamilyIndex();
2578         Allocator&                                                      memAlloc                                        = m_context.getDefaultAllocator();
2579
2580         const tcu::UVec2                                        renderSize                                      (DEFAULT_RENDER_WIDTH, DEFAULT_RENDER_HEIGHT);
2581
2582         Move<VkImage>                                           colorImage;
2583         de::MovePtr<Allocation>                         colorImageAlloc;
2584         VkFormat                                                        colorFormat                                     = VK_FORMAT_R8G8B8A8_UNORM;
2585         Move<VkImageView>                                       colorImageView;
2586
2587         Move<VkRenderPass>                                      renderPass;
2588         Move<VkFramebuffer>                                     framebuffer;
2589         Move<VkPipelineLayout>                          pipelineLayout;
2590         Move<VkPipeline>                                        graphicsPipeline;
2591
2592         Move<VkShaderModule>                            vertexShaderModule;
2593         Move<VkShaderModule>                            tessControlShaderModule;
2594         Move<VkShaderModule>                            tessEvalShaderModule;
2595         Move<VkShaderModule>                            fragmentShaderModule;
2596
2597         Move<VkCommandPool>                                     cmdPool;
2598         Move<VkCommandBuffer>                           cmdBuffer;
2599
2600         Move<VkDescriptorPool>                          descriptorPool;
2601         Move<VkDescriptorSetLayout>                     descriptorSetLayout;
2602         Move<VkDescriptorSet>                           descriptorSet;
2603         const deUint32                                          numDescriptorSets                       = (m_extraResourcesLayout != 0) ? 2u : 1u;
2604
2605         DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));
2606
2607         // Create color image
2608         {
2609                 const VkImageCreateInfo colorImageParams =
2610                 {
2611                         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                                                                            // VkStructureType                      sType;
2612                         DE_NULL,                                                                                                                                        // const void*                          pNext;
2613                         0u,                                                                                                                                                     // VkImageCreateFlags           flags;
2614                         VK_IMAGE_TYPE_2D,                                                                                                                       // VkImageType                          imageType;
2615                         colorFormat,                                                                                                                            // VkFormat                                     format;
2616                         { renderSize.x(), renderSize.y(), 1u },                                                                         // VkExtent3D                           extent;
2617                         1u,                                                                                                                                                     // deUint32                                     mipLevels;
2618                         1u,                                                                                                                                                     // deUint32                                     arraySize;
2619                         VK_SAMPLE_COUNT_1_BIT,                                                                                                          // VkSampleCountFlagBits        samples;
2620                         VK_IMAGE_TILING_OPTIMAL,                                                                                                        // VkImageTiling                        tiling;
2621                         VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT,          // VkImageUsageFlags            usage;
2622                         VK_SHARING_MODE_EXCLUSIVE,                                                                                                      // VkSharingMode                        sharingMode;
2623                         1u,                                                                                                                                                     // deUint32                                     queueFamilyCount;
2624                         &queueFamilyIndex,                                                                                                                      // const deUint32*                      pQueueFamilyIndices;
2625                         VK_IMAGE_LAYOUT_UNDEFINED                                                                                                       // VkImageLayout                        initialLayout;
2626                 };
2627
2628                 colorImage = createImage(vk, vkDevice, &colorImageParams);
2629
2630                 // Allocate and bind color image memory
2631                 colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *colorImage), MemoryRequirement::Any);
2632                 VK_CHECK(vk.bindImageMemory(vkDevice, *colorImage, colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
2633         }
2634
2635         // Create color attachment view
2636         {
2637                 const VkImageViewCreateInfo colorImageViewParams =
2638                 {
2639                         VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,                       // VkStructureType                      sType;
2640                         DE_NULL,                                                                                        // const void*                          pNext;
2641                         0u,                                                                                                     // VkImageViewCreateFlags       flags;
2642                         *colorImage,                                                                            // VkImage                                      image;
2643                         VK_IMAGE_VIEW_TYPE_2D,                                                          // VkImageViewType                      viewType;
2644                         colorFormat,                                                                            // VkFormat                                     format;
2645                         {
2646                                 VK_COMPONENT_SWIZZLE_R,                                                 // VkComponentSwizzle           r;
2647                                 VK_COMPONENT_SWIZZLE_G,                                                 // VkComponentSwizzle           g;
2648                                 VK_COMPONENT_SWIZZLE_B,                                                 // VkComponentSwizzle           b;
2649                                 VK_COMPONENT_SWIZZLE_A                                                  // VkComponentSwizzle           a;
2650                         },                                                                                                      // VkComponentsMapping          components;
2651                         {
2652                                 VK_IMAGE_ASPECT_COLOR_BIT,                                              // VkImageAspectFlags           aspectMask;
2653                                 0u,                                                                                             // deUint32                                     baseMipLevel;
2654                                 1u,                                                                                             // deUint32                                     mipLevels;
2655                                 0u,                                                                                             // deUint32                                     baseArraylayer;
2656                                 1u                                                                                              // deUint32                                     layerCount;
2657                         }                                                                                                       // VkImageSubresourceRange      subresourceRange;
2658                 };
2659
2660                 colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
2661         }
2662
2663         // Create render pass
2664         {
2665                 const VkAttachmentDescription colorAttachmentDescription =
2666                 {
2667                         0u,                                                                                                     // VkAttachmentDescriptorFlags  flags;
2668                         colorFormat,                                                                            // VkFormat                                             format;
2669                         VK_SAMPLE_COUNT_1_BIT,                                                          // VkSampleCountFlagBits                samples;
2670                         VK_ATTACHMENT_LOAD_OP_CLEAR,                                            // VkAttachmentLoadOp                   loadOp;
2671                         VK_ATTACHMENT_STORE_OP_STORE,                                           // VkAttachmentStoreOp                  storeOp;
2672                         VK_ATTACHMENT_LOAD_OP_DONT_CARE,                                        // VkAttachmentLoadOp                   stencilLoadOp;
2673                         VK_ATTACHMENT_STORE_OP_DONT_CARE,                                       // VkAttachmentStoreOp                  stencilStoreOp;
2674                         VK_IMAGE_LAYOUT_UNDEFINED,                                                      // VkImageLayout                                initialLayout;
2675                         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL                        // VkImageLayout                                finalLayout
2676                 };
2677
2678                 const VkAttachmentDescription attachments[1] =
2679                 {
2680                         colorAttachmentDescription
2681                 };
2682
2683                 const VkAttachmentReference colorAttachmentReference =
2684                 {
2685                         0u,                                                                                                     // deUint32                     attachment;
2686                         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL                        // VkImageLayout        layout;
2687                 };
2688
2689                 const VkSubpassDescription subpassDescription =
2690                 {
2691                         0u,                                                                                                     // VkSubpassDescriptionFlags    flags;
2692                         VK_PIPELINE_BIND_POINT_GRAPHICS,                                        // VkPipelineBindPoint                  pipelineBindPoint;
2693                         0u,                                                                                                     // deUint32                                             inputCount;
2694                         DE_NULL,                                                                                        // const VkAttachmentReference* pInputAttachments;
2695                         1u,                                                                                                     // deUint32                                             colorCount;
2696                         &colorAttachmentReference,                                                      // const VkAttachmentReference* pColorAttachments;
2697                         DE_NULL,                                                                                        // const VkAttachmentReference* pResolveAttachments;
2698                         DE_NULL,                                                                                        // VkAttachmentReference                depthStencilAttachment;
2699                         0u,                                                                                                     // deUint32                                             preserveCount;
2700                         DE_NULL                                                                                         // const VkAttachmentReference* pPreserveAttachments;
2701                 };
2702
2703                 const VkRenderPassCreateInfo renderPassParams =
2704                 {
2705                         VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,                      // VkStructureType                                      sType;
2706                         DE_NULL,                                                                                        // const void*                                          pNext;
2707                         0u,                                                                                                     // VkRenderPassCreateFlags                      flags;
2708                         1u,                                                                                                     // deUint32                                                     attachmentCount;
2709                         attachments,                                                                            // const VkAttachmentDescription*       pAttachments;
2710                         1u,                                                                                                     // deUint32                                                     subpassCount;
2711                         &subpassDescription,                                                            // const VkSubpassDescription*          pSubpasses;
2712                         0u,                                                                                                     // deUint32                                                     dependencyCount;
2713                         DE_NULL                                                                                         // const VkSubpassDependency*           pDependencies;
2714                 };
2715
2716                 renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
2717         }
2718
2719         // Create framebuffer
2720         {
2721                 const VkFramebufferCreateInfo framebufferParams =
2722                 {
2723                         VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,                      // VkStructureType                              sType;
2724                         DE_NULL,                                                                                        // const void*                                  pNext;
2725                         0u,                                                                                                     // VkFramebufferCreateFlags             flags;
2726                         *renderPass,                                                                            // VkRenderPass                                 renderPass;
2727                         1u,                                                                                                     // deUint32                                             attachmentCount;
2728                         &*colorImageView,                                                                       // const VkAttachmentBindInfo*  pAttachments;
2729                         (deUint32)renderSize.x(),                                                       // deUint32                                             width;
2730                         (deUint32)renderSize.y(),                                                       // deUint32                                             height;
2731                         1u                                                                                                      // deUint32                                             layers;
2732                 };
2733
2734                 framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
2735         }
2736
2737         // Create descriptors
2738         {
2739                 DescriptorPoolBuilder           descriptorPoolBuilder;
2740                 DescriptorSetLayoutBuilder      descriptorSetLayoutBuilder;
2741
2742                 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
2743                 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2744                 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
2745                 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2746
2747                 descriptorSetLayout     = descriptorSetLayoutBuilder.build(vk, vkDevice);
2748                 descriptorPool          = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2749
2750                 const VkDescriptorSetAllocateInfo allocInfo =
2751                 {
2752                         VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2753                         DE_NULL,
2754                         *descriptorPool,
2755                         1u,
2756                         &*descriptorSetLayout
2757                 };
2758
2759                 descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
2760                 // Update descriptors
2761                 {
2762                         DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
2763                         const VkDescriptorBufferInfo outputDescriptorBufferInfo =
2764                         {
2765                                 *m_outputBuffer,                                // VkBuffer                     buffer;
2766                                 0u,                                                             // VkDeviceSize         offset;
2767                                 VK_WHOLE_SIZE                                   // VkDeviceSize         range;
2768                         };
2769
2770                         descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
2771
2772                         VkDescriptorBufferInfo inputDescriptorBufferInfo =
2773                         {
2774                                 0,                                                      // VkBuffer                     buffer;
2775                                 0u,                                                     // VkDeviceSize         offset;
2776                                 VK_WHOLE_SIZE                           // VkDeviceSize         range;
2777                         };
2778
2779                         if (inputBufferSize > 0)
2780                         {
2781                                 inputDescriptorBufferInfo.buffer = *m_inputBuffer;
2782
2783                                 descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
2784                         }
2785
2786                         descriptorSetUpdateBuilder.update(vk, vkDevice);
2787                 }
2788         }
2789
2790         // Create pipeline layout
2791         {
2792                 const VkDescriptorSetLayout                     descriptorSetLayouts[]          =
2793                 {
2794                         *descriptorSetLayout,
2795                         m_extraResourcesLayout
2796                 };
2797                 const VkPipelineLayoutCreateInfo pipelineLayoutParams =
2798                 {
2799                         VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,          // VkStructureType                              sType;
2800                         DE_NULL,                                                                                        // const void*                                  pNext;
2801                         (VkPipelineLayoutCreateFlags)0,                                         // VkPipelineLayoutCreateFlags  flags;
2802                         numDescriptorSets,                                                                      // deUint32                                             descriptorSetCount;
2803                         descriptorSetLayouts,                                                           // const VkDescriptorSetLayout* pSetLayouts;
2804                         0u,                                                                                                     // deUint32                                             pushConstantRangeCount;
2805                         DE_NULL                                                                                         // const VkPushConstantRange*   pPushConstantRanges;
2806                 };
2807
2808                 pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
2809         }
2810
2811         // Create shader modules
2812         {
2813                 vertexShaderModule              = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
2814                 tessControlShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_control"), 0);
2815                 tessEvalShaderModule    = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_eval"), 0);
2816                 fragmentShaderModule    = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
2817         }
2818
2819         // Create pipeline
2820         {
2821                 const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
2822                 {
2823                         VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,              // VkStructureType                                                      sType;
2824                         DE_NULL,                                                                                                                // const void*                                                          pNext;
2825                         (VkPipelineVertexInputStateCreateFlags)0,                                               // VkPipelineVertexInputStateCreateFlags        flags;
2826                         0u,                                                                                                                             // deUint32                                                                     bindingCount;
2827                         DE_NULL,                                                                                                                // const VkVertexInputBindingDescription*       pVertexBindingDescriptions;
2828                         0u,                                                                                                                             // deUint32                                                                     attributeCount;
2829                         DE_NULL,                                                                                                                // const VkVertexInputAttributeDescription*     pvertexAttributeDescriptions;
2830                 };
2831
2832                 const std::vector<VkViewport>   viewports       (1, makeViewport(renderSize));
2833                 const std::vector<VkRect2D>             scissors        (1, makeRect2D(renderSize));
2834
2835                 graphicsPipeline = makeGraphicsPipeline(vk,                                                                     // const DeviceInterface&                        vk
2836                                                                                                 vkDevice,                                                       // const VkDevice                                device
2837                                                                                                 *pipelineLayout,                                        // const VkPipelineLayout                        pipelineLayout
2838                                                                                                 *vertexShaderModule,                            // const VkShaderModule                          vertexShaderModule
2839                                                                                                 *tessControlShaderModule,                       // const VkShaderModule                          tessellationControlShaderModule
2840                                                                                                 *tessEvalShaderModule,                          // const VkShaderModule                          tessellationEvalShaderModule
2841                                                                                                 DE_NULL,                                                        // const VkShaderModule                          geometryShaderModule
2842                                                                                                 *fragmentShaderModule,                          // const VkShaderModule                          fragmentShaderModule
2843                                                                                                 *renderPass,                                            // const VkRenderPass                            renderPass
2844                                                                                                 viewports,                                                      // const std::vector<VkViewport>&                viewports
2845                                                                                                 scissors,                                                       // const std::vector<VkRect2D>&                  scissors
2846                                                                                                 VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,       // const VkPrimitiveTopology                     topology
2847                                                                                                 0u,                                                                     // const deUint32                                subpass
2848                                                                                                 patchControlPoints,                                     // const deUint32                                patchControlPoints
2849                                                                                                 &vertexInputStateParams);                       // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
2850         }
2851
2852         // Create command pool
2853         cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
2854
2855         // Create command buffer
2856         {
2857                 const VkClearValue clearValue = getDefaultClearColor();
2858
2859                 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2860
2861                 beginCommandBuffer(vk, *cmdBuffer);
2862
2863                 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), clearValue);
2864
2865                 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
2866
2867                 {
2868                         const VkDescriptorSet   descriptorSets[]        = { *descriptorSet, extraResources };
2869                         vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
2870                 }
2871
2872                 vk.cmdDraw(*cmdBuffer, vertexCount, 1, 0, 0);
2873
2874                 endRenderPass(vk, *cmdBuffer);
2875
2876                 // Insert a barrier so data written by the shader is available to the host
2877                 {
2878                         const VkBufferMemoryBarrier bufferBarrier =
2879                         {
2880                                 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,        // VkStructureType    sType;
2881                                 DE_NULL,                                                                        // const void*        pNext;
2882                                 VK_ACCESS_SHADER_WRITE_BIT,                                     // VkAccessFlags      srcAccessMask;
2883                                 VK_ACCESS_HOST_READ_BIT,                                        // VkAccessFlags      dstAccessMask;
2884                                 VK_QUEUE_FAMILY_IGNORED,                                        // uint32_t           srcQueueFamilyIndex;
2885                                 VK_QUEUE_FAMILY_IGNORED,                                        // uint32_t           dstQueueFamilyIndex;
2886                                 *m_outputBuffer,                                                        // VkBuffer           buffer;
2887                                 0,                                                                                      // VkDeviceSize       offset;
2888                                 VK_WHOLE_SIZE,                                                          // VkDeviceSize       size;
2889                         };
2890
2891                         vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
2892                                                                   0, (const VkMemoryBarrier*)DE_NULL,
2893                                                                   1, &bufferBarrier,
2894                                                                   0, (const VkImageMemoryBarrier*)DE_NULL);
2895                 }
2896
2897                 endCommandBuffer(vk, *cmdBuffer);
2898         }
2899
2900         // Execute Draw
2901         submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
2902 }
2903
2904 // TessControlExecutor
2905
2906 class TessControlExecutor : public TessellationExecutor
2907 {
2908 public:
2909                                                 TessControlExecutor                     (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2910         virtual                         ~TessControlExecutor            (void);
2911
2912         static void                     generateSources                         (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
2913
2914         virtual void            execute                                         (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);
2915
2916 protected:
2917         static std::string      generateTessControlShader       (const ShaderSpec& shaderSpec);
2918 };
2919
2920 TessControlExecutor::TessControlExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
2921         : TessellationExecutor(context, shaderSpec, extraResourcesLayout)
2922 {
2923 }
2924
2925 TessControlExecutor::~TessControlExecutor (void)
2926 {
2927 }
2928
2929 std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
2930 {
2931         std::ostringstream src;
2932         src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
2933
2934         if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
2935                 src << "#extension GL_EXT_tessellation_shader : require\n\n";
2936
2937         if (!shaderSpec.globalDeclarations.empty())
2938                 src << shaderSpec.globalDeclarations << "\n";
2939
2940         src << "\nlayout(vertices = 1) out;\n\n";
2941
2942         declareBufferBlocks(src, shaderSpec);
2943
2944         src << "void main (void)\n{\n";
2945
2946         for (int ndx = 0; ndx < 2; ndx++)
2947                 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
2948
2949         for (int ndx = 0; ndx < 4; ndx++)
2950                 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
2951
2952         src << "\n"
2953                 << "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
2954
2955         generateExecBufferIo(src, shaderSpec, "invocationId");
2956
2957         src << "}\n";
2958
2959         return src.str();
2960 }
2961
// Returns the GLSL source of a minimal tessellation evaluation shader.
//
// The shader only writes gl_Position from gl_TessCoord.xy and performs no
// buffer I/O. The text is a compile-time constant, so it is returned directly
// as concatenated string literals rather than assembled through a stream.
static std::string generateEmptyTessEvalShader (void)
{
	return
		"#version 450\n"
		"#extension GL_EXT_tessellation_shader : require\n\n"
		"layout(triangles, ccw) in;\n"
		"\nvoid main (void)\n{\n"
		"\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n"
		"}\n";
}
2977
2978 void TessControlExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
2979 {
2980         programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
2981         programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) << shaderSpec.buildOptions;
2982         programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateEmptyTessEvalShader()) << shaderSpec.buildOptions;
2983         programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
2984 }
2985
2986 void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
2987 {
2988         const deUint32  patchSize       = 3;
2989
2990         initBuffers(numValues);
2991
2992         // Setup input buffer & copy data
2993         uploadInputBuffer(inputs, numValues);
2994
2995         renderTess(numValues, patchSize * numValues, patchSize, extraResources);
2996
2997         // Read back data
2998         readOutputBuffer(outputs, numValues);
2999 }
3000
3001 // TessEvaluationExecutor
3002
3003 class TessEvaluationExecutor : public TessellationExecutor
3004 {
3005 public:
3006                                                 TessEvaluationExecutor  (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
3007         virtual                         ~TessEvaluationExecutor (void);
3008
3009         static void                     generateSources                 (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
3010
3011         virtual void            execute                                 (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);
3012
3013 protected:
3014         static std::string      generateTessEvalShader  (const ShaderSpec& shaderSpec);
3015 };
3016
3017 TessEvaluationExecutor::TessEvaluationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
3018         : TessellationExecutor (context, shaderSpec, extraResourcesLayout)
3019 {
3020 }
3021
3022 TessEvaluationExecutor::~TessEvaluationExecutor (void)
3023 {
3024 }
3025
// Returns the GLSL source of a tessellation control shader that does no
// buffer I/O: it declares one output vertex per patch and sets every inner
// and outer tessellation level to 1.0.
static std::string generatePassthroughTessControlShader (void)
{
	const int			numInnerLevels	= 2;
	const int			numOuterLevels	= 4;
	std::ostringstream	glsl;

	// Fixed preamble and entry point.
	glsl << "#version 450\n"
			"#extension GL_EXT_tessellation_shader : require\n\n";
	glsl << "layout(vertices = 1) out;\n\n";
	glsl << "void main (void)\n{\n";

	// One assignment per tessellation level.
	for (int levelNdx = 0; levelNdx != numInnerLevels; ++levelNdx)
		glsl << "\tgl_TessLevelInner[" << levelNdx << "] = 1.0;\n";

	for (int levelNdx = 0; levelNdx != numOuterLevels; ++levelNdx)
		glsl << "\tgl_TessLevelOuter[" << levelNdx << "] = 1.0;\n";

	glsl << "}\n";

	return glsl.str();
}
3047
3048 std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
3049 {
3050         std::ostringstream src;
3051
3052         src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3053
3054         if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3055                 src << "#extension GL_EXT_tessellation_shader : require\n\n";
3056
3057         if (!shaderSpec.globalDeclarations.empty())
3058                 src << shaderSpec.globalDeclarations << "\n";
3059
3060         src << "\n";
3061
3062         src << "layout(isolines, equal_spacing) in;\n\n";
3063
3064         declareBufferBlocks(src, shaderSpec);
3065
3066         src << "void main (void)\n{\n"
3067                 << "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
3068                 << "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
3069
3070         generateExecBufferIo(src, shaderSpec, "invocationId");
3071
3072         src     << "}\n";
3073
3074         return src.str();
3075 }
3076
3077 void TessEvaluationExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
3078 {
3079         programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
3080         programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generatePassthroughTessControlShader()) << shaderSpec.buildOptions;
3081         programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) << shaderSpec.buildOptions;
3082         programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
3083 }
3084
3085 void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
3086 {
3087         const int       patchSize               = 2;
3088         const int       alignedValues   = deAlign32(numValues, patchSize);
3089
3090         // Initialize buffers with aligned value count to make room for padding
3091         initBuffers(alignedValues);
3092
3093         // Setup input buffer & copy data
3094         uploadInputBuffer(inputs, numValues);
3095
3096         renderTess((deUint32)alignedValues, (deUint32)alignedValues, (deUint32)patchSize, extraResources);
3097
3098         // Read back data
3099         readOutputBuffer(outputs, numValues);
3100 }
3101
3102 } // anonymous
3103
3104 // ShaderExecutor
3105
3106 ShaderExecutor::~ShaderExecutor (void)
3107 {
3108 }
3109
3110 bool ShaderExecutor::areInputs16Bit (void) const
3111 {
3112         for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3113         {
3114                 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3115                         return true;
3116         }
3117         return false;
3118 }
3119
3120 bool ShaderExecutor::areOutputs16Bit (void) const
3121 {
3122         for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3123         {
3124                 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3125                         return true;
3126         }
3127         return false;
3128 }
3129
3130 bool ShaderExecutor::isOutput16Bit (const size_t ndx) const
3131 {
3132         if (glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3133                 return true;
3134         return false;
3135 }
3136
3137 // Utilities
3138
3139 void generateSources (glu::ShaderType shaderType, const ShaderSpec& shaderSpec, vk::SourceCollections& dst)
3140 {
3141         switch (shaderType)
3142         {
3143                 case glu::SHADERTYPE_VERTEX:                                    VertexShaderExecutor::generateSources   (shaderSpec, dst);      break;
3144                 case glu::SHADERTYPE_TESSELLATION_CONTROL:              TessControlExecutor::generateSources    (shaderSpec, dst);      break;
3145                 case glu::SHADERTYPE_TESSELLATION_EVALUATION:   TessEvaluationExecutor::generateSources (shaderSpec, dst);      break;
3146                 case glu::SHADERTYPE_GEOMETRY:                                  GeometryShaderExecutor::generateSources (shaderSpec, dst);      break;
3147                 case glu::SHADERTYPE_FRAGMENT:                                  FragmentShaderExecutor::generateSources (shaderSpec, dst);      break;
3148                 case glu::SHADERTYPE_COMPUTE:                                   ComputeShaderExecutor::generateSources  (shaderSpec, dst);      break;
3149                 default:
3150                         TCU_THROW(InternalError, "Unsupported shader type");
3151         }
3152 }
3153
3154 ShaderExecutor* createExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
3155 {
3156         switch (shaderType)
3157         {
3158                 case glu::SHADERTYPE_VERTEX:                                    return new VertexShaderExecutor         (context, shaderSpec, extraResourcesLayout);
3159                 case glu::SHADERTYPE_TESSELLATION_CONTROL:              return new TessControlExecutor          (context, shaderSpec, extraResourcesLayout);
3160                 case glu::SHADERTYPE_TESSELLATION_EVALUATION:   return new TessEvaluationExecutor       (context, shaderSpec, extraResourcesLayout);
3161                 case glu::SHADERTYPE_GEOMETRY:                                  return new GeometryShaderExecutor       (context, shaderSpec, extraResourcesLayout);
3162                 case glu::SHADERTYPE_FRAGMENT:                                  return new FragmentShaderExecutor       (context, shaderSpec, extraResourcesLayout);
3163                 case glu::SHADERTYPE_COMPUTE:                                   return new ComputeShaderExecutor        (context, shaderSpec, extraResourcesLayout);
3164                 default:
3165                         TCU_THROW(InternalError, "Unsupported shader type");
3166         }
3167 }
3168
3169 } // shaderexecutor
3170 } // vkt