c77b90f565765fe454e02fd42683cadc7714f03b
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / ssbo / vktSSBOLayoutCase.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 The Khronos Group Inc.
6  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7  * Copyright (c) 2016 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief SSBO layout case.
24  *//*--------------------------------------------------------------------*/
25
26 #include "vktSSBOLayoutCase.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "gluContextInfo.hpp"
29 #include "gluShaderUtil.hpp"
30 #include "gluVarType.hpp"
31 #include "gluVarTypeUtil.hpp"
32 #include "tcuTestLog.hpp"
33 #include "deRandom.hpp"
34 #include "deStringUtil.hpp"
35 #include "deMemory.h"
36 #include "deString.h"
37 #include "deMath.h"
38 #include "deSharedPtr.hpp"
39
40 #include "vkBuilderUtil.hpp"
41 #include "vkMemUtil.hpp"
42 #include "vkPrograms.hpp"
43 #include "vkQueryUtil.hpp"
44 #include "vkRef.hpp"
45 #include "vkRefUtil.hpp"
46 #include "vkTypeUtil.hpp"
47
48 namespace vkt
49 {
50 namespace ssbo
51 {
52
53 using tcu::TestLog;
54 using std::string;
55 using std::vector;
56 using glu::VarType;
57 using glu::StructType;
58 using glu::StructMember;
59
60 struct LayoutFlagsFmt
61 {
62         deUint32 flags;
63         LayoutFlagsFmt (deUint32 flags_) : flags(flags_) {}
64 };
65
66 std::ostream& operator<< (std::ostream& str, const LayoutFlagsFmt& fmt)
67 {
68         static const struct
69         {
70                 deUint32        bit;
71                 const char*     token;
72         } bitDesc[] =
73         {
74                 { LAYOUT_STD140,                "std140"                },
75                 { LAYOUT_STD430,                "std430"                },
76                 { LAYOUT_ROW_MAJOR,             "row_major"             },
77                 { LAYOUT_COLUMN_MAJOR,  "column_major"  }
78         };
79
80         deUint32 remBits = fmt.flags;
81         for (int descNdx = 0; descNdx < DE_LENGTH_OF_ARRAY(bitDesc); descNdx++)
82         {
83                 if (remBits & bitDesc[descNdx].bit)
84                 {
85                         if (remBits != fmt.flags)
86                                 str << ", ";
87                         str << bitDesc[descNdx].token;
88                         remBits &= ~bitDesc[descNdx].bit;
89                 }
90         }
91         DE_ASSERT(remBits == 0);
92         return str;
93 }
94
95 // BufferVar implementation.
96
97 BufferVar::BufferVar (const char* name, const VarType& type, deUint32 flags)
98         : m_name        (name)
99         , m_type        (type)
100         , m_flags       (flags)
101         , m_offset      (~0u)
102 {
103 }
104
105 // BufferBlock implementation.
106
107 BufferBlock::BufferBlock (const char* blockName)
108         : m_blockName           (blockName)
109         , m_arraySize           (-1)
110         , m_flags                       (0)
111 {
112         setArraySize(0);
113 }
114
115 void BufferBlock::setArraySize (int arraySize)
116 {
117         DE_ASSERT(arraySize >= 0);
118         m_lastUnsizedArraySizes.resize(arraySize == 0 ? 1 : arraySize, 0);
119         m_arraySize = arraySize;
120 }
121
122 std::ostream& operator<< (std::ostream& stream, const BlockLayoutEntry& entry)
123 {
124         stream << entry.name << " { name = " << entry.name
125                    << ", size = " << entry.size
126                    << ", activeVarIndices = [";
127
128         for (vector<int>::const_iterator i = entry.activeVarIndices.begin(); i != entry.activeVarIndices.end(); i++)
129         {
130                 if (i != entry.activeVarIndices.begin())
131                         stream << ", ";
132                 stream << *i;
133         }
134
135         stream << "] }";
136         return stream;
137 }
138
139 static bool isUnsizedArray (const BufferVarLayoutEntry& entry)
140 {
141         DE_ASSERT(entry.arraySize != 0 || entry.topLevelArraySize != 0);
142         return entry.arraySize == 0 || entry.topLevelArraySize == 0;
143 }
144
145 std::ostream& operator<< (std::ostream& stream, const BufferVarLayoutEntry& entry)
146 {
147         stream << entry.name << " { type = " << glu::getDataTypeName(entry.type)
148                    << ", blockNdx = " << entry.blockNdx
149                    << ", offset = " << entry.offset
150                    << ", arraySize = " << entry.arraySize
151                    << ", arrayStride = " << entry.arrayStride
152                    << ", matrixStride = " << entry.matrixStride
153                    << ", topLevelArraySize = " << entry.topLevelArraySize
154                    << ", topLevelArrayStride = " << entry.topLevelArrayStride
155                    << ", isRowMajor = " << (entry.isRowMajor ? "true" : "false")
156                    << " }";
157         return stream;
158 }
159
160 // \todo [2012-01-24 pyry] Speed up lookups using hash.
161
162 int BufferLayout::getVariableIndex (const string& name) const
163 {
164         for (int ndx = 0; ndx < (int)bufferVars.size(); ndx++)
165         {
166                 if (bufferVars[ndx].name == name)
167                         return ndx;
168         }
169         return -1;
170 }
171
172 int BufferLayout::getBlockIndex (const string& name) const
173 {
174         for (int ndx = 0; ndx < (int)blocks.size(); ndx++)
175         {
176                 if (blocks[ndx].name == name)
177                         return ndx;
178         }
179         return -1;
180 }
181
182 // ShaderInterface implementation.
183
184 ShaderInterface::ShaderInterface (void)
185 {
186 }
187
188 ShaderInterface::~ShaderInterface (void)
189 {
190         for (std::vector<StructType*>::iterator i = m_structs.begin(); i != m_structs.end(); i++)
191                 delete *i;
192
193         for (std::vector<BufferBlock*>::iterator i = m_bufferBlocks.begin(); i != m_bufferBlocks.end(); i++)
194                 delete *i;
195 }
196
197 StructType& ShaderInterface::allocStruct (const char* name)
198 {
199         m_structs.reserve(m_structs.size()+1);
200         m_structs.push_back(new StructType(name));
201         return *m_structs.back();
202 }
203
204 struct StructNameEquals
205 {
206         std::string name;
207
208         StructNameEquals (const char* name_) : name(name_) {}
209
210         bool operator() (const StructType* type) const
211         {
212                 return type->getTypeName() && name == type->getTypeName();
213         }
214 };
215
216 const StructType* ShaderInterface::findStruct (const char* name) const
217 {
218         std::vector<StructType*>::const_iterator pos = std::find_if(m_structs.begin(), m_structs.end(), StructNameEquals(name));
219         return pos != m_structs.end() ? *pos : DE_NULL;
220 }
221
222 void ShaderInterface::getNamedStructs (std::vector<const StructType*>& structs) const
223 {
224         for (std::vector<StructType*>::const_iterator i = m_structs.begin(); i != m_structs.end(); i++)
225         {
226                 if ((*i)->getTypeName() != DE_NULL)
227                         structs.push_back(*i);
228         }
229 }
230
231 BufferBlock& ShaderInterface::allocBlock (const char* name)
232 {
233         m_bufferBlocks.reserve(m_bufferBlocks.size()+1);
234         m_bufferBlocks.push_back(new BufferBlock(name));
235         return *m_bufferBlocks.back();
236 }
237
238 namespace // Utilities
239 {
240 // Layout computation.
241
242 int getDataTypeByteSize (glu::DataType type)
243 {
244         return glu::getDataTypeScalarSize(type)*(int)sizeof(deUint32);
245 }
246
247 int getDataTypeByteAlignment (glu::DataType type)
248 {
249         switch (type)
250         {
251                 case glu::TYPE_FLOAT:
252                 case glu::TYPE_INT:
253                 case glu::TYPE_UINT:
254                 case glu::TYPE_BOOL:            return 1*(int)sizeof(deUint32);
255
256                 case glu::TYPE_FLOAT_VEC2:
257                 case glu::TYPE_INT_VEC2:
258                 case glu::TYPE_UINT_VEC2:
259                 case glu::TYPE_BOOL_VEC2:       return 2*(int)sizeof(deUint32);
260
261                 case glu::TYPE_FLOAT_VEC3:
262                 case glu::TYPE_INT_VEC3:
263                 case glu::TYPE_UINT_VEC3:
264                 case glu::TYPE_BOOL_VEC3:       // Fall-through to vec4
265
266                 case glu::TYPE_FLOAT_VEC4:
267                 case glu::TYPE_INT_VEC4:
268                 case glu::TYPE_UINT_VEC4:
269                 case glu::TYPE_BOOL_VEC4:       return 4*(int)sizeof(deUint32);
270
271                 default:
272                         DE_ASSERT(false);
273                         return 0;
274         }
275 }
276
//! Rounds a up to the nearest multiple of b (towards positive infinity).
//!
//! \param a Value to round; may be negative.
//! \param b Step; must be greater than zero.
//! \return Smallest multiple of b that is greater than or equal to a.
static inline int deRoundUp32 (int a, int b)
{
	// C++ integer division truncates towards zero, so the remainder carries the sign of a.
	const int rem = a % b;

	if (rem == 0)
		return a;

	// Positive remainder: step up to the next multiple.
	// Negative remainder: dropping it already moves a up to the next multiple.
	return rem > 0 ? a + (b - rem) : a - rem;
}
282
283 int computeStd140BaseAlignment (const VarType& type, deUint32 layoutFlags)
284 {
285         const int vec4Alignment = (int)sizeof(deUint32)*4;
286
287         if (type.isBasicType())
288         {
289                 glu::DataType basicType = type.getBasicType();
290
291                 if (glu::isDataTypeMatrix(basicType))
292                 {
293                         const bool      isRowMajor      = !!(layoutFlags & LAYOUT_ROW_MAJOR);
294                         const int       vecSize         = isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
295                                                                                                  : glu::getDataTypeMatrixNumRows(basicType);
296                         const int       vecAlign        = deAlign32(getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize)), vec4Alignment);
297
298                         return vecAlign;
299                 }
300                 else
301                         return getDataTypeByteAlignment(basicType);
302         }
303         else if (type.isArrayType())
304         {
305                 int elemAlignment = computeStd140BaseAlignment(type.getElementType(), layoutFlags);
306
307                 // Round up to alignment of vec4
308                 return deAlign32(elemAlignment, vec4Alignment);
309         }
310         else
311         {
312                 DE_ASSERT(type.isStructType());
313
314                 int maxBaseAlignment = 0;
315
316                 for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
317                         maxBaseAlignment = de::max(maxBaseAlignment, computeStd140BaseAlignment(memberIter->getType(), layoutFlags));
318
319                 return deAlign32(maxBaseAlignment, vec4Alignment);
320         }
321 }
322
323 int computeStd430BaseAlignment (const VarType& type, deUint32 layoutFlags)
324 {
325         // Otherwise identical to std140 except that alignment of structures and arrays
326         // are not rounded up to alignment of vec4.
327
328         if (type.isBasicType())
329         {
330                 glu::DataType basicType = type.getBasicType();
331
332                 if (glu::isDataTypeMatrix(basicType))
333                 {
334                         const bool      isRowMajor      = !!(layoutFlags & LAYOUT_ROW_MAJOR);
335                         const int       vecSize         = isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
336                                                                                                  : glu::getDataTypeMatrixNumRows(basicType);
337                         const int       vecAlign        = getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize));
338                         return vecAlign;
339                 }
340                 else
341                         return getDataTypeByteAlignment(basicType);
342         }
343         else if (type.isArrayType())
344         {
345                 return computeStd430BaseAlignment(type.getElementType(), layoutFlags);
346         }
347         else
348         {
349                 DE_ASSERT(type.isStructType());
350
351                 int maxBaseAlignment = 0;
352
353                 for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
354                         maxBaseAlignment = de::max(maxBaseAlignment, computeStd430BaseAlignment(memberIter->getType(), layoutFlags));
355
356                 return maxBaseAlignment;
357         }
358 }
359
360 int computeRelaxedBlockBaseAlignment (const VarType& type, deUint32 layoutFlags)
361 {
362         if (type.isBasicType())
363         {
364                 glu::DataType basicType = type.getBasicType();
365
366                 if (glu::isDataTypeVector(basicType))
367                         return 4;
368
369                 if (glu::isDataTypeMatrix(basicType))
370                 {
371                         const bool      isRowMajor      = !!(layoutFlags & LAYOUT_ROW_MAJOR);
372                         const int       vecSize         = isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
373                                                                                                  : glu::getDataTypeMatrixNumRows(basicType);
374                         const int       vecAlign        = getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize));
375                         return vecAlign;
376                 }
377                 else
378                         return getDataTypeByteAlignment(basicType);
379         }
380         else if (type.isArrayType())
381                 return computeStd430BaseAlignment(type.getElementType(), layoutFlags);
382         else
383         {
384                 DE_ASSERT(type.isStructType());
385
386                 int maxBaseAlignment = 0;
387                 for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
388                         maxBaseAlignment = de::max(maxBaseAlignment, computeRelaxedBlockBaseAlignment(memberIter->getType(), layoutFlags));
389
390                 return maxBaseAlignment;
391         }
392 }
393
394 inline deUint32 mergeLayoutFlags (deUint32 prevFlags, deUint32 newFlags)
395 {
396         const deUint32  packingMask             = LAYOUT_STD430|LAYOUT_STD140|LAYOUT_RELAXED;
397         const deUint32  matrixMask              = LAYOUT_ROW_MAJOR|LAYOUT_COLUMN_MAJOR;
398
399         deUint32 mergedFlags = 0;
400
401         mergedFlags |= ((newFlags & packingMask)        ? newFlags : prevFlags) & packingMask;
402         mergedFlags |= ((newFlags & matrixMask)         ? newFlags : prevFlags) & matrixMask;
403
404         return mergedFlags;
405 }
406
//! Returns true when powerOf2 is strictly positive and has exactly one bit set.
template <class T>
bool isPow2(T powerOf2)
{
	// Short-circuit keeps the subtraction from running on non-positive values.
	const bool isPositive = !(powerOf2 <= 0);
	return isPositive && ((powerOf2 & (powerOf2 - (T)1)) == (T)0);
}
414
415 template <class T>
416 T roundToPow2(T number, int powerOf2)
417 {
418         DE_ASSERT(isPow2(powerOf2));
419         return (number + (T)powerOf2 - (T)1) & (T)(~(powerOf2 - 1));
420 }
421
422 //! Appends all child elements to layout, returns value that should be appended to offset.
//!
//! Recursively walks type and pushes one BufferVarLayoutEntry per basic-typed leaf
//! (scalar, vector, matrix, or array of those) into layout.bufferVars.
//!
//! \param layout       Layout that receives the generated entries.
//! \param curBlockNdx  Block index stored in every generated entry.
//! \param baseOffset   Offset at which this type is placed, before alignment.
//! \param curPrefix    Name prefix; becomes the entry name (arrays get "[0]" / "[ndx]" appended).
//! \param type         Type to lay out.
//! \param layoutFlags  Layout flags selecting std140 / relaxed / std430 rules and matrix order.
//! \return Distance from baseOffset to the end of the laid-out type, i.e. including the
//!         padding inserted to align baseOffset.
423 int computeReferenceLayout (
424         BufferLayout&           layout,
425         int                                     curBlockNdx,
426         int                                     baseOffset,
427         const std::string&      curPrefix,
428         const VarType&          type,
429         deUint32                        layoutFlags)
430 {
431         // Reference layout uses std430 rules by default. std140 rules are
432         // chosen only for blocks that have std140 layout. LAYOUT_RELAXED selects
            // the relaxed alignment computation when LAYOUT_STD140 is not set.
433         const int       baseAlignment           = (layoutFlags & LAYOUT_STD140)  != 0 ? computeStd140BaseAlignment(type, layoutFlags)           :
434                                                                           (layoutFlags & LAYOUT_RELAXED) != 0 ? computeRelaxedBlockBaseAlignment(type, layoutFlags)     :
435                                                                           computeStd430BaseAlignment(type, layoutFlags);
436         int                     curOffset                       = deAlign32(baseOffset, baseAlignment);
437         const int       topLevelArraySize       = 1; // Default values
438         const int       topLevelArrayStride     = 0;
439
440         if (type.isBasicType())
441         {
442                 const glu::DataType             basicType       = type.getBasicType();
443                 BufferVarLayoutEntry    entry;
444
445                 entry.name                                      = curPrefix;
446                 entry.type                                      = basicType;
447                 entry.arraySize                         = 1;
448                 entry.arrayStride                       = 0;
449                 entry.matrixStride                      = 0;
450                 entry.topLevelArraySize         = topLevelArraySize;
451                 entry.topLevelArrayStride       = topLevelArrayStride;
452                 entry.blockNdx                          = curBlockNdx;
453
454                 if (glu::isDataTypeMatrix(basicType))
455                 {
456                         // Array of vectors as specified in rules 5 & 7.
457                         const bool      isRowMajor                      = !!(layoutFlags & LAYOUT_ROW_MAJOR);
458                         const int       numVecs                         = isRowMajor ? glu::getDataTypeMatrixNumRows(basicType)
459                                                                                                                  : glu::getDataTypeMatrixNumColumns(basicType);
460
461                         entry.offset            = curOffset;
462                         entry.matrixStride      = baseAlignment;
463                         entry.isRowMajor        = isRowMajor;
464
                            // The matrix occupies numVecs vectors, each baseAlignment bytes apart.
465                         curOffset += numVecs*baseAlignment;
466                 }
467                 else
468                 {
                            // Relaxed layout only: a vector of at most 16 bytes that would cross a
                            // 16-byte boundary, or a larger vector that does not start on one, is
                            // moved up to the next 16-byte boundary.
469                         if (glu::isDataTypeVector(basicType) && (getDataTypeByteSize(basicType) <= 16 ? curOffset / 16 != (curOffset +  getDataTypeByteSize(basicType) - 1) / 16 : curOffset % 16 != 0) && (layoutFlags & LAYOUT_RELAXED))
470                                 curOffset = roundToPow2(curOffset, 16);
471
472                         // Scalar or vector.
473                         entry.offset = curOffset;
474
475                         curOffset += getDataTypeByteSize(basicType);
476                 }
477
478                 layout.bufferVars.push_back(entry);
479         }
480         else if (type.isArrayType())
481         {
482                 const VarType&  elemType        = type.getElementType();
483
484                 if (elemType.isBasicType() && !glu::isDataTypeMatrix(elemType.getBasicType()))
485                 {
486                         // Array of scalars or vectors.
                            // The array's base alignment is used as the element stride.
487                         const glu::DataType             elemBasicType   = elemType.getBasicType();
488                         const int                               stride                  = baseAlignment;
489                         BufferVarLayoutEntry    entry;
490
491                         entry.name                                      = curPrefix + "[0]"; // Array variables are always postfixed with [0]
492                         entry.type                                      = elemBasicType;
493                         entry.blockNdx                          = curBlockNdx;
494                         entry.offset                            = curOffset;
495                         entry.arraySize                         = type.getArraySize();
496                         entry.arrayStride                       = stride;
497                         entry.matrixStride                      = 0;
498                         entry.topLevelArraySize         = topLevelArraySize;
499                         entry.topLevelArrayStride       = topLevelArrayStride;
500
501                         curOffset += stride*type.getArraySize();
502
503                         layout.bufferVars.push_back(entry);
504                 }
505                 else if (elemType.isBasicType() && glu::isDataTypeMatrix(elemType.getBasicType()))
506                 {
507                         // Array of matrices.
                            // Each matrix takes numVecs vectors of vecStride bytes; that product is the array stride.
508                         const glu::DataType                     elemBasicType   = elemType.getBasicType();
509                         const bool                                      isRowMajor              = !!(layoutFlags & LAYOUT_ROW_MAJOR);
510                         const int                                       numVecs                 = isRowMajor ? glu::getDataTypeMatrixNumRows(elemBasicType)
511                                                                                                                                          : glu::getDataTypeMatrixNumColumns(elemBasicType);
512                         const int                                       vecStride               = baseAlignment;
513                         BufferVarLayoutEntry            entry;
514
515                         entry.name                                      = curPrefix + "[0]"; // Array variables are always postfixed with [0]
516                         entry.type                                      = elemBasicType;
517                         entry.blockNdx                          = curBlockNdx;
518                         entry.offset                            = curOffset;
519                         entry.arraySize                         = type.getArraySize();
520                         entry.arrayStride                       = vecStride*numVecs;
521                         entry.matrixStride                      = vecStride;
522                         entry.isRowMajor                        = isRowMajor;
523                         entry.topLevelArraySize         = topLevelArraySize;
524                         entry.topLevelArrayStride       = topLevelArrayStride;
525
526                         curOffset += numVecs*vecStride*type.getArraySize();
527
528                         layout.bufferVars.push_back(entry);
529                 }
530                 else
531                 {
532                         DE_ASSERT(elemType.isStructType() || elemType.isArrayType());
533
                            // Array of structs or arrays: lay out every element separately, each
                            // with its own "[ndx]" suffix appended to the prefix.
534                         for (int elemNdx = 0; elemNdx < type.getArraySize(); elemNdx++)
535                                 curOffset += computeReferenceLayout(layout, curBlockNdx, curOffset, curPrefix + "[" + de::toString(elemNdx) + "]", type.getElementType(), layoutFlags);
536                 }
537         }
538         else
539         {
540                 DE_ASSERT(type.isStructType());
541
                    // Members are laid out one after another under the "<prefix>.<member>" name.
542                 for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
543                         curOffset += computeReferenceLayout(layout, curBlockNdx, curOffset, curPrefix + "." + memberIter->getName(), memberIter->getType(), layoutFlags);
544
                    // Pad the end of the struct up to its base alignment.
545                 curOffset = deAlign32(curOffset, baseAlignment);
546         }
547
548         return curOffset-baseOffset;
549 }
550
551 //! Appends all child elements to layout, returns offset increment.
//!
//! Entry point for a single buffer variable of a block. Variable flags are merged over
//! the block flags with mergeLayoutFlags(). Arrays declared directly on the variable
//! (top-level arrays) are handled here; every other type is forwarded to the VarType overload.
//!
//! \param layout            Layout that receives the generated entries.
//! \param curBlockNdx       Block index stored in every generated entry.
//! \param blockPrefix       Prefix put in front of the variable name.
//! \param baseOffset        Offset at which the variable is placed, before alignment.
//! \param bufVar            Variable to lay out.
//! \param blockLayoutFlags  Layout flags of the enclosing block.
//! \return Distance from baseOffset to the end of the laid-out variable.
552 int computeReferenceLayout (BufferLayout& layout, int curBlockNdx, const std::string& blockPrefix, int baseOffset, const BufferVar& bufVar, deUint32 blockLayoutFlags)
553 {
554         const VarType&  varType                 = bufVar.getType();
555         const deUint32  combinedFlags   = mergeLayoutFlags(blockLayoutFlags, bufVar.getFlags());
556
557         if (varType.isArrayType())
558         {
559                 // Top-level arrays need special care.
                    // An unsized array is recorded with size 0, so it adds nothing to the returned increment.
560                 const int               topLevelArraySize       = varType.getArraySize() == VarType::UNSIZED_ARRAY ? 0 : varType.getArraySize();
561                 const string    prefix                          = blockPrefix + bufVar.getName() + "[0]";
562                 const bool              isStd140                        = (blockLayoutFlags & LAYOUT_STD140) != 0;
563                 const int               vec4Align                       = (int)sizeof(deUint32)*4;
564                 const int               baseAlignment           = isStd140                                                                      ? computeStd140BaseAlignment(varType, combinedFlags)            :
565                                                                                         (blockLayoutFlags & LAYOUT_RELAXED) != 0        ? computeRelaxedBlockBaseAlignment(varType, combinedFlags)      :
566                                                                                         computeStd430BaseAlignment(varType, combinedFlags);
567                 int                             curOffset                       = deAlign32(baseOffset, baseAlignment);
568                 const VarType&  elemType                        = varType.getElementType();
569
570                 if (elemType.isBasicType() && !glu::isDataTypeMatrix(elemType.getBasicType()))
571                 {
572                         // Array of scalars or vectors.
                            // std140 rounds the element stride up to vec4 alignment; otherwise the
                            // element's own alignment is the stride.
573                         const glu::DataType             elemBasicType   = elemType.getBasicType();
574                         const int                               elemBaseAlign   = getDataTypeByteAlignment(elemBasicType);
575                         const int                               stride                  = isStd140 ? deAlign32(elemBaseAlign, vec4Align) : elemBaseAlign;
576                         BufferVarLayoutEntry    entry;
577
                            // The array dimension is stored in arraySize/arrayStride; the top-level
                            // fields keep the values 1 and 0.
578                         entry.name                                      = prefix;
579                         entry.topLevelArraySize         = 1;
580                         entry.topLevelArrayStride       = 0;
581                         entry.type                                      = elemBasicType;
582                         entry.blockNdx                          = curBlockNdx;
583                         entry.offset                            = curOffset;
584                         entry.arraySize                         = topLevelArraySize;
585                         entry.arrayStride                       = stride;
586                         entry.matrixStride                      = 0;
587
588                         layout.bufferVars.push_back(entry);
589
590                         curOffset += stride*topLevelArraySize;
591                 }
592                 else if (elemType.isBasicType() && glu::isDataTypeMatrix(elemType.getBasicType()))
593                 {
594                         // Array of matrices.
                            // vecSize is the component count of one stored vector, numVecs the number
                            // of such vectors per matrix; both depend on the matrix order flag.
595                         const glu::DataType             elemBasicType   = elemType.getBasicType();
596                         const bool                              isRowMajor              = !!(combinedFlags & LAYOUT_ROW_MAJOR);
597                         const int                               vecSize                 = isRowMajor ? glu::getDataTypeMatrixNumColumns(elemBasicType)
598                                                                                                                                  : glu::getDataTypeMatrixNumRows(elemBasicType);
599                         const int                               numVecs                 = isRowMajor ? glu::getDataTypeMatrixNumRows(elemBasicType)
600                                                                                                                                  : glu::getDataTypeMatrixNumColumns(elemBasicType);
601                         const glu::DataType             vecType                 = glu::getDataTypeFloatVec(vecSize);
602                         const int                               vecBaseAlign    = getDataTypeByteAlignment(vecType);
603                         const int                               stride                  = isStd140 ? deAlign32(vecBaseAlign, vec4Align) : vecBaseAlign;
604                         BufferVarLayoutEntry    entry;
605
606                         entry.name                                      = prefix;
607                         entry.topLevelArraySize         = 1;
608                         entry.topLevelArrayStride       = 0;
609                         entry.type                                      = elemBasicType;
610                         entry.blockNdx                          = curBlockNdx;
611                         entry.offset                            = curOffset;
612                         entry.arraySize                         = topLevelArraySize;
613                         entry.arrayStride                       = stride*numVecs;
614                         entry.matrixStride                      = stride;
615                         entry.isRowMajor                        = isRowMajor;
616
617                         layout.bufferVars.push_back(entry);
618
619                         curOffset += stride*numVecs*topLevelArraySize;
620                 }
621                 else
622                 {
623                         DE_ASSERT(elemType.isStructType() || elemType.isArrayType());
624
625                         // Struct base alignment is not added multiple times as curOffset supplied to computeReferenceLayout
626                         // was already aligned correctly. Thus computeReferenceLayout should not add any extra padding
627                         // before struct. Padding after struct will be added as it should.
628                         //
629                         // Stride could be computed prior to creating child elements, but it would essentially require running
630                         // the layout computation twice. Instead we fix stride to child elements afterwards.
631
                            // Only element [0] is laid out; the size it reports is used as the top-level stride.
632                         const int       firstChildNdx   = (int)layout.bufferVars.size();
633                         const int       stride                  = computeReferenceLayout(layout, curBlockNdx, curOffset, prefix, varType.getElementType(), combinedFlags);
634
                            // Patch the top-level array size and stride into every entry created by the call above.
635                         for (int childNdx = firstChildNdx; childNdx < (int)layout.bufferVars.size(); childNdx++)
636                         {
637                                 layout.bufferVars[childNdx].topLevelArraySize   = topLevelArraySize;
638                                 layout.bufferVars[childNdx].topLevelArrayStride = stride;
639                         }
640
641                         curOffset += stride*topLevelArraySize;
642                 }
643
644                 return curOffset-baseOffset;
645         }
646         else
                    // Not an array: the generic overload handles alignment and layout.
647                 return computeReferenceLayout(layout, curBlockNdx, baseOffset, blockPrefix + bufVar.getName(), varType, combinedFlags);
648 }
649
650 void computeReferenceLayout (BufferLayout& layout, ShaderInterface& interface)
651 {
652         int numBlocks = interface.getNumBlocks();
653
654         for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
655         {
656                 BufferBlock&            block                   = interface.getBlock(blockNdx);
657                 bool                            hasInstanceName = block.getInstanceName() != DE_NULL;
658                 std::string                     blockPrefix             = hasInstanceName ? (std::string(block.getBlockName()) + ".") : std::string("");
659                 int                                     curOffset               = 0;
660                 int                                     activeBlockNdx  = (int)layout.blocks.size();
661                 int                                     firstVarNdx             = (int)layout.bufferVars.size();
662
663                 size_t oldSize  = layout.bufferVars.size();
664                 for (BufferBlock::iterator varIter = block.begin(); varIter != block.end(); varIter++)
665                 {
666                         BufferVar& bufVar = *varIter;
667                         curOffset += computeReferenceLayout(layout, activeBlockNdx,  blockPrefix, curOffset, bufVar, block.getFlags());
668                         if (block.getFlags() & LAYOUT_RELAXED)
669                         {
670                                 DE_ASSERT(!(layout.bufferVars.size() <= oldSize));
671                                 bufVar.setOffset(layout.bufferVars[oldSize].offset);
672                         }
673                         oldSize = layout.bufferVars.size();
674                 }
675
676                 int     varIndicesEnd   = (int)layout.bufferVars.size();
677                 int     blockSize               = curOffset;
678                 int     numInstances    = block.isArray() ? block.getArraySize() : 1;
679
680                 // Create block layout entries for each instance.
681                 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
682                 {
683                         // Allocate entry for instance.
684                         layout.blocks.push_back(BlockLayoutEntry());
685                         BlockLayoutEntry& blockEntry = layout.blocks.back();
686
687                         blockEntry.name = block.getBlockName();
688                         blockEntry.size = blockSize;
689
690                         // Compute active variable set for block.
691                         for (int varNdx = firstVarNdx; varNdx < varIndicesEnd; varNdx++)
692                                 blockEntry.activeVarIndices.push_back(varNdx);
693
694                         if (block.isArray())
695                                 blockEntry.name += "[" + de::toString(instanceNdx) + "]";
696                 }
697         }
698 }
699
700 // Value generator.
701
702 void generateValue (const BufferVarLayoutEntry& entry, int unsizedArraySize, void* basePtr, de::Random& rnd)
703 {
704         const glu::DataType     scalarType              = glu::getDataTypeScalarType(entry.type);
705         const int                       scalarSize              = glu::getDataTypeScalarSize(entry.type);
706         const int                       arraySize               = entry.arraySize == 0 ? unsizedArraySize : entry.arraySize;
707         const int                       arrayStride             = entry.arrayStride;
708         const int                       topLevelSize    = entry.topLevelArraySize == 0 ? unsizedArraySize : entry.topLevelArraySize;
709         const int                       topLevelStride  = entry.topLevelArrayStride;
710         const bool                      isMatrix                = glu::isDataTypeMatrix(entry.type);
711         const int                       numVecs                 = isMatrix ? (entry.isRowMajor ? glu::getDataTypeMatrixNumRows(entry.type) : glu::getDataTypeMatrixNumColumns(entry.type)) : 1;
712         const int                       vecSize                 = scalarSize / numVecs;
713         const int                       compSize                = sizeof(deUint32);
714
715         DE_ASSERT(scalarSize%numVecs == 0);
716         DE_ASSERT(topLevelSize >= 0);
717         DE_ASSERT(arraySize >= 0);
718
719         for (int topElemNdx = 0; topElemNdx < topLevelSize; topElemNdx++)
720         {
721                 deUint8* const topElemPtr = (deUint8*)basePtr + entry.offset + topElemNdx*topLevelStride;
722
723                 for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
724                 {
725                         deUint8* const elemPtr = topElemPtr + elemNdx*arrayStride;
726
727                         for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
728                         {
729                                 deUint8* const vecPtr = elemPtr + (isMatrix ? vecNdx*entry.matrixStride : 0);
730
731                                 for (int compNdx = 0; compNdx < vecSize; compNdx++)
732                                 {
733                                         deUint8* const compPtr = vecPtr + compSize*compNdx;
734
735                                         switch (scalarType)
736                                         {
737                                                 case glu::TYPE_FLOAT:   *((float*)compPtr)              = (float)rnd.getInt(-9, 9);                                             break;
738                                                 case glu::TYPE_INT:             *((int*)compPtr)                = rnd.getInt(-9, 9);                                                    break;
739                                                 case glu::TYPE_UINT:    *((deUint32*)compPtr)   = (deUint32)rnd.getInt(0, 9);                                   break;
740                                                 // \note Random bit pattern is used for true values. Spec states that all non-zero values are
741                                                 //       interpreted as true but some implementations fail this.
742                                                 case glu::TYPE_BOOL:    *((deUint32*)compPtr)   = rnd.getBool() ? rnd.getUint32()|1u : 0u;              break;
743                                                 default:
744                                                         DE_ASSERT(false);
745                                         }
746                                 }
747                         }
748                 }
749         }
750 }
751
752 void generateValues (const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers, deUint32 seed)
753 {
754         de::Random      rnd                     (seed);
755         const int       numBlocks       = (int)layout.blocks.size();
756
757         DE_ASSERT(numBlocks == (int)blockPointers.size());
758
759         for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
760         {
761                 const BlockLayoutEntry& blockLayout     = layout.blocks[blockNdx];
762                 const BlockDataPtr&             blockPtr        = blockPointers[blockNdx];
763                 const int                               numEntries      = (int)layout.blocks[blockNdx].activeVarIndices.size();
764
765                 for (int entryNdx = 0; entryNdx < numEntries; entryNdx++)
766                 {
767                         const int                                       varNdx          = blockLayout.activeVarIndices[entryNdx];
768                         const BufferVarLayoutEntry&     varEntry        = layout.bufferVars[varNdx];
769
770                         generateValue(varEntry, blockPtr.lastUnsizedArraySize, blockPtr.ptr, rnd);
771                 }
772         }
773 }
774
775 // Shader generator.
776
777 const char* getCompareFuncForType (glu::DataType type)
778 {
779         switch (type)
780         {
781                 case glu::TYPE_FLOAT:                   return "bool compare_float    (highp float a, highp float b)  { return abs(a - b) < 0.05; }\n";
782                 case glu::TYPE_FLOAT_VEC2:              return "bool compare_vec2     (highp vec2 a, highp vec2 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); }\n";
783                 case glu::TYPE_FLOAT_VEC3:              return "bool compare_vec3     (highp vec3 a, highp vec3 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z); }\n";
784                 case glu::TYPE_FLOAT_VEC4:              return "bool compare_vec4     (highp vec4 a, highp vec4 b)    { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z)&&compare_float(a.w, b.w); }\n";
785                 case glu::TYPE_FLOAT_MAT2:              return "bool compare_mat2     (highp mat2 a, highp mat2 b)    { return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1]); }\n";
786                 case glu::TYPE_FLOAT_MAT2X3:    return "bool compare_mat2x3   (highp mat2x3 a, highp mat2x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1]); }\n";
787                 case glu::TYPE_FLOAT_MAT2X4:    return "bool compare_mat2x4   (highp mat2x4 a, highp mat2x4 b){ return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1]); }\n";
788                 case glu::TYPE_FLOAT_MAT3X2:    return "bool compare_mat3x2   (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); }\n";
789                 case glu::TYPE_FLOAT_MAT3:              return "bool compare_mat3     (highp mat3 a, highp mat3 b)    { return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2]); }\n";
790                 case glu::TYPE_FLOAT_MAT3X4:    return "bool compare_mat3x4   (highp mat3x4 a, highp mat3x4 b){ return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1])&&compare_vec4(a[2], b[2]); }\n";
791                 case glu::TYPE_FLOAT_MAT4X2:    return "bool compare_mat4x2   (highp mat4x2 a, highp mat4x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2])&&compare_vec2(a[3], b[3]); }\n";
792                 case glu::TYPE_FLOAT_MAT4X3:    return "bool compare_mat4x3   (highp mat4x3 a, highp mat4x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2])&&compare_vec3(a[3], b[3]); }\n";
793                 case glu::TYPE_FLOAT_MAT4:              return "bool compare_mat4     (highp mat4 a, highp mat4 b)    { return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1])&&compare_vec4(a[2], b[2])&&compare_vec4(a[3], b[3]); }\n";
794                 case glu::TYPE_INT:                             return "bool compare_int      (highp int a, highp int b)      { return a == b; }\n";
795                 case glu::TYPE_INT_VEC2:                return "bool compare_ivec2    (highp ivec2 a, highp ivec2 b)  { return a == b; }\n";
796                 case glu::TYPE_INT_VEC3:                return "bool compare_ivec3    (highp ivec3 a, highp ivec3 b)  { return a == b; }\n";
797                 case glu::TYPE_INT_VEC4:                return "bool compare_ivec4    (highp ivec4 a, highp ivec4 b)  { return a == b; }\n";
798                 case glu::TYPE_UINT:                    return "bool compare_uint     (highp uint a, highp uint b)    { return a == b; }\n";
799                 case glu::TYPE_UINT_VEC2:               return "bool compare_uvec2    (highp uvec2 a, highp uvec2 b)  { return a == b; }\n";
800                 case glu::TYPE_UINT_VEC3:               return "bool compare_uvec3    (highp uvec3 a, highp uvec3 b)  { return a == b; }\n";
801                 case glu::TYPE_UINT_VEC4:               return "bool compare_uvec4    (highp uvec4 a, highp uvec4 b)  { return a == b; }\n";
802                 case glu::TYPE_BOOL:                    return "bool compare_bool     (bool a, bool b)                { return a == b; }\n";
803                 case glu::TYPE_BOOL_VEC2:               return "bool compare_bvec2    (bvec2 a, bvec2 b)              { return a == b; }\n";
804                 case glu::TYPE_BOOL_VEC3:               return "bool compare_bvec3    (bvec3 a, bvec3 b)              { return a == b; }\n";
805                 case glu::TYPE_BOOL_VEC4:               return "bool compare_bvec4    (bvec4 a, bvec4 b)              { return a == b; }\n";
806                 default:
807                         DE_ASSERT(false);
808                         return DE_NULL;
809         }
810 }
811
812 void getCompareDependencies (std::set<glu::DataType>& compareFuncs, glu::DataType basicType)
813 {
814         switch (basicType)
815         {
816                 case glu::TYPE_FLOAT_VEC2:
817                 case glu::TYPE_FLOAT_VEC3:
818                 case glu::TYPE_FLOAT_VEC4:
819                         compareFuncs.insert(glu::TYPE_FLOAT);
820                         compareFuncs.insert(basicType);
821                         break;
822
823                 case glu::TYPE_FLOAT_MAT2:
824                 case glu::TYPE_FLOAT_MAT2X3:
825                 case glu::TYPE_FLOAT_MAT2X4:
826                 case glu::TYPE_FLOAT_MAT3X2:
827                 case glu::TYPE_FLOAT_MAT3:
828                 case glu::TYPE_FLOAT_MAT3X4:
829                 case glu::TYPE_FLOAT_MAT4X2:
830                 case glu::TYPE_FLOAT_MAT4X3:
831                 case glu::TYPE_FLOAT_MAT4:
832                         compareFuncs.insert(glu::TYPE_FLOAT);
833                         compareFuncs.insert(glu::getDataTypeFloatVec(glu::getDataTypeMatrixNumRows(basicType)));
834                         compareFuncs.insert(basicType);
835                         break;
836
837                 default:
838                         compareFuncs.insert(basicType);
839                         break;
840         }
841 }
842
843 void collectUniqueBasicTypes (std::set<glu::DataType>& basicTypes, const VarType& type)
844 {
845         if (type.isStructType())
846         {
847                 for (StructType::ConstIterator iter = type.getStructPtr()->begin(); iter != type.getStructPtr()->end(); ++iter)
848                         collectUniqueBasicTypes(basicTypes, iter->getType());
849         }
850         else if (type.isArrayType())
851                 collectUniqueBasicTypes(basicTypes, type.getElementType());
852         else
853         {
854                 DE_ASSERT(type.isBasicType());
855                 basicTypes.insert(type.getBasicType());
856         }
857 }
858
859 void collectUniqueBasicTypes (std::set<glu::DataType>& basicTypes, const BufferBlock& bufferBlock)
860 {
861         for (BufferBlock::const_iterator iter = bufferBlock.begin(); iter != bufferBlock.end(); ++iter)
862                 collectUniqueBasicTypes(basicTypes, iter->getType());
863 }
864
865 void collectUniqueBasicTypes (std::set<glu::DataType>& basicTypes, const ShaderInterface& interface)
866 {
867         for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
868                 collectUniqueBasicTypes(basicTypes, interface.getBlock(ndx));
869 }
870
871 void generateCompareFuncs (std::ostream& str, const ShaderInterface& interface)
872 {
873         std::set<glu::DataType> types;
874         std::set<glu::DataType> compareFuncs;
875
876         // Collect unique basic types
877         collectUniqueBasicTypes(types, interface);
878
879         // Set of compare functions required
880         for (std::set<glu::DataType>::const_iterator iter = types.begin(); iter != types.end(); ++iter)
881         {
882                 getCompareDependencies(compareFuncs, *iter);
883         }
884
885         for (int type = 0; type < glu::TYPE_LAST; ++type)
886         {
887                 if (compareFuncs.find(glu::DataType(type)) != compareFuncs.end())
888                         str << getCompareFuncForType(glu::DataType(type));
889         }
890 }
891
892 bool usesRelaxedLayout (const ShaderInterface& interface)
893 {
894         //If any of blocks has LAYOUT_RELAXED flag
895         for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
896         {
897                 if (interface.getBlock(ndx).getFlags() & LAYOUT_RELAXED)
898                         return true;
899         }
900         return false;
901 }
902
//! Stream manipulator value: writes \c level tab characters when streamed.
struct Indent
{
	int level;	//!< Number of tabs to emit.

	Indent (int level_)
		: level(level_)
	{
	}
};

//! Write one tab per indentation level; non-positive levels write nothing.
std::ostream& operator<< (std::ostream& str, const Indent& indent)
{
	int remaining = indent.level;

	while (remaining > 0)
	{
		str << "\t";
		--remaining;
	}

	return str;
}
915
916 void generateDeclaration (std::ostream& src, const BufferVar& bufferVar, int indentLevel)
917 {
918         // \todo [pyry] Qualifiers
919         if ((bufferVar.getFlags() & LAYOUT_MASK) != 0)
920                 src << "layout(" << LayoutFlagsFmt(bufferVar.getFlags() & LAYOUT_MASK) << ") ";
921         else if (bufferVar.getOffset()!= ~0u)
922                 src << "layout(offset = "<<bufferVar.getOffset()<<") ";
923
924         src << glu::declare(bufferVar.getType(), bufferVar.getName(), indentLevel);
925 }
926
927 void generateDeclaration (std::ostream& src, const BufferBlock& block, int bindingPoint)
928 {
929         src << "layout(";
930         if ((block.getFlags() & LAYOUT_MASK) != 0)
931                 src << LayoutFlagsFmt(block.getFlags() & LAYOUT_MASK) << ", ";
932
933         src << "binding = " << bindingPoint;
934
935         src << ") ";
936
937         src << "buffer " << block.getBlockName();
938         src << "\n{\n";
939
940         for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
941         {
942                 src << Indent(1);
943
944                 generateDeclaration(src, *varIter, 1 /* indent level */);
945                 src << ";\n";
946         }
947
948         src << "}";
949
950         if (block.getInstanceName() != DE_NULL)
951         {
952                 src << " " << block.getInstanceName();
953                 if (block.isArray())
954                         src << "[" << block.getArraySize() << "]";
955         }
956         else
957                 DE_ASSERT(!block.isArray());
958
959         src << ";\n";
960 }
961
962 void generateImmMatrixSrc (std::ostream& src, glu::DataType basicType, int matrixStride, bool isRowMajor, const void* valuePtr)
963 {
964         DE_ASSERT(glu::isDataTypeMatrix(basicType));
965
966         const int               compSize                = sizeof(deUint32);
967         const int               numRows                 = glu::getDataTypeMatrixNumRows(basicType);
968         const int               numCols                 = glu::getDataTypeMatrixNumColumns(basicType);
969
970         src << glu::getDataTypeName(basicType) << "(";
971
972         // Constructed in column-wise order.
973         for (int colNdx = 0; colNdx < numCols; colNdx++)
974         {
975                 for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
976                 {
977                         const deUint8*  compPtr = (const deUint8*)valuePtr + (isRowMajor ? rowNdx*matrixStride + colNdx*compSize
978                                                                                                                                                                 : colNdx*matrixStride + rowNdx*compSize);
979
980                         if (colNdx > 0 || rowNdx > 0)
981                                 src << ", ";
982
983                         src << de::floatToString(*((const float*)compPtr), 1);
984                 }
985         }
986
987         src << ")";
988 }
989
990 void generateImmMatrixSrc (std::ostream& src,
991                                                    glu::DataType basicType,
992                                                    int matrixStride,
993                                                    bool isRowMajor,
994                                                    const void* valuePtr,
995                                                    const char* resultVar,
996                                                    const char* typeName,
997                                                    const string shaderName)
998 {
999         const int               compSize                = sizeof(deUint32);
1000         const int               numRows                 = glu::getDataTypeMatrixNumRows(basicType);
1001         const int               numCols                 = glu::getDataTypeMatrixNumColumns(basicType);
1002
1003         typeName = "float";
1004         for (int colNdex = 0; colNdex < numCols; colNdex++)
1005         {
1006                 for (int rowNdex = 0; rowNdex < numRows; rowNdex++)
1007                 {
1008                         src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << "(" << shaderName << "[" << colNdex << "][" << rowNdex << "], ";
1009                         const deUint8*  compPtr = (const deUint8*)valuePtr + (isRowMajor ? rowNdex*matrixStride + colNdex*compSize
1010                                                                                                                                                                                 : colNdex*matrixStride + rowNdex*compSize);
1011
1012                         src << de::floatToString(*((const float*)compPtr), 1);
1013                         src << ");\n";
1014                 }
1015         }
1016
1017         typeName = "vec";
1018         for (int colNdex = 0; colNdex < numCols; colNdex++)
1019         {
1020                 src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << numRows << "(" << shaderName << "[" << colNdex << "], " << typeName << numRows << "(";
1021                 for (int rowNdex = 0; rowNdex < numRows; rowNdex++)
1022                 {
1023                         const deUint8*  compPtr = (const deUint8*)valuePtr + (isRowMajor ? (rowNdex * matrixStride + colNdex * compSize)
1024                                                                                                                                   : (colNdex * matrixStride + rowNdex * compSize));
1025                         src << de::floatToString(*((const float*)compPtr), 1);
1026
1027                         if (rowNdex < numRows-1)
1028                                 src << ", ";
1029                 }
1030                 src << "));\n";
1031         }
1032 }
1033
1034 void generateImmScalarVectorSrc (std::ostream& src, glu::DataType basicType, const void* valuePtr)
1035 {
1036         DE_ASSERT(glu::isDataTypeFloatOrVec(basicType)  ||
1037                           glu::isDataTypeIntOrIVec(basicType)   ||
1038                           glu::isDataTypeUintOrUVec(basicType)  ||
1039                           glu::isDataTypeBoolOrBVec(basicType));
1040
1041         const glu::DataType             scalarType              = glu::getDataTypeScalarType(basicType);
1042         const int                               scalarSize              = glu::getDataTypeScalarSize(basicType);
1043         const int                               compSize                = sizeof(deUint32);
1044
1045         if (scalarSize > 1)
1046                 src << glu::getDataTypeName(basicType) << "(";
1047
1048         for (int scalarNdx = 0; scalarNdx < scalarSize; scalarNdx++)
1049         {
1050                 const deUint8* compPtr = (const deUint8*)valuePtr + scalarNdx*compSize;
1051
1052                 if (scalarNdx > 0)
1053                         src << ", ";
1054
1055                 switch (scalarType)
1056                 {
1057                         case glu::TYPE_FLOAT:   src << de::floatToString(*((const float*)compPtr), 1);                  break;
1058                         case glu::TYPE_INT:             src << *((const int*)compPtr);                                                                  break;
1059                         case glu::TYPE_UINT:    src << *((const deUint32*)compPtr) << "u";                                              break;
1060                         case glu::TYPE_BOOL:    src << (*((const deUint32*)compPtr) != 0u ? "true" : "false");  break;
1061                         default:
1062                                 DE_ASSERT(false);
1063                 }
1064         }
1065
1066         if (scalarSize > 1)
1067                 src << ")";
1068 }
1069
1070 string getAPIName (const BufferBlock& block, const BufferVar& var, const glu::TypeComponentVector& accessPath)
1071 {
1072         std::ostringstream name;
1073
1074         if (block.getInstanceName())
1075                 name << block.getBlockName() << ".";
1076
1077         name << var.getName();
1078
1079         for (glu::TypeComponentVector::const_iterator pathComp = accessPath.begin(); pathComp != accessPath.end(); pathComp++)
1080         {
1081                 if (pathComp->type == glu::VarTypeComponent::STRUCT_MEMBER)
1082                 {
1083                         const VarType           curType         = glu::getVarType(var.getType(), accessPath.begin(), pathComp);
1084                         const StructType*       structPtr       = curType.getStructPtr();
1085
1086                         name << "." << structPtr->getMember(pathComp->index).getName();
1087                 }
1088                 else if (pathComp->type == glu::VarTypeComponent::ARRAY_ELEMENT)
1089                 {
1090                         if (pathComp == accessPath.begin() || (pathComp+1) == accessPath.end())
1091                                 name << "[0]"; // Top- / bottom-level array
1092                         else
1093                                 name << "[" << pathComp->index << "]";
1094                 }
1095                 else
1096                         DE_ASSERT(false);
1097         }
1098
1099         return name.str();
1100 }
1101
1102 string getShaderName (const BufferBlock& block, int instanceNdx, const BufferVar& var, const glu::TypeComponentVector& accessPath)
1103 {
1104         std::ostringstream name;
1105
1106         if (block.getInstanceName())
1107         {
1108                 name << block.getInstanceName();
1109
1110                 if (block.isArray())
1111                         name << "[" << instanceNdx << "]";
1112
1113                 name << ".";
1114         }
1115         else
1116                 DE_ASSERT(instanceNdx == 0);
1117
1118         name << var.getName();
1119
1120         for (glu::TypeComponentVector::const_iterator pathComp = accessPath.begin(); pathComp != accessPath.end(); pathComp++)
1121         {
1122                 if (pathComp->type == glu::VarTypeComponent::STRUCT_MEMBER)
1123                 {
1124                         const VarType           curType         = glu::getVarType(var.getType(), accessPath.begin(), pathComp);
1125                         const StructType*       structPtr       = curType.getStructPtr();
1126
1127                         name << "." << structPtr->getMember(pathComp->index).getName();
1128                 }
1129                 else if (pathComp->type == glu::VarTypeComponent::ARRAY_ELEMENT)
1130                         name << "[" << pathComp->index << "]";
1131                 else
1132                         DE_ASSERT(false);
1133         }
1134
1135         return name.str();
1136 }
1137
1138 int computeOffset (const BufferVarLayoutEntry& varLayout, const glu::TypeComponentVector& accessPath)
1139 {
1140         const int       topLevelNdx             = (accessPath.size() > 1 && accessPath.front().type == glu::VarTypeComponent::ARRAY_ELEMENT) ? accessPath.front().index : 0;
1141         const int       bottomLevelNdx  = (!accessPath.empty() && accessPath.back().type == glu::VarTypeComponent::ARRAY_ELEMENT) ? accessPath.back().index : 0;
1142
1143         return varLayout.offset + varLayout.topLevelArrayStride*topLevelNdx + varLayout.arrayStride*bottomLevelNdx;
1144 }
1145
1146 void generateCompareSrc (
1147         std::ostream&                           src,
1148         const char*                                     resultVar,
1149         const BufferLayout&                     bufferLayout,
1150         const BufferBlock&                      block,
1151         int                                                     instanceNdx,
1152         const BlockDataPtr&                     blockPtr,
1153         const BufferVar&                        bufVar,
1154         const glu::SubTypeAccess&       accessPath,
1155         MatrixLoadFlags                         matrixLoadFlag)
1156 {
1157         const VarType curType = accessPath.getType();
1158
1159         if (curType.isArrayType())
1160         {
1161                 const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();
1162
1163                 for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
1164                         generateCompareSrc(src, resultVar, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.element(elemNdx), LOAD_FULL_MATRIX);
1165         }
1166         else if (curType.isStructType())
1167         {
1168                 const int numMembers = curType.getStructPtr()->getNumMembers();
1169
1170                 for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
1171                         generateCompareSrc(src, resultVar, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.member(memberNdx), LOAD_FULL_MATRIX);
1172         }
1173         else
1174         {
1175                 DE_ASSERT(curType.isBasicType());
1176
1177                 const string    apiName = getAPIName(block, bufVar, accessPath.getPath());
1178                 const int               varNdx  = bufferLayout.getVariableIndex(apiName);
1179
1180                 DE_ASSERT(varNdx >= 0);
1181                 {
1182                         const BufferVarLayoutEntry&     varLayout               = bufferLayout.bufferVars[varNdx];
1183                         const string                            shaderName              = getShaderName(block, instanceNdx, bufVar, accessPath.getPath());
1184                         const glu::DataType                     basicType               = curType.getBasicType();
1185                         const bool                                      isMatrix                = glu::isDataTypeMatrix(basicType);
1186                         const char*                                     typeName                = glu::getDataTypeName(basicType);
1187                         const void*                                     valuePtr                = (const deUint8*)blockPtr.ptr + computeOffset(varLayout, accessPath.getPath());
1188
1189
1190                         if (isMatrix)
1191                         {
1192                                 if (matrixLoadFlag == LOAD_MATRIX_COMPONENTS)
1193                                         generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr, resultVar, typeName, shaderName);
1194                                 else
1195                                 {
1196                                         src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << "(" << shaderName << ", ";
1197                                         generateImmMatrixSrc (src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr);
1198                                         src << ");\n";
1199                                 }
1200                         }
1201                         else
1202                         {
1203                                 src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << "(" << shaderName << ", ";
1204                                 generateImmScalarVectorSrc(src, basicType, valuePtr);
1205                                 src << ");\n";
1206                         }
1207                 }
1208         }
1209 }
1210
1211 void generateCompareSrc (std::ostream& src, const char* resultVar, const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers, MatrixLoadFlags matrixLoadFlag)
1212 {
1213         for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1214         {
1215                 const BufferBlock&      block                   = interface.getBlock(declNdx);
1216                 const bool                      isArray                 = block.isArray();
1217                 const int                       numInstances    = isArray ? block.getArraySize() : 1;
1218
1219                 DE_ASSERT(!isArray || block.getInstanceName());
1220
1221                 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1222                 {
1223                         const string            instanceName    = block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
1224                         const int                       blockNdx                = layout.getBlockIndex(instanceName);
1225                         const BlockDataPtr&     blockPtr                = blockPointers[blockNdx];
1226
1227                         for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1228                         {
1229                                 const BufferVar& bufVar = *varIter;
1230
1231                                 if ((bufVar.getFlags() & ACCESS_READ) == 0)
1232                                         continue; // Don't read from that variable.
1233
1234                                 generateCompareSrc(src, resultVar, layout, block, instanceNdx, blockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()), matrixLoadFlag);
1235                         }
1236                 }
1237         }
1238 }
1239
1240 // \todo [2013-10-14 pyry] Almost identical to generateCompareSrc - unify?
1241
1242 void generateWriteSrc (
1243         std::ostream&                           src,
1244         const BufferLayout&                     bufferLayout,
1245         const BufferBlock&                      block,
1246         int                                                     instanceNdx,
1247         const BlockDataPtr&                     blockPtr,
1248         const BufferVar&                        bufVar,
1249         const glu::SubTypeAccess&       accessPath)
1250 {
1251         const VarType curType = accessPath.getType();
1252
1253         if (curType.isArrayType())
1254         {
1255                 const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();
1256
1257                 for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
1258                         generateWriteSrc(src, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.element(elemNdx));
1259         }
1260         else if (curType.isStructType())
1261         {
1262                 const int numMembers = curType.getStructPtr()->getNumMembers();
1263
1264                 for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
1265                         generateWriteSrc(src, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.member(memberNdx));
1266         }
1267         else
1268         {
1269                 DE_ASSERT(curType.isBasicType());
1270
1271                 const string    apiName = getAPIName(block, bufVar, accessPath.getPath());
1272                 const int               varNdx  = bufferLayout.getVariableIndex(apiName);
1273
1274                 DE_ASSERT(varNdx >= 0);
1275                 {
1276                         const BufferVarLayoutEntry&     varLayout               = bufferLayout.bufferVars[varNdx];
1277                         const string                            shaderName              = getShaderName(block, instanceNdx, bufVar, accessPath.getPath());
1278                         const glu::DataType                     basicType               = curType.getBasicType();
1279                         const bool                                      isMatrix                = glu::isDataTypeMatrix(basicType);
1280                         const void*                                     valuePtr                = (const deUint8*)blockPtr.ptr + computeOffset(varLayout, accessPath.getPath());
1281
1282                         src << "\t" << shaderName << " = ";
1283
1284                         if (isMatrix)
1285                                 generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr);
1286                         else
1287                                 generateImmScalarVectorSrc(src, basicType, valuePtr);
1288
1289                         src << ";\n";
1290                 }
1291         }
1292 }
1293
1294 void generateWriteSrc (std::ostream& src, const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers)
1295 {
1296         for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1297         {
1298                 const BufferBlock&      block                   = interface.getBlock(declNdx);
1299                 const bool                      isArray                 = block.isArray();
1300                 const int                       numInstances    = isArray ? block.getArraySize() : 1;
1301
1302                 DE_ASSERT(!isArray || block.getInstanceName());
1303
1304                 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1305                 {
1306                         const string            instanceName    = block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
1307                         const int                       blockNdx                = layout.getBlockIndex(instanceName);
1308                         const BlockDataPtr&     blockPtr                = blockPointers[blockNdx];
1309
1310                         for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1311                         {
1312                                 const BufferVar& bufVar = *varIter;
1313
1314                                 if ((bufVar.getFlags() & ACCESS_WRITE) == 0)
1315                                         continue; // Don't write to that variable.
1316
1317                                 generateWriteSrc(src, layout, block, instanceNdx, blockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()));
1318                         }
1319                 }
1320         }
1321 }
1322
1323 string generateComputeShader (const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& comparePtrs, const vector<BlockDataPtr>& writePtrs, MatrixLoadFlags matrixLoadFlag)
1324 {
1325         std::ostringstream src;
1326
1327         if (usesRelaxedLayout(interface))
1328                 src << "#version 450\n";
1329         else
1330                 src << "#version 310 es\n";
1331
1332         src << "layout(local_size_x = 1) in;\n";
1333         src << "\n";
1334
1335         // Atomic counter for counting passed invocations.
1336         src << "layout(std140, binding = 0) buffer AcBlock { highp uint ac_numPassed; };\n\n";
1337
1338         std::vector<const StructType*> namedStructs;
1339         interface.getNamedStructs(namedStructs);
1340         for (std::vector<const StructType*>::const_iterator structIter = namedStructs.begin(); structIter != namedStructs.end(); structIter++)
1341                 src << glu::declare(*structIter) << ";\n";
1342
1343         {
1344                 for (int blockNdx = 0; blockNdx < interface.getNumBlocks(); blockNdx++)
1345                 {
1346                         const BufferBlock& block = interface.getBlock(blockNdx);
1347                         generateDeclaration(src, block, 1 + blockNdx);
1348                 }
1349         }
1350
1351         // Comparison utilities.
1352         src << "\n";
1353         generateCompareFuncs(src, interface);
1354
1355         src << "\n"
1356                    "void main (void)\n"
1357                    "{\n"
1358                    "    bool allOk = true;\n";
1359
1360         // Value compare.
1361         generateCompareSrc(src, "allOk", interface, layout, comparePtrs, matrixLoadFlag);
1362
1363         src << "        if (allOk)\n"
1364                 << "            ac_numPassed++;\n"
1365                 << "\n";
1366
1367         // Value write.
1368         generateWriteSrc(src, interface, layout, writePtrs);
1369
1370         src << "}\n";
1371
1372         return src.str();
1373 }
1374
1375 void copyBufferVarData (const BufferVarLayoutEntry& dstEntry, const BlockDataPtr& dstBlockPtr, const BufferVarLayoutEntry& srcEntry, const BlockDataPtr& srcBlockPtr)
1376 {
1377         DE_ASSERT(dstEntry.arraySize <= srcEntry.arraySize);
1378         DE_ASSERT(dstEntry.topLevelArraySize <= srcEntry.topLevelArraySize);
1379         DE_ASSERT(dstBlockPtr.lastUnsizedArraySize <= srcBlockPtr.lastUnsizedArraySize);
1380         DE_ASSERT(dstEntry.type == srcEntry.type);
1381
1382         deUint8* const                  dstBasePtr                      = (deUint8*)dstBlockPtr.ptr + dstEntry.offset;
1383         const deUint8* const    srcBasePtr                      = (const deUint8*)srcBlockPtr.ptr + srcEntry.offset;
1384         const int                               scalarSize                      = glu::getDataTypeScalarSize(dstEntry.type);
1385         const bool                              isMatrix                        = glu::isDataTypeMatrix(dstEntry.type);
1386         const int                               compSize                        = sizeof(deUint32);
1387         const int                               dstArraySize            = dstEntry.arraySize == 0 ? dstBlockPtr.lastUnsizedArraySize : dstEntry.arraySize;
1388         const int                               dstArrayStride          = dstEntry.arrayStride;
1389         const int                               dstTopLevelSize         = dstEntry.topLevelArraySize == 0 ? dstBlockPtr.lastUnsizedArraySize : dstEntry.topLevelArraySize;
1390         const int                               dstTopLevelStride       = dstEntry.topLevelArrayStride;
1391         const int                               srcArraySize            = srcEntry.arraySize == 0 ? srcBlockPtr.lastUnsizedArraySize : srcEntry.arraySize;
1392         const int                               srcArrayStride          = srcEntry.arrayStride;
1393         const int                               srcTopLevelSize         = srcEntry.topLevelArraySize == 0 ? srcBlockPtr.lastUnsizedArraySize : srcEntry.topLevelArraySize;
1394         const int                               srcTopLevelStride       = srcEntry.topLevelArrayStride;
1395
1396         DE_ASSERT(dstArraySize <= srcArraySize && dstTopLevelSize <= srcTopLevelSize);
1397         DE_UNREF(srcArraySize && srcTopLevelSize);
1398
1399         for (int topElemNdx = 0; topElemNdx < dstTopLevelSize; topElemNdx++)
1400         {
1401                 deUint8* const                  dstTopPtr       = dstBasePtr + topElemNdx*dstTopLevelStride;
1402                 const deUint8* const    srcTopPtr       = srcBasePtr + topElemNdx*srcTopLevelStride;
1403
1404                 for (int elementNdx = 0; elementNdx < dstArraySize; elementNdx++)
1405                 {
1406                         deUint8* const                  dstElemPtr      = dstTopPtr + elementNdx*dstArrayStride;
1407                         const deUint8* const    srcElemPtr      = srcTopPtr + elementNdx*srcArrayStride;
1408
1409                         if (isMatrix)
1410                         {
1411                                 const int       numRows = glu::getDataTypeMatrixNumRows(dstEntry.type);
1412                                 const int       numCols = glu::getDataTypeMatrixNumColumns(dstEntry.type);
1413
1414                                 for (int colNdx = 0; colNdx < numCols; colNdx++)
1415                                 {
1416                                         for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
1417                                         {
1418                                                 deUint8*                dstCompPtr      = dstElemPtr + (dstEntry.isRowMajor ? rowNdx*dstEntry.matrixStride + colNdx*compSize
1419                                                                                                                                                                                 : colNdx*dstEntry.matrixStride + rowNdx*compSize);
1420                                                 const deUint8*  srcCompPtr      = srcElemPtr + (srcEntry.isRowMajor ? rowNdx*srcEntry.matrixStride + colNdx*compSize
1421                                                                                                                                                                                 : colNdx*srcEntry.matrixStride + rowNdx*compSize);
1422
1423                                                 DE_ASSERT((deIntptr)(srcCompPtr + compSize) - (deIntptr)srcBlockPtr.ptr <= (deIntptr)srcBlockPtr.size);
1424                                                 DE_ASSERT((deIntptr)(dstCompPtr + compSize) - (deIntptr)dstBlockPtr.ptr <= (deIntptr)dstBlockPtr.size);
1425                                                 deMemcpy(dstCompPtr, srcCompPtr, compSize);
1426                                         }
1427                                 }
1428                         }
1429                         else
1430                         {
1431                                 DE_ASSERT((deIntptr)(srcElemPtr + scalarSize*compSize) - (deIntptr)srcBlockPtr.ptr <= (deIntptr)srcBlockPtr.size);
1432                                 DE_ASSERT((deIntptr)(dstElemPtr + scalarSize*compSize) - (deIntptr)dstBlockPtr.ptr <= (deIntptr)dstBlockPtr.size);
1433                                 deMemcpy(dstElemPtr, srcElemPtr, scalarSize*compSize);
1434                         }
1435                 }
1436         }
1437 }
1438
1439 void copyData (const BufferLayout& dstLayout, const vector<BlockDataPtr>& dstBlockPointers, const BufferLayout& srcLayout, const vector<BlockDataPtr>& srcBlockPointers)
1440 {
1441         // \note Src layout is used as reference in case of activeVarIndices happens to be incorrect in dstLayout blocks.
1442         int numBlocks = (int)srcLayout.blocks.size();
1443
1444         for (int srcBlockNdx = 0; srcBlockNdx < numBlocks; srcBlockNdx++)
1445         {
1446                 const BlockLayoutEntry&         srcBlock        = srcLayout.blocks[srcBlockNdx];
1447                 const BlockDataPtr&                     srcBlockPtr     = srcBlockPointers[srcBlockNdx];
1448                 int                                                     dstBlockNdx     = dstLayout.getBlockIndex(srcBlock.name.c_str());
1449
1450                 if (dstBlockNdx >= 0)
1451                 {
1452                         DE_ASSERT(de::inBounds(dstBlockNdx, 0, (int)dstBlockPointers.size()));
1453
1454                         const BlockDataPtr& dstBlockPtr = dstBlockPointers[dstBlockNdx];
1455
1456                         for (vector<int>::const_iterator srcVarNdxIter = srcBlock.activeVarIndices.begin(); srcVarNdxIter != srcBlock.activeVarIndices.end(); srcVarNdxIter++)
1457                         {
1458                                 const BufferVarLayoutEntry&     srcEntry        = srcLayout.bufferVars[*srcVarNdxIter];
1459                                 int                                                     dstVarNdx       = dstLayout.getVariableIndex(srcEntry.name.c_str());
1460
1461                                 if (dstVarNdx >= 0)
1462                                         copyBufferVarData(dstLayout.bufferVars[dstVarNdx], dstBlockPtr, srcEntry, srcBlockPtr);
1463                         }
1464                 }
1465         }
1466 }
1467
1468 void copyNonWrittenData (
1469         const BufferLayout&                     layout,
1470         const BufferBlock&                      block,
1471         int                                                     instanceNdx,
1472         const BlockDataPtr&                     srcBlockPtr,
1473         const BlockDataPtr&                     dstBlockPtr,
1474         const BufferVar&                        bufVar,
1475         const glu::SubTypeAccess&       accessPath)
1476 {
1477         const VarType curType = accessPath.getType();
1478
1479         if (curType.isArrayType())
1480         {
1481                 const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();
1482
1483                 for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
1484                         copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, accessPath.element(elemNdx));
1485         }
1486         else if (curType.isStructType())
1487         {
1488                 const int numMembers = curType.getStructPtr()->getNumMembers();
1489
1490                 for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
1491                         copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, accessPath.member(memberNdx));
1492         }
1493         else
1494         {
1495                 DE_ASSERT(curType.isBasicType());
1496
1497                 const string    apiName = getAPIName(block, bufVar, accessPath.getPath());
1498                 const int               varNdx  = layout.getVariableIndex(apiName);
1499
1500                 DE_ASSERT(varNdx >= 0);
1501                 {
1502                         const BufferVarLayoutEntry& varLayout = layout.bufferVars[varNdx];
1503                         copyBufferVarData(varLayout, dstBlockPtr, varLayout, srcBlockPtr);
1504                 }
1505         }
1506 }
1507
1508 void copyNonWrittenData (const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& srcPtrs, const vector<BlockDataPtr>& dstPtrs)
1509 {
1510         for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1511         {
1512                 const BufferBlock&      block                   = interface.getBlock(declNdx);
1513                 const bool                      isArray                 = block.isArray();
1514                 const int                       numInstances    = isArray ? block.getArraySize() : 1;
1515
1516                 DE_ASSERT(!isArray || block.getInstanceName());
1517
1518                 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1519                 {
1520                         const string            instanceName    = block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
1521                         const int                       blockNdx                = layout.getBlockIndex(instanceName);
1522                         const BlockDataPtr&     srcBlockPtr             = srcPtrs[blockNdx];
1523                         const BlockDataPtr&     dstBlockPtr             = dstPtrs[blockNdx];
1524
1525                         for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1526                         {
1527                                 const BufferVar& bufVar = *varIter;
1528
1529                                 if (bufVar.getFlags() & ACCESS_WRITE)
1530                                         continue;
1531
1532                                 copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()));
1533                         }
1534                 }
1535         }
1536 }
1537
1538 bool compareComponents (glu::DataType scalarType, const void* ref, const void* res, int numComps)
1539 {
1540         if (scalarType == glu::TYPE_FLOAT)
1541         {
1542                 const float threshold = 0.05f; // Same as used in shaders - should be fine for values being used.
1543
1544                 for (int ndx = 0; ndx < numComps; ndx++)
1545                 {
1546                         const float             refVal          = *((const float*)ref + ndx);
1547                         const float             resVal          = *((const float*)res + ndx);
1548
1549                         if (deFloatAbs(resVal - refVal) >= threshold)
1550                                 return false;
1551                 }
1552         }
1553         else if (scalarType == glu::TYPE_BOOL)
1554         {
1555                 for (int ndx = 0; ndx < numComps; ndx++)
1556                 {
1557                         const deUint32  refVal          = *((const deUint32*)ref + ndx);
1558                         const deUint32  resVal          = *((const deUint32*)res + ndx);
1559
1560                         if ((refVal != 0) != (resVal != 0))
1561                                 return false;
1562                 }
1563         }
1564         else
1565         {
1566                 DE_ASSERT(scalarType == glu::TYPE_INT || scalarType == glu::TYPE_UINT);
1567
1568                 for (int ndx = 0; ndx < numComps; ndx++)
1569                 {
1570                         const deUint32  refVal          = *((const deUint32*)ref + ndx);
1571                         const deUint32  resVal          = *((const deUint32*)res + ndx);
1572
1573                         if (refVal != resVal)
1574                                 return false;
1575                 }
1576         }
1577
1578         return true;
1579 }
1580
1581 bool compareBufferVarData (tcu::TestLog& log, const BufferVarLayoutEntry& refEntry, const BlockDataPtr& refBlockPtr, const BufferVarLayoutEntry& resEntry, const BlockDataPtr& resBlockPtr)
1582 {
1583         DE_ASSERT(resEntry.arraySize <= refEntry.arraySize);
1584         DE_ASSERT(resEntry.topLevelArraySize <= refEntry.topLevelArraySize);
1585         DE_ASSERT(resBlockPtr.lastUnsizedArraySize <= refBlockPtr.lastUnsizedArraySize);
1586         DE_ASSERT(resEntry.type == refEntry.type);
1587
1588         deUint8* const                  resBasePtr                      = (deUint8*)resBlockPtr.ptr + resEntry.offset;
1589         const deUint8* const    refBasePtr                      = (const deUint8*)refBlockPtr.ptr + refEntry.offset;
1590         const glu::DataType             scalarType                      = glu::getDataTypeScalarType(refEntry.type);
1591         const int                               scalarSize                      = glu::getDataTypeScalarSize(resEntry.type);
1592         const bool                              isMatrix                        = glu::isDataTypeMatrix(resEntry.type);
1593         const int                               compSize                        = sizeof(deUint32);
1594         const int                               maxPrints                       = 3;
1595         int                                             numFailed                       = 0;
1596
1597         const int                               resArraySize            = resEntry.arraySize == 0 ? resBlockPtr.lastUnsizedArraySize : resEntry.arraySize;
1598         const int                               resArrayStride          = resEntry.arrayStride;
1599         const int                               resTopLevelSize         = resEntry.topLevelArraySize == 0 ? resBlockPtr.lastUnsizedArraySize : resEntry.topLevelArraySize;
1600         const int                               resTopLevelStride       = resEntry.topLevelArrayStride;
1601         const int                               refArraySize            = refEntry.arraySize == 0 ? refBlockPtr.lastUnsizedArraySize : refEntry.arraySize;
1602         const int                               refArrayStride          = refEntry.arrayStride;
1603         const int                               refTopLevelSize         = refEntry.topLevelArraySize == 0 ? refBlockPtr.lastUnsizedArraySize : refEntry.topLevelArraySize;
1604         const int                               refTopLevelStride       = refEntry.topLevelArrayStride;
1605
1606         DE_ASSERT(resArraySize <= refArraySize && resTopLevelSize <= refTopLevelSize);
1607         DE_UNREF(refArraySize && refTopLevelSize);
1608
1609         for (int topElemNdx = 0; topElemNdx < resTopLevelSize; topElemNdx++)
1610         {
1611                 deUint8* const                  resTopPtr       = resBasePtr + topElemNdx*resTopLevelStride;
1612                 const deUint8* const    refTopPtr       = refBasePtr + topElemNdx*refTopLevelStride;
1613
1614                 for (int elementNdx = 0; elementNdx < resArraySize; elementNdx++)
1615                 {
1616                         deUint8* const                  resElemPtr      = resTopPtr + elementNdx*resArrayStride;
1617                         const deUint8* const    refElemPtr      = refTopPtr + elementNdx*refArrayStride;
1618
1619                         if (isMatrix)
1620                         {
1621                                 const int       numRows = glu::getDataTypeMatrixNumRows(resEntry.type);
1622                                 const int       numCols = glu::getDataTypeMatrixNumColumns(resEntry.type);
1623                                 bool            isOk    = true;
1624
1625                                 for (int colNdx = 0; colNdx < numCols; colNdx++)
1626                                 {
1627                                         for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
1628                                         {
1629                                                 deUint8*                resCompPtr      = resElemPtr + (resEntry.isRowMajor ? rowNdx*resEntry.matrixStride + colNdx*compSize
1630                                                                                                                                                                                 : colNdx*resEntry.matrixStride + rowNdx*compSize);
1631                                                 const deUint8*  refCompPtr      = refElemPtr + (refEntry.isRowMajor ? rowNdx*refEntry.matrixStride + colNdx*compSize
1632                                                                                                                                                                                 : colNdx*refEntry.matrixStride + rowNdx*compSize);
1633
1634                                                 DE_ASSERT((deIntptr)(refCompPtr + compSize) - (deIntptr)refBlockPtr.ptr <= (deIntptr)refBlockPtr.size);
1635                                                 DE_ASSERT((deIntptr)(resCompPtr + compSize) - (deIntptr)resBlockPtr.ptr <= (deIntptr)resBlockPtr.size);
1636
1637                                                 isOk = isOk && compareComponents(scalarType, resCompPtr, refCompPtr, 1);
1638                                         }
1639                                 }
1640
1641                                 if (!isOk)
1642                                 {
1643                                         numFailed += 1;
1644                                         if (numFailed < maxPrints)
1645                                         {
1646                                                 std::ostringstream expected, got;
1647                                                 generateImmMatrixSrc(expected, refEntry.type, refEntry.matrixStride, refEntry.isRowMajor, refElemPtr);
1648                                                 generateImmMatrixSrc(got, resEntry.type, resEntry.matrixStride, resEntry.isRowMajor, resElemPtr);
1649                                                 log << TestLog::Message << "ERROR: mismatch in " << refEntry.name << ", top-level ndx " << topElemNdx << ", bottom-level ndx " << elementNdx << ":\n"
1650                                                                                                 << "  expected " << expected.str() << "\n"
1651                                                                                                 << "  got " << got.str()
1652                                                         << TestLog::EndMessage;
1653                                         }
1654                                 }
1655                         }
1656                         else
1657                         {
1658                                 DE_ASSERT((deIntptr)(refElemPtr + scalarSize*compSize) - (deIntptr)refBlockPtr.ptr <= (deIntptr)refBlockPtr.size);
1659                                 DE_ASSERT((deIntptr)(resElemPtr + scalarSize*compSize) - (deIntptr)resBlockPtr.ptr <= (deIntptr)resBlockPtr.size);
1660
1661                                 const bool isOk = compareComponents(scalarType, resElemPtr, refElemPtr, scalarSize);
1662
1663                                 if (!isOk)
1664                                 {
1665                                         numFailed += 1;
1666                                         if (numFailed < maxPrints)
1667                                         {
1668                                                 std::ostringstream expected, got;
1669                                                 generateImmScalarVectorSrc(expected, refEntry.type, refElemPtr);
1670                                                 generateImmScalarVectorSrc(got, resEntry.type, resElemPtr);
1671                                                 log << TestLog::Message << "ERROR: mismatch in " << refEntry.name << ", top-level ndx " << topElemNdx << ", bottom-level ndx " << elementNdx << ":\n"
1672                                                                                                 << "  expected " << expected.str() << "\n"
1673                                                                                                 << "  got " << got.str()
1674                                                         << TestLog::EndMessage;
1675                                         }
1676                                 }
1677                         }
1678                 }
1679         }
1680
1681         if (numFailed >= maxPrints)
1682                 log << TestLog::Message << "... (" << numFailed << " failures for " << refEntry.name << " in total)" << TestLog::EndMessage;
1683
1684         return numFailed == 0;
1685 }
1686
1687 bool compareData (tcu::TestLog& log, const BufferLayout& refLayout, const vector<BlockDataPtr>& refBlockPointers, const BufferLayout& resLayout, const vector<BlockDataPtr>& resBlockPointers)
1688 {
1689         const int       numBlocks       = (int)refLayout.blocks.size();
1690         bool            allOk           = true;
1691
1692         for (int refBlockNdx = 0; refBlockNdx < numBlocks; refBlockNdx++)
1693         {
1694                 const BlockLayoutEntry&         refBlock        = refLayout.blocks[refBlockNdx];
1695                 const BlockDataPtr&                     refBlockPtr     = refBlockPointers[refBlockNdx];
1696                 int                                                     resBlockNdx     = resLayout.getBlockIndex(refBlock.name.c_str());
1697
1698                 if (resBlockNdx >= 0)
1699                 {
1700                         DE_ASSERT(de::inBounds(resBlockNdx, 0, (int)resBlockPointers.size()));
1701
1702                         const BlockDataPtr& resBlockPtr = resBlockPointers[resBlockNdx];
1703
1704                         for (vector<int>::const_iterator refVarNdxIter = refBlock.activeVarIndices.begin(); refVarNdxIter != refBlock.activeVarIndices.end(); refVarNdxIter++)
1705                         {
1706                                 const BufferVarLayoutEntry&     refEntry        = refLayout.bufferVars[*refVarNdxIter];
1707                                 int                                                     resVarNdx       = resLayout.getVariableIndex(refEntry.name.c_str());
1708
1709                                 if (resVarNdx >= 0)
1710                                 {
1711                                         const BufferVarLayoutEntry& resEntry = resLayout.bufferVars[resVarNdx];
1712                                         allOk = compareBufferVarData(log, refEntry, refBlockPtr, resEntry, resBlockPtr) && allOk;
1713                                 }
1714                         }
1715                 }
1716         }
1717
1718         return allOk;
1719 }
1720
1721 string getBlockAPIName (const BufferBlock& block, int instanceNdx)
1722 {
1723         DE_ASSERT(block.isArray() || instanceNdx == 0);
1724         return block.getBlockName() + (block.isArray() ? ("[" + de::toString(instanceNdx) + "]") : string());
1725 }
1726
// \note Some implementations don't report block members in the order they are declared.
//		 To check whether the size has to be adjusted by the actual size of a top-level array,
//		 we only need to know a) whether there is an unsized top-level array, and b)
//		 what the stride of that array is.
1731
1732 static bool hasUnsizedArray (const BufferLayout& layout, const BlockLayoutEntry& entry)
1733 {
1734         for (vector<int>::const_iterator varNdx = entry.activeVarIndices.begin(); varNdx != entry.activeVarIndices.end(); ++varNdx)
1735         {
1736                 if (isUnsizedArray(layout.bufferVars[*varNdx]))
1737                         return true;
1738         }
1739
1740         return false;
1741 }
1742
1743 static int getUnsizedArrayStride (const BufferLayout& layout, const BlockLayoutEntry& entry)
1744 {
1745         for (vector<int>::const_iterator varNdx = entry.activeVarIndices.begin(); varNdx != entry.activeVarIndices.end(); ++varNdx)
1746         {
1747                 const BufferVarLayoutEntry& varEntry = layout.bufferVars[*varNdx];
1748
1749                 if (varEntry.arraySize == 0)
1750                         return varEntry.arrayStride;
1751                 else if (varEntry.topLevelArraySize == 0)
1752                         return varEntry.topLevelArrayStride;
1753         }
1754
1755         return 0;
1756 }
1757
1758 vector<int> computeBufferSizes (const ShaderInterface& interface, const BufferLayout& layout)
1759 {
1760         vector<int> sizes(layout.blocks.size());
1761
1762         for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1763         {
1764                 const BufferBlock&      block                   = interface.getBlock(declNdx);
1765                 const bool                      isArray                 = block.isArray();
1766                 const int                       numInstances    = isArray ? block.getArraySize() : 1;
1767
1768                 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1769                 {
1770                         const string    apiName         = getBlockAPIName(block, instanceNdx);
1771                         const int               blockNdx        = layout.getBlockIndex(apiName);
1772
1773                         if (blockNdx >= 0)
1774                         {
1775                                 const BlockLayoutEntry&         blockLayout             = layout.blocks[blockNdx];
1776                                 const int                                       baseSize                = blockLayout.size;
1777                                 const bool                                      isLastUnsized   = hasUnsizedArray(layout, blockLayout);
1778                                 const int                                       lastArraySize   = isLastUnsized ? block.getLastUnsizedArraySize(instanceNdx) : 0;
1779                                 const int                                       stride                  = isLastUnsized ? getUnsizedArrayStride(layout, blockLayout) : 0;
1780
1781                                 sizes[blockNdx] = baseSize + lastArraySize*stride;
1782                         }
1783                 }
1784         }
1785
1786         return sizes;
1787 }
1788
1789 BlockDataPtr getBlockDataPtr (const BufferLayout& layout, const BlockLayoutEntry& blockLayout, void* ptr, int bufferSize)
1790 {
1791         const bool      isLastUnsized   = hasUnsizedArray(layout, blockLayout);
1792         const int       baseSize                = blockLayout.size;
1793
1794         if (isLastUnsized)
1795         {
1796                 const int               lastArrayStride = getUnsizedArrayStride(layout, blockLayout);
1797                 const int               lastArraySize   = (bufferSize-baseSize) / (lastArrayStride ? lastArrayStride : 1);
1798
1799                 DE_ASSERT(baseSize + lastArraySize*lastArrayStride == bufferSize);
1800
1801                 return BlockDataPtr(ptr, bufferSize, lastArraySize);
1802         }
1803         else
1804                 return BlockDataPtr(ptr, bufferSize, 0);
1805 }
1806
1807 struct Buffer
1808 {
1809         deUint32                                buffer;
1810         int                                             size;
1811
1812         Buffer (deUint32 buffer_, int size_) : buffer(buffer_), size(size_) {}
1813         Buffer (void) : buffer(0), size(0) {}
1814 };
1815
// Describes where the data of one block lives: which mapped buffer pointer it
// belongs to (index), the offset from that pointer, and the size of the block's
// range. Default-constructed instances have all fields set to zero.
struct BlockLocation
{
	int		index;		// Index into the list of mapped buffer pointers.
	int		offset;		// Offset of the block from the start of that buffer.
	int		size;		// Size of the block's range.

	BlockLocation (void)
		: index		(0)
		, offset	(0)
		, size		(0)
	{
	}

	BlockLocation (int index_, int offset_, int size_)
		: index		(index_)
		, offset	(offset_)
		, size		(size_)
	{
	}
};
1825
1826 void initRefDataStorage (const ShaderInterface& interface, const BufferLayout& layout, RefDataStorage& storage)
1827 {
1828         DE_ASSERT(storage.data.empty() && storage.pointers.empty());
1829
1830         const vector<int>       bufferSizes = computeBufferSizes(interface, layout);
1831         int                                     totalSize       = 0;
1832
1833         for (vector<int>::const_iterator sizeIter = bufferSizes.begin(); sizeIter != bufferSizes.end(); ++sizeIter)
1834                 totalSize += *sizeIter;
1835
1836         storage.data.resize(totalSize);
1837
1838         // Pointers for each block.
1839         {
1840                 deUint8*        basePtr         = storage.data.empty() ? DE_NULL : &storage.data[0];
1841                 int                     curOffset       = 0;
1842
1843                 DE_ASSERT(bufferSizes.size() == layout.blocks.size());
1844                 DE_ASSERT(totalSize == 0 || basePtr);
1845
1846                 storage.pointers.resize(layout.blocks.size());
1847
1848                 for (int blockNdx = 0; blockNdx < (int)layout.blocks.size(); blockNdx++)
1849                 {
1850                         const BlockLayoutEntry& blockLayout             = layout.blocks[blockNdx];
1851                         const int                               bufferSize              = bufferSizes[blockNdx];
1852
1853                         storage.pointers[blockNdx] = getBlockDataPtr(layout, blockLayout, basePtr + curOffset, bufferSize);
1854
1855                         curOffset += bufferSize;
1856                 }
1857         }
1858 }
1859
1860
1861 vector<BlockDataPtr> blockLocationsToPtrs (const BufferLayout& layout, const vector<BlockLocation>& blockLocations, const vector<void*>& bufPtrs)
1862 {
1863         vector<BlockDataPtr> blockPtrs(blockLocations.size());
1864
1865         DE_ASSERT(layout.blocks.size() == blockLocations.size());
1866
1867         for (int blockNdx = 0; blockNdx < (int)layout.blocks.size(); blockNdx++)
1868         {
1869                 const BlockLayoutEntry& blockLayout             = layout.blocks[blockNdx];
1870                 const BlockLocation&    location                = blockLocations[blockNdx];
1871
1872                 blockPtrs[blockNdx] = getBlockDataPtr(layout, blockLayout, (deUint8*)bufPtrs[location.index] + location.offset, location.size);
1873         }
1874
1875         return blockPtrs;
1876 }
1877
1878 } // anonymous (utilities)
1879
1880 de::MovePtr<vk::Allocation> allocateAndBindMemory (Context& context, vk::VkBuffer buffer, vk::MemoryRequirement memReqs)
1881 {
1882         const vk::DeviceInterface&              vkd             = context.getDeviceInterface();
1883         const vk::VkMemoryRequirements  bufReqs = vk::getBufferMemoryRequirements(vkd, context.getDevice(), buffer);
1884         de::MovePtr<vk::Allocation>             memory  = context.getDefaultAllocator().allocate(bufReqs, memReqs);
1885
1886         vkd.bindBufferMemory(context.getDevice(), buffer, memory->getMemory(), memory->getOffset());
1887
1888         return memory;
1889 }
1890
1891 vk::Move<vk::VkBuffer> createBuffer (Context& context, vk::VkDeviceSize bufferSize, vk::VkBufferUsageFlags usageFlags)
1892 {
1893         const vk::VkDevice                      vkDevice                        = context.getDevice();
1894         const vk::DeviceInterface&      vk                                      = context.getDeviceInterface();
1895         const deUint32                  queueFamilyIndex        = context.getUniversalQueueFamilyIndex();
1896
1897         const vk::VkBufferCreateInfo    bufferInfo              =
1898         {
1899                 vk::VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,       // VkStructureType              sType;
1900                 DE_NULL,                                                                        // const void*                  pNext;
1901                 0u,                                                                                     // VkBufferCreateFlags  flags;
1902                 bufferSize,                                                                     // VkDeviceSize                 size;
1903                 usageFlags,                                                                     // VkBufferUsageFlags   usage;
1904                 vk::VK_SHARING_MODE_EXCLUSIVE,                          // VkSharingMode                sharingMode;
1905                 1u,                                                                                     // deUint32                             queueFamilyCount;
1906                 &queueFamilyIndex                                                       // const deUint32*              pQueueFamilyIndices;
1907         };
1908
1909         return vk::createBuffer(vk, vkDevice, &bufferInfo);
1910 }
1911
1912 // SSBOLayoutCaseInstance
1913
1914 class SSBOLayoutCaseInstance : public TestInstance
1915 {
1916 public:
1917                                                                 SSBOLayoutCaseInstance  (Context&                                       context,
1918                                                                                                                 SSBOLayoutCase::BufferMode      bufferMode,
1919                                                                                                                 const ShaderInterface&          interface,
1920                                                                                                                 const BufferLayout&                     refLayout,
1921                                                                                                                 const RefDataStorage&           initialData,
1922                                                                                                                 const RefDataStorage&           writeData);
1923         virtual                                         ~SSBOLayoutCaseInstance (void);
1924         virtual tcu::TestStatus         iterate                                         (void);
1925
1926 private:
1927         SSBOLayoutCase::BufferMode      m_bufferMode;
1928         const ShaderInterface&          m_interface;
1929         const BufferLayout&                     m_refLayout;
1930         const RefDataStorage&           m_initialData;  // Initial data stored in buffer.
1931         const RefDataStorage&           m_writeData;    // Data written by compute shader.
1932
1933
1934         typedef de::SharedPtr<vk::Unique<vk::VkBuffer> >        VkBufferSp;
1935         typedef de::SharedPtr<vk::Allocation>                           AllocationSp;
1936
1937         std::vector<VkBufferSp>         m_uniformBuffers;
1938         std::vector<AllocationSp>       m_uniformAllocs;
1939 };
1940
1941 SSBOLayoutCaseInstance::SSBOLayoutCaseInstance (Context&                                        context,
1942                                                                                                 SSBOLayoutCase::BufferMode      bufferMode,
1943                                                                                                 const ShaderInterface&          interface,
1944                                                                                                 const BufferLayout&                     refLayout,
1945                                                                                                 const RefDataStorage&           initialData,
1946                                                                                                 const RefDataStorage&           writeData)
1947         : TestInstance  (context)
1948         , m_bufferMode  (bufferMode)
1949         , m_interface   (interface)
1950         , m_refLayout   (refLayout)
1951         , m_initialData (initialData)
1952         , m_writeData   (writeData)
1953 {
1954 }
1955
1956 SSBOLayoutCaseInstance::~SSBOLayoutCaseInstance (void)
1957 {
1958 }
1959
1960 tcu::TestStatus SSBOLayoutCaseInstance::iterate (void)
1961 {
1962         // todo: add compute stage availability check
1963         const vk::DeviceInterface&      vk                                      = m_context.getDeviceInterface();
1964         const vk::VkDevice                      device                          = m_context.getDevice();
1965         const vk::VkQueue                       queue                           = m_context.getUniversalQueue();
1966         const deUint32                          queueFamilyIndex        = m_context.getUniversalQueueFamilyIndex();
1967
1968         // Create descriptor set
1969         const deUint32 acBufferSize = 1024;
1970         vk::Move<vk::VkBuffer> acBuffer (createBuffer(m_context, acBufferSize, vk:: VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
1971         de::UniquePtr<vk::Allocation> acBufferAlloc (allocateAndBindMemory(m_context, *acBuffer, vk::MemoryRequirement::HostVisible));
1972
1973         deMemset(acBufferAlloc->getHostPtr(), 0, acBufferSize);
1974         flushMappedMemoryRange(vk, device, acBufferAlloc->getMemory(), acBufferAlloc->getOffset(), acBufferSize);
1975
1976         vk::DescriptorSetLayoutBuilder setLayoutBuilder;
1977         vk::DescriptorPoolBuilder poolBuilder;
1978
1979         setLayoutBuilder
1980                 .addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
1981
1982         int numBlocks = 0;
1983         const int numBindings = m_interface.getNumBlocks();
1984         for (int bindingNdx = 0; bindingNdx < numBindings; bindingNdx++)
1985         {
1986                 const BufferBlock& block = m_interface.getBlock(bindingNdx);
1987                 if (block.isArray())
1988                 {
1989                         setLayoutBuilder
1990                                 .addArrayBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, block.getArraySize(), vk::VK_SHADER_STAGE_COMPUTE_BIT);
1991                         numBlocks += block.getArraySize();
1992                 }
1993                 else
1994                 {
1995                         setLayoutBuilder
1996                                 .addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
1997                         numBlocks += 1;
1998                 }
1999         }
2000
2001         poolBuilder
2002                 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)(1 + numBlocks));
2003
2004         const vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(setLayoutBuilder.build(vk, device));
2005         const vk::Unique<vk::VkDescriptorPool> descriptorPool(poolBuilder.build(vk, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2006
2007         const vk::VkDescriptorSetAllocateInfo allocInfo =
2008         {
2009                 vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2010                 DE_NULL,
2011                 *descriptorPool,
2012                 1u,
2013                 &descriptorSetLayout.get(),
2014         };
2015
2016         const vk::Unique<vk::VkDescriptorSet> descriptorSet(allocateDescriptorSet(vk, device, &allocInfo));
2017         const vk::VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*acBuffer, 0ull, acBufferSize);
2018
2019         vk::DescriptorSetUpdateBuilder setUpdateBuilder;
2020         std::vector<vk::VkDescriptorBufferInfo> descriptors(numBlocks);
2021
2022         setUpdateBuilder
2023                 .writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo);
2024
2025         vector<BlockDataPtr>  mappedBlockPtrs;
2026
2027         // Upload base buffers
2028         const std::vector<int> bufferSizes      = computeBufferSizes(m_interface, m_refLayout);
2029         {
2030                 std::vector<void*>                              mapPtrs;
2031                 std::vector<BlockLocation>              blockLocations  (numBlocks);
2032
2033                 DE_ASSERT(bufferSizes.size() == m_refLayout.blocks.size());
2034
2035                 if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
2036                 {
2037                         mapPtrs.resize(numBlocks);
2038                         for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2039                         {
2040                                 const deUint32 bufferSize = bufferSizes[blockNdx];
2041                                 DE_ASSERT(bufferSize > 0);
2042
2043                                 blockLocations[blockNdx] = BlockLocation(blockNdx, 0, bufferSize);
2044
2045                                 vk::Move<vk::VkBuffer>                          buffer          = createBuffer(m_context, bufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
2046                                 de::MovePtr<vk::Allocation>                     alloc           = allocateAndBindMemory(m_context, *buffer, vk::MemoryRequirement::HostVisible);
2047
2048                                 descriptors[blockNdx] = makeDescriptorBufferInfo(*buffer, 0ull, bufferSize);
2049
2050                                 mapPtrs[blockNdx] = alloc->getHostPtr();
2051
2052                                 m_uniformBuffers.push_back(VkBufferSp(new vk::Unique<vk::VkBuffer>(buffer)));
2053                                 m_uniformAllocs.push_back(AllocationSp(alloc.release()));
2054                         }
2055                 }
2056                 else
2057                 {
2058                         DE_ASSERT(m_bufferMode == SSBOLayoutCase::BUFFERMODE_SINGLE);
2059
2060                         vk::VkPhysicalDeviceProperties properties;
2061                         m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
2062                         const int       bindingAlignment        = (int)properties.limits.minStorageBufferOffsetAlignment;
2063                         int                     curOffset                       = 0;
2064                         for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2065                         {
2066                                 const int bufferSize = bufferSizes[blockNdx];
2067                                 DE_ASSERT(bufferSize > 0);
2068
2069                                 if (bindingAlignment > 0)
2070                                         curOffset = deRoundUp32(curOffset, bindingAlignment);
2071
2072                                 blockLocations[blockNdx] = BlockLocation(0, curOffset, bufferSize);
2073                                 curOffset += bufferSize;
2074                         }
2075
2076                         const int                                               totalBufferSize = curOffset;
2077                         vk::Move<vk::VkBuffer>                  buffer                  = createBuffer(m_context, totalBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
2078                         de::MovePtr<vk::Allocation>             alloc                   = allocateAndBindMemory(m_context, *buffer, vk::MemoryRequirement::HostVisible);
2079
2080                         mapPtrs.push_back(alloc->getHostPtr());
2081
2082                         for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2083                         {
2084                                 const deUint32                                          bufferSize      = bufferSizes[blockNdx];
2085                                 const deUint32                                          offset          = blockLocations[blockNdx].offset;
2086
2087                                 descriptors[blockNdx] = makeDescriptorBufferInfo(*buffer, offset, bufferSize);
2088                         }
2089
2090                         m_uniformBuffers.push_back(VkBufferSp(new vk::Unique<vk::VkBuffer>(buffer)));
2091                         m_uniformAllocs.push_back(AllocationSp(alloc.release()));
2092                 }
2093
2094                 // Update remaining bindings
2095                 {
2096                         int blockNdx = 0;
2097                         for (int bindingNdx = 0; bindingNdx < numBindings; ++bindingNdx)
2098                         {
2099                                 const BufferBlock&      block                           = m_interface.getBlock(bindingNdx);
2100                                 const int                       numBlocksInBinding      = (block.isArray() ? block.getArraySize() : 1);
2101
2102                                 setUpdateBuilder.writeArray(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(bindingNdx + 1),
2103                                         vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBlocksInBinding, &descriptors[blockNdx]);
2104
2105                                 blockNdx += numBlocksInBinding;
2106                         }
2107                 }
2108
2109                 // Copy the initial data to the storage buffers
2110                 {
2111                         mappedBlockPtrs = blockLocationsToPtrs(m_refLayout, blockLocations, mapPtrs);
2112                         copyData(m_refLayout, mappedBlockPtrs, m_refLayout, m_initialData.pointers);
2113
2114                         if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
2115                         {
2116                                 DE_ASSERT(m_uniformAllocs.size() == bufferSizes.size());
2117                                 for (size_t allocNdx = 0; allocNdx < m_uniformAllocs.size(); allocNdx++)
2118                                 {
2119                                         const int size = bufferSizes[allocNdx];
2120                                         vk::Allocation* alloc = m_uniformAllocs[allocNdx].get();
2121                                         flushMappedMemoryRange(vk, device, alloc->getMemory(), alloc->getOffset(), size);
2122                                 }
2123                         }
2124                         else
2125                         {
2126                                 DE_ASSERT(m_bufferMode == SSBOLayoutCase::BUFFERMODE_SINGLE);
2127                                 DE_ASSERT(m_uniformAllocs.size() == 1);
2128                                 int totalSize = 0;
2129                                 for (size_t bufferNdx = 0; bufferNdx < bufferSizes.size(); bufferNdx++)
2130                                 {
2131                                         totalSize += bufferSizes[bufferNdx];
2132                                 }
2133
2134                                 DE_ASSERT(totalSize > 0);
2135                                 vk::Allocation* alloc = m_uniformAllocs[0].get();
2136                                 flushMappedMemoryRange(vk, device, alloc->getMemory(), alloc->getOffset(), totalSize);
2137                         }
2138                 }
2139         }
2140
2141         setUpdateBuilder.update(vk, device);
2142
2143         const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams =
2144         {
2145                 vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,      // VkStructureType                              sType;
2146                 DE_NULL,                                                                                        // const void*                                  pNext;
2147                 (vk::VkPipelineLayoutCreateFlags)0,
2148                 1u,                                                                                                     // deUint32                                             descriptorSetCount;
2149                 &*descriptorSetLayout,                                                          // const VkDescriptorSetLayout* pSetLayouts;
2150                 0u,                                                                                                     // deUint32                                             pushConstantRangeCount;
2151                 DE_NULL,                                                                                        // const VkPushConstantRange*   pPushConstantRanges;
2152         };
2153         vk::Move<vk::VkPipelineLayout> pipelineLayout(createPipelineLayout(vk, device, &pipelineLayoutParams));
2154
2155         vk::Move<vk::VkShaderModule> shaderModule (createShaderModule(vk, device, m_context.getBinaryCollection().get("compute"), 0));
2156         const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
2157         {
2158                 vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,// VkStructureType                              sType;
2159                 DE_NULL,                                                                                                // const void*                                  pNext;
2160                 (vk::VkPipelineShaderStageCreateFlags)0,
2161                 vk::VK_SHADER_STAGE_COMPUTE_BIT,                                                // VkShaderStage                                stage;
2162                 *shaderModule,                                                                                  // VkShader                                             shader;
2163                 "main",                                                                                                 //
2164                 DE_NULL,                                                                                                // const VkSpecializationInfo*  pSpecializationInfo;
2165         };
2166         const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
2167         {
2168                 vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,     // VkStructureType                                      sType;
2169                 DE_NULL,                                                                                        // const void*                                          pNext;
2170                 0,                                                                                                      // VkPipelineCreateFlags                        flags;
2171                 pipelineShaderStageParams,                                                      // VkPipelineShaderStageCreateInfo      stage;
2172                 *pipelineLayout,                                                                        // VkPipelineLayout                                     layout;
2173                 DE_NULL,                                                                                        // VkPipeline                                           basePipelineHandle;
2174                 0,                                                                                                      // deInt32                                                      basePipelineIndex;
2175         };
2176         vk::Move<vk::VkPipeline> pipeline(createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo));
2177
2178         vk::Move<vk::VkCommandPool> cmdPool (createCommandPool(vk, device, vk::VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
2179         vk::Move<vk::VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vk, device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2180
2181         const vk::VkCommandBufferBeginInfo cmdBufBeginParams =
2182         {
2183                 vk::VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,        //      VkStructureType                         sType;
2184                 DE_NULL,                                                                                        //      const void*                                     pNext;
2185                 0u,                                                                                                     //      VkCmdBufferOptimizeFlags        flags;
2186                 (const vk::VkCommandBufferInheritanceInfo*)DE_NULL,
2187         };
2188         VK_CHECK(vk.beginCommandBuffer(*cmdBuffer, &cmdBufBeginParams));
2189
2190         vk.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2191         vk.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2192
2193         vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
2194
2195         // Add barriers for shader writes to storage buffers before host access
2196         std::vector<vk::VkBufferMemoryBarrier> barriers;
2197         if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
2198         {
2199                 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2200                 {
2201                         const vk::VkBuffer uniformBuffer = m_uniformBuffers[blockNdx].get()->get();
2202
2203                         const vk::VkBufferMemoryBarrier barrier =
2204                         {
2205                                 vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
2206                                 DE_NULL,
2207                                 vk::VK_ACCESS_SHADER_WRITE_BIT,
2208                                 vk::VK_ACCESS_HOST_READ_BIT,
2209                                 VK_QUEUE_FAMILY_IGNORED,
2210                                 VK_QUEUE_FAMILY_IGNORED,
2211                                 uniformBuffer,
2212                                 0u,
2213                                 static_cast<vk::VkDeviceSize>(bufferSizes[blockNdx])
2214                         };
2215                         barriers.push_back(barrier);
2216                 }
2217         }
2218         else
2219         {
2220                 const vk::VkBuffer uniformBuffer = m_uniformBuffers[0].get()->get();
2221
2222                 vk::VkDeviceSize totalSize      = 0;
2223                 for (size_t bufferNdx = 0; bufferNdx < bufferSizes.size(); bufferNdx++)
2224                         totalSize += bufferSizes[bufferNdx];
2225
2226                 const vk::VkBufferMemoryBarrier barrier =
2227                 {
2228                         vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
2229                         DE_NULL,
2230                         vk::VK_ACCESS_SHADER_WRITE_BIT,
2231                         vk::VK_ACCESS_HOST_READ_BIT,
2232                         VK_QUEUE_FAMILY_IGNORED,
2233                         VK_QUEUE_FAMILY_IGNORED,
2234                         uniformBuffer,
2235                         0u,
2236                         totalSize
2237                 };
2238                 barriers.push_back(barrier);
2239         }
2240         vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
2241                                                   0u, DE_NULL, static_cast<deUint32>(barriers.size()), &barriers[0], 0u, DE_NULL);
2242
2243         VK_CHECK(vk.endCommandBuffer(*cmdBuffer));
2244
2245         vk::Move<vk::VkFence> fence (createFence(vk, device));
2246
2247         const vk::VkSubmitInfo  submitInfo  =
2248         {
2249                 vk::VK_STRUCTURE_TYPE_SUBMIT_INFO,
2250                 DE_NULL,
2251                 0u,
2252                 (const vk::VkSemaphore*)DE_NULL,
2253                 (const vk::VkPipelineStageFlags*)DE_NULL,
2254                 1u,
2255                 &cmdBuffer.get(),
2256                 0u,
2257                 (const vk::VkSemaphore*)DE_NULL,
2258         };
2259
2260         VK_CHECK(vk.queueSubmit(queue, 1u, &submitInfo, *fence));
2261         VK_CHECK(vk.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
2262
2263         // Read back ac_numPassed data
2264         bool counterOk;
2265         {
2266                 const int refCount = 1;
2267                 int resCount = 0;
2268
2269                 resCount = *((const int*)acBufferAlloc->getHostPtr());
2270
2271                 counterOk = (refCount == resCount);
2272                 if (!counterOk)
2273                 {
2274                         m_context.getTestContext().getLog() << TestLog::Message << "Error: ac_numPassed = " << resCount << ", expected " << refCount << TestLog::EndMessage;
2275                 }
2276         }
2277
2278         // Validate result
2279         const bool compareOk = compareData(m_context.getTestContext().getLog(), m_refLayout, m_writeData.pointers, m_refLayout, mappedBlockPtrs);
2280
2281         if (compareOk && counterOk)
2282                 return tcu::TestStatus::pass("Result comparison and counter values are OK");
2283         else if (!compareOk && counterOk)
2284                 return tcu::TestStatus::fail("Result comparison failed");
2285         else if (compareOk && !counterOk)
2286                 return tcu::TestStatus::fail("Counter value incorrect");
2287         else
2288                 return tcu::TestStatus::fail("Result comparison and counter values are incorrect");
2289 }
2290
2291 // SSBOLayoutCase.
2292
2293 SSBOLayoutCase::SSBOLayoutCase (tcu::TestContext& testCtx, const char* name, const char* description, BufferMode bufferMode, MatrixLoadFlags matrixLoadFlag)
2294         : TestCase                      (testCtx, name, description)
2295         , m_bufferMode          (bufferMode)
2296         , m_matrixLoadFlag      (matrixLoadFlag)
2297 {
2298 }
2299
2300 SSBOLayoutCase::~SSBOLayoutCase (void)
2301 {
2302 }
2303
2304 void SSBOLayoutCase::initPrograms (vk::SourceCollections& programCollection) const
2305 {
2306         DE_ASSERT(!m_computeShaderSrc.empty());
2307
2308         if (usesRelaxedLayout(m_interface))
2309         {
2310                 programCollection.glslSources.add("compute") << glu::ComputeSource(m_computeShaderSrc)
2311                         << vk::GlslBuildOptions(vk::SPIRV_VERSION_1_0, vk::GlslBuildOptions::FLAG_ALLOW_RELAXED_OFFSETS);
2312         }
2313         else
2314                 programCollection.glslSources.add("compute") << glu::ComputeSource(m_computeShaderSrc);
2315 }
2316
2317 TestInstance* SSBOLayoutCase::createInstance (Context& context) const
2318 {
2319         if (!de::contains(context.getDeviceExtensions().begin(), context.getDeviceExtensions().end(), "VK_KHR_relaxed_block_layout") && usesRelaxedLayout(m_interface))
2320                 TCU_THROW(NotSupportedError, "VK_KHR_relaxed_block_layout not supported");
2321         return new SSBOLayoutCaseInstance(context, m_bufferMode, m_interface, m_refLayout, m_initialData, m_writeData);
2322 }
2323
2324 void SSBOLayoutCase::init ()
2325 {
2326         computeReferenceLayout  (m_refLayout, m_interface);
2327         initRefDataStorage              (m_interface, m_refLayout, m_initialData);
2328         initRefDataStorage              (m_interface, m_refLayout, m_writeData);
2329         generateValues                  (m_refLayout, m_initialData.pointers, deStringHash(getName()) ^ 0xad2f7214);
2330         generateValues                  (m_refLayout, m_writeData.pointers, deStringHash(getName()) ^ 0x25ca4e7);
2331         copyNonWrittenData              (m_interface, m_refLayout, m_initialData.pointers, m_writeData.pointers);
2332
2333         m_computeShaderSrc = generateComputeShader(m_interface, m_refLayout, m_initialData.pointers, m_writeData.pointers, m_matrixLoadFlag);
2334 }
2335
2336 } // ssbo
2337 } // vkt