1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2015 The Khronos Group Inc.
6 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7 * Copyright (c) 2016 The Android Open Source Project
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
23 * \brief SSBO layout case.
24 *//*--------------------------------------------------------------------*/
26 #include "vktSSBOLayoutCase.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "gluContextInfo.hpp"
29 #include "gluShaderUtil.hpp"
30 #include "gluVarType.hpp"
31 #include "gluVarTypeUtil.hpp"
32 #include "tcuTestLog.hpp"
33 #include "deRandom.hpp"
34 #include "deStringUtil.hpp"
38 #include "deSharedPtr.hpp"
40 #include "vkBuilderUtil.hpp"
41 #include "vkMemUtil.hpp"
42 #include "vkPrograms.hpp"
43 #include "vkQueryUtil.hpp"
45 #include "vkRefUtil.hpp"
46 #include "vkTypeUtil.hpp"
57 using glu::StructType;
58 using glu::StructMember;
63 LayoutFlagsFmt (deUint32 flags_) : flags(flags_) {}
66 std::ostream& operator<< (std::ostream& str, const LayoutFlagsFmt& fmt)
74 { LAYOUT_STD140, "std140" },
75 { LAYOUT_STD430, "std430" },
76 { LAYOUT_ROW_MAJOR, "row_major" },
77 { LAYOUT_COLUMN_MAJOR, "column_major" }
80 deUint32 remBits = fmt.flags;
81 for (int descNdx = 0; descNdx < DE_LENGTH_OF_ARRAY(bitDesc); descNdx++)
83 if (remBits & bitDesc[descNdx].bit)
85 if (remBits != fmt.flags)
87 str << bitDesc[descNdx].token;
88 remBits &= ~bitDesc[descNdx].bit;
91 DE_ASSERT(remBits == 0);
95 // BufferVar implementation.
97 BufferVar::BufferVar (const char* name, const VarType& type, deUint32 flags)
105 // BufferBlock implementation.
107 BufferBlock::BufferBlock (const char* blockName)
108 : m_blockName (blockName)
115 void BufferBlock::setArraySize (int arraySize)
117 DE_ASSERT(arraySize >= 0);
118 m_lastUnsizedArraySizes.resize(arraySize == 0 ? 1 : arraySize, 0);
119 m_arraySize = arraySize;
122 std::ostream& operator<< (std::ostream& stream, const BlockLayoutEntry& entry)
124 stream << entry.name << " { name = " << entry.name
125 << ", size = " << entry.size
126 << ", activeVarIndices = [";
128 for (vector<int>::const_iterator i = entry.activeVarIndices.begin(); i != entry.activeVarIndices.end(); i++)
130 if (i != entry.activeVarIndices.begin())
139 static bool isUnsizedArray (const BufferVarLayoutEntry& entry)
141 DE_ASSERT(entry.arraySize != 0 || entry.topLevelArraySize != 0);
142 return entry.arraySize == 0 || entry.topLevelArraySize == 0;
145 std::ostream& operator<< (std::ostream& stream, const BufferVarLayoutEntry& entry)
147 stream << entry.name << " { type = " << glu::getDataTypeName(entry.type)
148 << ", blockNdx = " << entry.blockNdx
149 << ", offset = " << entry.offset
150 << ", arraySize = " << entry.arraySize
151 << ", arrayStride = " << entry.arrayStride
152 << ", matrixStride = " << entry.matrixStride
153 << ", topLevelArraySize = " << entry.topLevelArraySize
154 << ", topLevelArrayStride = " << entry.topLevelArrayStride
155 << ", isRowMajor = " << (entry.isRowMajor ? "true" : "false")
160 // \todo [2012-01-24 pyry] Speed up lookups using hash.
162 int BufferLayout::getVariableIndex (const string& name) const
164 for (int ndx = 0; ndx < (int)bufferVars.size(); ndx++)
166 if (bufferVars[ndx].name == name)
172 int BufferLayout::getBlockIndex (const string& name) const
174 for (int ndx = 0; ndx < (int)blocks.size(); ndx++)
176 if (blocks[ndx].name == name)
182 // ShaderInterface implementation.
184 ShaderInterface::ShaderInterface (void)
188 ShaderInterface::~ShaderInterface (void)
190 for (std::vector<StructType*>::iterator i = m_structs.begin(); i != m_structs.end(); i++)
193 for (std::vector<BufferBlock*>::iterator i = m_bufferBlocks.begin(); i != m_bufferBlocks.end(); i++)
197 StructType& ShaderInterface::allocStruct (const char* name)
199 m_structs.reserve(m_structs.size()+1);
200 m_structs.push_back(new StructType(name));
201 return *m_structs.back();
204 struct StructNameEquals
208 StructNameEquals (const char* name_) : name(name_) {}
210 bool operator() (const StructType* type) const
212 return type->getTypeName() && name == type->getTypeName();
216 const StructType* ShaderInterface::findStruct (const char* name) const
218 std::vector<StructType*>::const_iterator pos = std::find_if(m_structs.begin(), m_structs.end(), StructNameEquals(name));
219 return pos != m_structs.end() ? *pos : DE_NULL;
222 void ShaderInterface::getNamedStructs (std::vector<const StructType*>& structs) const
224 for (std::vector<StructType*>::const_iterator i = m_structs.begin(); i != m_structs.end(); i++)
226 if ((*i)->getTypeName() != DE_NULL)
227 structs.push_back(*i);
231 BufferBlock& ShaderInterface::allocBlock (const char* name)
233 m_bufferBlocks.reserve(m_bufferBlocks.size()+1);
234 m_bufferBlocks.push_back(new BufferBlock(name));
235 return *m_bufferBlocks.back();
238 namespace // Utilities
240 // Layout computation.
242 int getDataTypeByteSize (glu::DataType type)
244 return glu::getDataTypeScalarSize(type)*(int)sizeof(deUint32);
247 int getDataTypeByteAlignment (glu::DataType type)
251 case glu::TYPE_FLOAT:
254 case glu::TYPE_BOOL: return 1*(int)sizeof(deUint32);
256 case glu::TYPE_FLOAT_VEC2:
257 case glu::TYPE_INT_VEC2:
258 case glu::TYPE_UINT_VEC2:
259 case glu::TYPE_BOOL_VEC2: return 2*(int)sizeof(deUint32);
261 case glu::TYPE_FLOAT_VEC3:
262 case glu::TYPE_INT_VEC3:
263 case glu::TYPE_UINT_VEC3:
264 case glu::TYPE_BOOL_VEC3: // Fall-through to vec4
266 case glu::TYPE_FLOAT_VEC4:
267 case glu::TYPE_INT_VEC4:
268 case glu::TYPE_UINT_VEC4:
269 case glu::TYPE_BOOL_VEC4: return 4*(int)sizeof(deUint32);
//! Rounds a up to the nearest multiple of b; b is expected to be positive.
//!
//! The remainder is used instead of truncating division so that negative
//! values of a are also rounded towards positive infinity (e.g. -5 rounded
//! to a multiple of 4 gives -4). Exact multiples are returned unchanged.
static inline int deRoundUp32 (int a, int b)
{
	const int remainder = a % b;

	if (remainder == 0)
		return a;

	// The remainder takes the sign of a: positive values need the gap to the next
	// multiple added, negative values only need the (negative) remainder removed.
	return remainder > 0 ? a + (b - remainder) : a - remainder;
}
283 int computeStd140BaseAlignment (const VarType& type, deUint32 layoutFlags)
285 const int vec4Alignment = (int)sizeof(deUint32)*4;
287 if (type.isBasicType())
289 glu::DataType basicType = type.getBasicType();
291 if (glu::isDataTypeMatrix(basicType))
293 const bool isRowMajor = !!(layoutFlags & LAYOUT_ROW_MAJOR);
294 const int vecSize = isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
295 : glu::getDataTypeMatrixNumRows(basicType);
296 const int vecAlign = deAlign32(getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize)), vec4Alignment);
301 return getDataTypeByteAlignment(basicType);
303 else if (type.isArrayType())
305 int elemAlignment = computeStd140BaseAlignment(type.getElementType(), layoutFlags);
307 // Round up to alignment of vec4
308 return deAlign32(elemAlignment, vec4Alignment);
312 DE_ASSERT(type.isStructType());
314 int maxBaseAlignment = 0;
316 for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
317 maxBaseAlignment = de::max(maxBaseAlignment, computeStd140BaseAlignment(memberIter->getType(), layoutFlags));
319 return deAlign32(maxBaseAlignment, vec4Alignment);
323 int computeStd430BaseAlignment (const VarType& type, deUint32 layoutFlags)
325 // Otherwise identical to std140 except that alignment of structures and arrays
326 // are not rounded up to alignment of vec4.
328 if (type.isBasicType())
330 glu::DataType basicType = type.getBasicType();
332 if (glu::isDataTypeMatrix(basicType))
334 const bool isRowMajor = !!(layoutFlags & LAYOUT_ROW_MAJOR);
335 const int vecSize = isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
336 : glu::getDataTypeMatrixNumRows(basicType);
337 const int vecAlign = getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize));
341 return getDataTypeByteAlignment(basicType);
343 else if (type.isArrayType())
345 return computeStd430BaseAlignment(type.getElementType(), layoutFlags);
349 DE_ASSERT(type.isStructType());
351 int maxBaseAlignment = 0;
353 for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
354 maxBaseAlignment = de::max(maxBaseAlignment, computeStd430BaseAlignment(memberIter->getType(), layoutFlags));
356 return maxBaseAlignment;
360 int computeRelaxedBlockBaseAlignment (const VarType& type, deUint32 layoutFlags)
362 if (type.isBasicType())
364 glu::DataType basicType = type.getBasicType();
366 if (glu::isDataTypeVector(basicType))
369 if (glu::isDataTypeMatrix(basicType))
371 const bool isRowMajor = !!(layoutFlags & LAYOUT_ROW_MAJOR);
372 const int vecSize = isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
373 : glu::getDataTypeMatrixNumRows(basicType);
374 const int vecAlign = getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize));
378 return getDataTypeByteAlignment(basicType);
380 else if (type.isArrayType())
381 return computeStd430BaseAlignment(type.getElementType(), layoutFlags);
384 DE_ASSERT(type.isStructType());
386 int maxBaseAlignment = 0;
387 for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
388 maxBaseAlignment = de::max(maxBaseAlignment, computeRelaxedBlockBaseAlignment(memberIter->getType(), layoutFlags));
390 return maxBaseAlignment;
394 inline deUint32 mergeLayoutFlags (deUint32 prevFlags, deUint32 newFlags)
396 const deUint32 packingMask = LAYOUT_STD430|LAYOUT_STD140|LAYOUT_RELAXED;
397 const deUint32 matrixMask = LAYOUT_ROW_MAJOR|LAYOUT_COLUMN_MAJOR;
399 deUint32 mergedFlags = 0;
401 mergedFlags |= ((newFlags & packingMask) ? newFlags : prevFlags) & packingMask;
402 mergedFlags |= ((newFlags & matrixMask) ? newFlags : prevFlags) & matrixMask;
//! Returns true if powerOf2 is positive and has exactly one bit set.
template<typename T>
bool isPow2(T powerOf2)
{
	const bool isPositive = powerOf2 > 0;

	// Clearing the lowest set bit leaves zero only when a single bit was set.
	return isPositive && ((powerOf2 & (powerOf2 - (T)1)) == (T)0);
}
416 T roundToPow2(T number, int powerOf2)
418 DE_ASSERT(isPow2(powerOf2));
419 return (number + (T)powerOf2 - (T)1) & (T)(~(powerOf2 - 1));
422 //! Appends all child elements to layout, returns value that should be appended to offset.
423 int computeReferenceLayout (
424 BufferLayout& layout,
427 const std::string& curPrefix,
429 deUint32 layoutFlags)
431 // Reference layout uses std430 rules by default. std140 rules are
432 // choosen only for blocks that have std140 layout.
433 const int baseAlignment = (layoutFlags & LAYOUT_STD140) != 0 ? computeStd140BaseAlignment(type, layoutFlags) :
434 (layoutFlags & LAYOUT_RELAXED) != 0 ? computeRelaxedBlockBaseAlignment(type, layoutFlags) :
435 computeStd430BaseAlignment(type, layoutFlags);
436 int curOffset = deAlign32(baseOffset, baseAlignment);
437 const int topLevelArraySize = 1; // Default values
438 const int topLevelArrayStride = 0;
440 if (type.isBasicType())
442 const glu::DataType basicType = type.getBasicType();
443 BufferVarLayoutEntry entry;
445 entry.name = curPrefix;
446 entry.type = basicType;
448 entry.arrayStride = 0;
449 entry.matrixStride = 0;
450 entry.topLevelArraySize = topLevelArraySize;
451 entry.topLevelArrayStride = topLevelArrayStride;
452 entry.blockNdx = curBlockNdx;
454 if (glu::isDataTypeMatrix(basicType))
456 // Array of vectors as specified in rules 5 & 7.
457 const bool isRowMajor = !!(layoutFlags & LAYOUT_ROW_MAJOR);
458 const int numVecs = isRowMajor ? glu::getDataTypeMatrixNumRows(basicType)
459 : glu::getDataTypeMatrixNumColumns(basicType);
461 entry.offset = curOffset;
462 entry.matrixStride = baseAlignment;
463 entry.isRowMajor = isRowMajor;
465 curOffset += numVecs*baseAlignment;
469 if (glu::isDataTypeVector(basicType) && (getDataTypeByteSize(basicType) <= 16 ? curOffset / 16 != (curOffset + getDataTypeByteSize(basicType) - 1) / 16 : curOffset % 16 != 0) && (layoutFlags & LAYOUT_RELAXED))
470 curOffset = roundToPow2(curOffset, 16);
473 entry.offset = curOffset;
475 curOffset += getDataTypeByteSize(basicType);
478 layout.bufferVars.push_back(entry);
480 else if (type.isArrayType())
482 const VarType& elemType = type.getElementType();
484 if (elemType.isBasicType() && !glu::isDataTypeMatrix(elemType.getBasicType()))
486 // Array of scalars or vectors.
487 const glu::DataType elemBasicType = elemType.getBasicType();
488 const int stride = baseAlignment;
489 BufferVarLayoutEntry entry;
491 entry.name = curPrefix + "[0]"; // Array variables are always postfixed with [0]
492 entry.type = elemBasicType;
493 entry.blockNdx = curBlockNdx;
494 entry.offset = curOffset;
495 entry.arraySize = type.getArraySize();
496 entry.arrayStride = stride;
497 entry.matrixStride = 0;
498 entry.topLevelArraySize = topLevelArraySize;
499 entry.topLevelArrayStride = topLevelArrayStride;
501 curOffset += stride*type.getArraySize();
503 layout.bufferVars.push_back(entry);
505 else if (elemType.isBasicType() && glu::isDataTypeMatrix(elemType.getBasicType()))
507 // Array of matrices.
508 const glu::DataType elemBasicType = elemType.getBasicType();
509 const bool isRowMajor = !!(layoutFlags & LAYOUT_ROW_MAJOR);
510 const int numVecs = isRowMajor ? glu::getDataTypeMatrixNumRows(elemBasicType)
511 : glu::getDataTypeMatrixNumColumns(elemBasicType);
512 const int vecStride = baseAlignment;
513 BufferVarLayoutEntry entry;
515 entry.name = curPrefix + "[0]"; // Array variables are always postfixed with [0]
516 entry.type = elemBasicType;
517 entry.blockNdx = curBlockNdx;
518 entry.offset = curOffset;
519 entry.arraySize = type.getArraySize();
520 entry.arrayStride = vecStride*numVecs;
521 entry.matrixStride = vecStride;
522 entry.isRowMajor = isRowMajor;
523 entry.topLevelArraySize = topLevelArraySize;
524 entry.topLevelArrayStride = topLevelArrayStride;
526 curOffset += numVecs*vecStride*type.getArraySize();
528 layout.bufferVars.push_back(entry);
532 DE_ASSERT(elemType.isStructType() || elemType.isArrayType());
534 for (int elemNdx = 0; elemNdx < type.getArraySize(); elemNdx++)
535 curOffset += computeReferenceLayout(layout, curBlockNdx, curOffset, curPrefix + "[" + de::toString(elemNdx) + "]", type.getElementType(), layoutFlags);
540 DE_ASSERT(type.isStructType());
542 for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
543 curOffset += computeReferenceLayout(layout, curBlockNdx, curOffset, curPrefix + "." + memberIter->getName(), memberIter->getType(), layoutFlags);
545 curOffset = deAlign32(curOffset, baseAlignment);
548 return curOffset-baseOffset;
551 //! Appends all child elements to layout, returns offset increment.
552 int computeReferenceLayout (BufferLayout& layout, int curBlockNdx, const std::string& blockPrefix, int baseOffset, const BufferVar& bufVar, deUint32 blockLayoutFlags)
554 const VarType& varType = bufVar.getType();
555 const deUint32 combinedFlags = mergeLayoutFlags(blockLayoutFlags, bufVar.getFlags());
557 if (varType.isArrayType())
559 // Top-level arrays need special care.
560 const int topLevelArraySize = varType.getArraySize() == VarType::UNSIZED_ARRAY ? 0 : varType.getArraySize();
561 const string prefix = blockPrefix + bufVar.getName() + "[0]";
562 const bool isStd140 = (blockLayoutFlags & LAYOUT_STD140) != 0;
563 const int vec4Align = (int)sizeof(deUint32)*4;
564 const int baseAlignment = isStd140 ? computeStd140BaseAlignment(varType, combinedFlags) :
565 (blockLayoutFlags & LAYOUT_RELAXED) != 0 ? computeRelaxedBlockBaseAlignment(varType, combinedFlags) :
566 computeStd430BaseAlignment(varType, combinedFlags);
567 int curOffset = deAlign32(baseOffset, baseAlignment);
568 const VarType& elemType = varType.getElementType();
570 if (elemType.isBasicType() && !glu::isDataTypeMatrix(elemType.getBasicType()))
572 // Array of scalars or vectors.
573 const glu::DataType elemBasicType = elemType.getBasicType();
574 const int elemBaseAlign = getDataTypeByteAlignment(elemBasicType);
575 const int stride = isStd140 ? deAlign32(elemBaseAlign, vec4Align) : elemBaseAlign;
576 BufferVarLayoutEntry entry;
579 entry.topLevelArraySize = 1;
580 entry.topLevelArrayStride = 0;
581 entry.type = elemBasicType;
582 entry.blockNdx = curBlockNdx;
583 entry.offset = curOffset;
584 entry.arraySize = topLevelArraySize;
585 entry.arrayStride = stride;
586 entry.matrixStride = 0;
588 layout.bufferVars.push_back(entry);
590 curOffset += stride*topLevelArraySize;
592 else if (elemType.isBasicType() && glu::isDataTypeMatrix(elemType.getBasicType()))
594 // Array of matrices.
595 const glu::DataType elemBasicType = elemType.getBasicType();
596 const bool isRowMajor = !!(combinedFlags & LAYOUT_ROW_MAJOR);
597 const int vecSize = isRowMajor ? glu::getDataTypeMatrixNumColumns(elemBasicType)
598 : glu::getDataTypeMatrixNumRows(elemBasicType);
599 const int numVecs = isRowMajor ? glu::getDataTypeMatrixNumRows(elemBasicType)
600 : glu::getDataTypeMatrixNumColumns(elemBasicType);
601 const glu::DataType vecType = glu::getDataTypeFloatVec(vecSize);
602 const int vecBaseAlign = getDataTypeByteAlignment(vecType);
603 const int stride = isStd140 ? deAlign32(vecBaseAlign, vec4Align) : vecBaseAlign;
604 BufferVarLayoutEntry entry;
607 entry.topLevelArraySize = 1;
608 entry.topLevelArrayStride = 0;
609 entry.type = elemBasicType;
610 entry.blockNdx = curBlockNdx;
611 entry.offset = curOffset;
612 entry.arraySize = topLevelArraySize;
613 entry.arrayStride = stride*numVecs;
614 entry.matrixStride = stride;
615 entry.isRowMajor = isRowMajor;
617 layout.bufferVars.push_back(entry);
619 curOffset += stride*numVecs*topLevelArraySize;
623 DE_ASSERT(elemType.isStructType() || elemType.isArrayType());
625 // Struct base alignment is not added multiple times as curOffset supplied to computeReferenceLayout
626 // was already aligned correctly. Thus computeReferenceLayout should not add any extra padding
627 // before struct. Padding after struct will be added as it should.
629 // Stride could be computed prior to creating child elements, but it would essentially require running
630 // the layout computation twice. Instead we fix stride to child elements afterwards.
632 const int firstChildNdx = (int)layout.bufferVars.size();
633 const int stride = computeReferenceLayout(layout, curBlockNdx, curOffset, prefix, varType.getElementType(), combinedFlags);
635 for (int childNdx = firstChildNdx; childNdx < (int)layout.bufferVars.size(); childNdx++)
637 layout.bufferVars[childNdx].topLevelArraySize = topLevelArraySize;
638 layout.bufferVars[childNdx].topLevelArrayStride = stride;
641 curOffset += stride*topLevelArraySize;
644 return curOffset-baseOffset;
647 return computeReferenceLayout(layout, curBlockNdx, baseOffset, blockPrefix + bufVar.getName(), varType, combinedFlags);
650 void computeReferenceLayout (BufferLayout& layout, ShaderInterface& interface)
652 int numBlocks = interface.getNumBlocks();
654 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
656 BufferBlock& block = interface.getBlock(blockNdx);
657 bool hasInstanceName = block.getInstanceName() != DE_NULL;
658 std::string blockPrefix = hasInstanceName ? (std::string(block.getBlockName()) + ".") : std::string("");
660 int activeBlockNdx = (int)layout.blocks.size();
661 int firstVarNdx = (int)layout.bufferVars.size();
663 size_t oldSize = layout.bufferVars.size();
664 for (BufferBlock::iterator varIter = block.begin(); varIter != block.end(); varIter++)
666 BufferVar& bufVar = *varIter;
667 curOffset += computeReferenceLayout(layout, activeBlockNdx, blockPrefix, curOffset, bufVar, block.getFlags());
668 if (block.getFlags() & LAYOUT_RELAXED)
670 DE_ASSERT(!(layout.bufferVars.size() <= oldSize));
671 bufVar.setOffset(layout.bufferVars[oldSize].offset);
673 oldSize = layout.bufferVars.size();
676 int varIndicesEnd = (int)layout.bufferVars.size();
677 int blockSize = curOffset;
678 int numInstances = block.isArray() ? block.getArraySize() : 1;
680 // Create block layout entries for each instance.
681 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
683 // Allocate entry for instance.
684 layout.blocks.push_back(BlockLayoutEntry());
685 BlockLayoutEntry& blockEntry = layout.blocks.back();
687 blockEntry.name = block.getBlockName();
688 blockEntry.size = blockSize;
690 // Compute active variable set for block.
691 for (int varNdx = firstVarNdx; varNdx < varIndicesEnd; varNdx++)
692 blockEntry.activeVarIndices.push_back(varNdx);
695 blockEntry.name += "[" + de::toString(instanceNdx) + "]";
702 void generateValue (const BufferVarLayoutEntry& entry, int unsizedArraySize, void* basePtr, de::Random& rnd)
704 const glu::DataType scalarType = glu::getDataTypeScalarType(entry.type);
705 const int scalarSize = glu::getDataTypeScalarSize(entry.type);
706 const int arraySize = entry.arraySize == 0 ? unsizedArraySize : entry.arraySize;
707 const int arrayStride = entry.arrayStride;
708 const int topLevelSize = entry.topLevelArraySize == 0 ? unsizedArraySize : entry.topLevelArraySize;
709 const int topLevelStride = entry.topLevelArrayStride;
710 const bool isMatrix = glu::isDataTypeMatrix(entry.type);
711 const int numVecs = isMatrix ? (entry.isRowMajor ? glu::getDataTypeMatrixNumRows(entry.type) : glu::getDataTypeMatrixNumColumns(entry.type)) : 1;
712 const int vecSize = scalarSize / numVecs;
713 const int compSize = sizeof(deUint32);
715 DE_ASSERT(scalarSize%numVecs == 0);
716 DE_ASSERT(topLevelSize >= 0);
717 DE_ASSERT(arraySize >= 0);
719 for (int topElemNdx = 0; topElemNdx < topLevelSize; topElemNdx++)
721 deUint8* const topElemPtr = (deUint8*)basePtr + entry.offset + topElemNdx*topLevelStride;
723 for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
725 deUint8* const elemPtr = topElemPtr + elemNdx*arrayStride;
727 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
729 deUint8* const vecPtr = elemPtr + (isMatrix ? vecNdx*entry.matrixStride : 0);
731 for (int compNdx = 0; compNdx < vecSize; compNdx++)
733 deUint8* const compPtr = vecPtr + compSize*compNdx;
737 case glu::TYPE_FLOAT: *((float*)compPtr) = (float)rnd.getInt(-9, 9); break;
738 case glu::TYPE_INT: *((int*)compPtr) = rnd.getInt(-9, 9); break;
739 case glu::TYPE_UINT: *((deUint32*)compPtr) = (deUint32)rnd.getInt(0, 9); break;
740 // \note Random bit pattern is used for true values. Spec states that all non-zero values are
741 // interpreted as true but some implementations fail this.
742 case glu::TYPE_BOOL: *((deUint32*)compPtr) = rnd.getBool() ? rnd.getUint32()|1u : 0u; break;
752 void generateValues (const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers, deUint32 seed)
754 de::Random rnd (seed);
755 const int numBlocks = (int)layout.blocks.size();
757 DE_ASSERT(numBlocks == (int)blockPointers.size());
759 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
761 const BlockLayoutEntry& blockLayout = layout.blocks[blockNdx];
762 const BlockDataPtr& blockPtr = blockPointers[blockNdx];
763 const int numEntries = (int)layout.blocks[blockNdx].activeVarIndices.size();
765 for (int entryNdx = 0; entryNdx < numEntries; entryNdx++)
767 const int varNdx = blockLayout.activeVarIndices[entryNdx];
768 const BufferVarLayoutEntry& varEntry = layout.bufferVars[varNdx];
770 generateValue(varEntry, blockPtr.lastUnsizedArraySize, blockPtr.ptr, rnd);
777 const char* getCompareFuncForType (glu::DataType type)
781 case glu::TYPE_FLOAT: return "bool compare_float (highp float a, highp float b) { return abs(a - b) < 0.05; }\n";
782 case glu::TYPE_FLOAT_VEC2: return "bool compare_vec2 (highp vec2 a, highp vec2 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); }\n";
783 case glu::TYPE_FLOAT_VEC3: return "bool compare_vec3 (highp vec3 a, highp vec3 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z); }\n";
784 case glu::TYPE_FLOAT_VEC4: return "bool compare_vec4 (highp vec4 a, highp vec4 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z)&&compare_float(a.w, b.w); }\n";
785 case glu::TYPE_FLOAT_MAT2: return "bool compare_mat2 (highp mat2 a, highp mat2 b) { return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1]); }\n";
786 case glu::TYPE_FLOAT_MAT2X3: return "bool compare_mat2x3 (highp mat2x3 a, highp mat2x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1]); }\n";
787 case glu::TYPE_FLOAT_MAT2X4: return "bool compare_mat2x4 (highp mat2x4 a, highp mat2x4 b){ return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1]); }\n";
788 case glu::TYPE_FLOAT_MAT3X2: return "bool compare_mat3x2 (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); }\n";
789 case glu::TYPE_FLOAT_MAT3: return "bool compare_mat3 (highp mat3 a, highp mat3 b) { return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2]); }\n";
790 case glu::TYPE_FLOAT_MAT3X4: return "bool compare_mat3x4 (highp mat3x4 a, highp mat3x4 b){ return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1])&&compare_vec4(a[2], b[2]); }\n";
791 case glu::TYPE_FLOAT_MAT4X2: return "bool compare_mat4x2 (highp mat4x2 a, highp mat4x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2])&&compare_vec2(a[3], b[3]); }\n";
792 case glu::TYPE_FLOAT_MAT4X3: return "bool compare_mat4x3 (highp mat4x3 a, highp mat4x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2])&&compare_vec3(a[3], b[3]); }\n";
793 case glu::TYPE_FLOAT_MAT4: return "bool compare_mat4 (highp mat4 a, highp mat4 b) { return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1])&&compare_vec4(a[2], b[2])&&compare_vec4(a[3], b[3]); }\n";
794 case glu::TYPE_INT: return "bool compare_int (highp int a, highp int b) { return a == b; }\n";
795 case glu::TYPE_INT_VEC2: return "bool compare_ivec2 (highp ivec2 a, highp ivec2 b) { return a == b; }\n";
796 case glu::TYPE_INT_VEC3: return "bool compare_ivec3 (highp ivec3 a, highp ivec3 b) { return a == b; }\n";
797 case glu::TYPE_INT_VEC4: return "bool compare_ivec4 (highp ivec4 a, highp ivec4 b) { return a == b; }\n";
798 case glu::TYPE_UINT: return "bool compare_uint (highp uint a, highp uint b) { return a == b; }\n";
799 case glu::TYPE_UINT_VEC2: return "bool compare_uvec2 (highp uvec2 a, highp uvec2 b) { return a == b; }\n";
800 case glu::TYPE_UINT_VEC3: return "bool compare_uvec3 (highp uvec3 a, highp uvec3 b) { return a == b; }\n";
801 case glu::TYPE_UINT_VEC4: return "bool compare_uvec4 (highp uvec4 a, highp uvec4 b) { return a == b; }\n";
802 case glu::TYPE_BOOL: return "bool compare_bool (bool a, bool b) { return a == b; }\n";
803 case glu::TYPE_BOOL_VEC2: return "bool compare_bvec2 (bvec2 a, bvec2 b) { return a == b; }\n";
804 case glu::TYPE_BOOL_VEC3: return "bool compare_bvec3 (bvec3 a, bvec3 b) { return a == b; }\n";
805 case glu::TYPE_BOOL_VEC4: return "bool compare_bvec4 (bvec4 a, bvec4 b) { return a == b; }\n";
812 void getCompareDependencies (std::set<glu::DataType>& compareFuncs, glu::DataType basicType)
816 case glu::TYPE_FLOAT_VEC2:
817 case glu::TYPE_FLOAT_VEC3:
818 case glu::TYPE_FLOAT_VEC4:
819 compareFuncs.insert(glu::TYPE_FLOAT);
820 compareFuncs.insert(basicType);
823 case glu::TYPE_FLOAT_MAT2:
824 case glu::TYPE_FLOAT_MAT2X3:
825 case glu::TYPE_FLOAT_MAT2X4:
826 case glu::TYPE_FLOAT_MAT3X2:
827 case glu::TYPE_FLOAT_MAT3:
828 case glu::TYPE_FLOAT_MAT3X4:
829 case glu::TYPE_FLOAT_MAT4X2:
830 case glu::TYPE_FLOAT_MAT4X3:
831 case glu::TYPE_FLOAT_MAT4:
832 compareFuncs.insert(glu::TYPE_FLOAT);
833 compareFuncs.insert(glu::getDataTypeFloatVec(glu::getDataTypeMatrixNumRows(basicType)));
834 compareFuncs.insert(basicType);
838 compareFuncs.insert(basicType);
843 void collectUniqueBasicTypes (std::set<glu::DataType>& basicTypes, const VarType& type)
845 if (type.isStructType())
847 for (StructType::ConstIterator iter = type.getStructPtr()->begin(); iter != type.getStructPtr()->end(); ++iter)
848 collectUniqueBasicTypes(basicTypes, iter->getType());
850 else if (type.isArrayType())
851 collectUniqueBasicTypes(basicTypes, type.getElementType());
854 DE_ASSERT(type.isBasicType());
855 basicTypes.insert(type.getBasicType());
859 void collectUniqueBasicTypes (std::set<glu::DataType>& basicTypes, const BufferBlock& bufferBlock)
861 for (BufferBlock::const_iterator iter = bufferBlock.begin(); iter != bufferBlock.end(); ++iter)
862 collectUniqueBasicTypes(basicTypes, iter->getType());
865 void collectUniqueBasicTypes (std::set<glu::DataType>& basicTypes, const ShaderInterface& interface)
867 for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
868 collectUniqueBasicTypes(basicTypes, interface.getBlock(ndx));
871 void generateCompareFuncs (std::ostream& str, const ShaderInterface& interface)
873 std::set<glu::DataType> types;
874 std::set<glu::DataType> compareFuncs;
876 // Collect unique basic types
877 collectUniqueBasicTypes(types, interface);
879 // Set of compare functions required
880 for (std::set<glu::DataType>::const_iterator iter = types.begin(); iter != types.end(); ++iter)
882 getCompareDependencies(compareFuncs, *iter);
885 for (int type = 0; type < glu::TYPE_LAST; ++type)
887 if (compareFuncs.find(glu::DataType(type)) != compareFuncs.end())
888 str << getCompareFuncForType(glu::DataType(type));
892 bool usesRelaxedLayout (const ShaderInterface& interface)
894 //If any of blocks has LAYOUT_RELAXED flag
895 for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
897 if (interface.getBlock(ndx).getFlags() & LAYOUT_RELAXED)
//! Stream manipulator helper holding an indentation level.
struct Indent
{
	int level;

	Indent (int level_)
		: level (level_)
	{
	}
};
909 std::ostream& operator<< (std::ostream& str, const Indent& indent)
911 for (int i = 0; i < indent.level; i++)
916 void generateDeclaration (std::ostream& src, const BufferVar& bufferVar, int indentLevel)
918 // \todo [pyry] Qualifiers
919 if ((bufferVar.getFlags() & LAYOUT_MASK) != 0)
920 src << "layout(" << LayoutFlagsFmt(bufferVar.getFlags() & LAYOUT_MASK) << ") ";
921 else if (bufferVar.getOffset()!= ~0u)
922 src << "layout(offset = "<<bufferVar.getOffset()<<") ";
924 src << glu::declare(bufferVar.getType(), bufferVar.getName(), indentLevel);
927 void generateDeclaration (std::ostream& src, const BufferBlock& block, int bindingPoint)
930 if ((block.getFlags() & LAYOUT_MASK) != 0)
931 src << LayoutFlagsFmt(block.getFlags() & LAYOUT_MASK) << ", ";
933 src << "binding = " << bindingPoint;
937 src << "buffer " << block.getBlockName();
940 for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
944 generateDeclaration(src, *varIter, 1 /* indent level */);
950 if (block.getInstanceName() != DE_NULL)
952 src << " " << block.getInstanceName();
954 src << "[" << block.getArraySize() << "]";
957 DE_ASSERT(!block.isArray());
962 void generateImmMatrixSrc (std::ostream& src, glu::DataType basicType, int matrixStride, bool isRowMajor, const void* valuePtr)
964 DE_ASSERT(glu::isDataTypeMatrix(basicType));
966 const int compSize = sizeof(deUint32);
967 const int numRows = glu::getDataTypeMatrixNumRows(basicType);
968 const int numCols = glu::getDataTypeMatrixNumColumns(basicType);
970 src << glu::getDataTypeName(basicType) << "(";
972 // Constructed in column-wise order.
973 for (int colNdx = 0; colNdx < numCols; colNdx++)
975 for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
977 const deUint8* compPtr = (const deUint8*)valuePtr + (isRowMajor ? rowNdx*matrixStride + colNdx*compSize
978 : colNdx*matrixStride + rowNdx*compSize);
980 if (colNdx > 0 || rowNdx > 0)
983 src << de::floatToString(*((const float*)compPtr), 1);
// Overload used for shader-side comparison: instead of a single matrix constructor it emits
// "<resultVar> = <resultVar> && compare_<type>(...)" statements for the matrix stored at valuePtr,
// where shaderName is the expression that names the matrix in the shader.
// NOTE(review): matrixStride and isRowMajor are used below, but their parameter lines are not
// visible in this listing; typeName may also be reassigned between the two loops - confirm
// against the full file.
990 void generateImmMatrixSrc (std::ostream& src,
991 glu::DataType basicType,
994 const void* valuePtr,
995 const char* resultVar,
996 const char* typeName,
997 const string shaderName)
// Each component occupies sizeof(deUint32) bytes.
999 const int compSize = sizeof(deUint32);
1000 const int numRows = glu::getDataTypeMatrixNumRows(basicType);
1001 const int numCols = glu::getDataTypeMatrixNumColumns(basicType);
// First pass: one comparison per scalar component, addressed as shaderName[col][row].
1004 for (int colNdex = 0; colNdex < numCols; colNdex++)
1006 for (int rowNdex = 0; rowNdex < numRows; rowNdex++)
1008 src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << "(" << shaderName << "[" << colNdex << "][" << rowNdex << "], ";
// Same row-major / column-major addressing as the constructor-style overload.
1009 const deUint8* compPtr = (const deUint8*)valuePtr + (isRowMajor ? rowNdex*matrixStride + colNdex*compSize
1010 : colNdex*matrixStride + rowNdex*compSize);
1012 src << de::floatToString(*((const float*)compPtr), 1);
// Second pass: one comparison per column, addressed as shaderName[col] and compared against a
// "<typeName><numRows>(...)" constructor built from that column's components.
1018 for (int colNdex = 0; colNdex < numCols; colNdex++)
1020 src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << numRows << "(" << shaderName << "[" << colNdex << "], " << typeName << numRows << "(";
1021 for (int rowNdex = 0; rowNdex < numRows; rowNdex++)
1023 const deUint8* compPtr = (const deUint8*)valuePtr + (isRowMajor ? (rowNdex * matrixStride + colNdex * compSize)
1024 : (colNdex * matrixStride + rowNdex * compSize));
1025 src << de::floatToString(*((const float*)compPtr), 1);
// True for every row but the last (presumably guards a separator; the guarded statement is not
// visible here).
1027 if (rowNdex < numRows-1)
// Emits a GLSL constructor expression starting with "<type>(" for the scalar or vector stored at
// valuePtr. basicType must be a float, int, uint or bool scalar/vector type (asserted).
1034 void generateImmScalarVectorSrc (std::ostream& src, glu::DataType basicType, const void* valuePtr)
1036 DE_ASSERT(glu::isDataTypeFloatOrVec(basicType) ||
1037 glu::isDataTypeIntOrIVec(basicType) ||
1038 glu::isDataTypeUintOrUVec(basicType) ||
1039 glu::isDataTypeBoolOrBVec(basicType));
1041 const glu::DataType scalarType = glu::getDataTypeScalarType(basicType);
1042 const int scalarSize = glu::getDataTypeScalarSize(basicType);
// Components are tightly packed, sizeof(deUint32) bytes each.
1043 const int compSize = sizeof(deUint32);
1046 src << glu::getDataTypeName(basicType) << "(";
1048 for (int scalarNdx = 0; scalarNdx < scalarSize; scalarNdx++)
1050 const deUint8* compPtr = (const deUint8*)valuePtr + scalarNdx*compSize;
// Formatting per scalar type: floats with one decimal, uints with a "u" suffix, and bools as
// "true"/"false" depending on whether the stored 32-bit value is non-zero.
1057 case glu::TYPE_FLOAT: src << de::floatToString(*((const float*)compPtr), 1); break;
1058 case glu::TYPE_INT: src << *((const int*)compPtr); break;
1059 case glu::TYPE_UINT: src << *((const deUint32*)compPtr) << "u"; break;
1060 case glu::TYPE_BOOL: src << (*((const deUint32*)compPtr) != 0u ? "true" : "false"); break;
// Builds the API-side name of the sub-variable reached from var through accessPath; callers in
// this file pass the result to BufferLayout::getVariableIndex().
1070 string getAPIName (const BufferBlock& block, const BufferVar& var, const glu::TypeComponentVector& accessPath)
1072 std::ostringstream name;
// Variables of blocks that have an instance name are prefixed with "<blockName>.".
1074 if (block.getInstanceName())
1075 name << block.getBlockName() << ".";
1077 name << var.getName();
1079 for (glu::TypeComponentVector::const_iterator pathComp = accessPath.begin(); pathComp != accessPath.end(); pathComp++)
1081 if (pathComp->type == glu::VarTypeComponent::STRUCT_MEMBER)
// Resolve the type reached so far to obtain the struct whose member name is appended.
1083 const VarType curType = glu::getVarType(var.getType(), accessPath.begin(), pathComp);
1084 const StructType* structPtr = curType.getStructPtr();
1086 name << "." << structPtr->getMember(pathComp->index).getName();
1088 else if (pathComp->type == glu::VarTypeComponent::ARRAY_ELEMENT)
// The first and the last path component always map to element 0 in the name, whatever their
// actual index is; only array indices in the middle of the path are spelled out.
1090 if (pathComp == accessPath.begin() || (pathComp+1) == accessPath.end())
1091 name << "[0]"; // Top- / bottom-level array
1093 name << "[" << pathComp->index << "]";
// Builds the GLSL expression that names the sub-variable reached from var through accessPath
// inside block instance instanceNdx. Unlike getAPIName(), every array index is spelled out.
1102 string getShaderName (const BufferBlock& block, int instanceNdx, const BufferVar& var, const glu::TypeComponentVector& accessPath)
1104 std::ostringstream name;
// Blocks with an instance name are accessed through that name, indexed for array blocks.
1106 if (block.getInstanceName())
1108 name << block.getInstanceName();
1110 if (block.isArray())
1111 name << "[" << instanceNdx << "]";
// NOTE(review): presumably the branch for blocks without an instance name, which can only
// have a single instance - the surrounding else is not visible here.
1116 DE_ASSERT(instanceNdx == 0);
1118 name << var.getName();
1120 for (glu::TypeComponentVector::const_iterator pathComp = accessPath.begin(); pathComp != accessPath.end(); pathComp++)
1122 if (pathComp->type == glu::VarTypeComponent::STRUCT_MEMBER)
// Resolve the type reached so far to obtain the struct whose member name is appended.
1124 const VarType curType = glu::getVarType(var.getType(), accessPath.begin(), pathComp);
1125 const StructType* structPtr = curType.getStructPtr();
1127 name << "." << structPtr->getMember(pathComp->index).getName();
1129 else if (pathComp->type == glu::VarTypeComponent::ARRAY_ELEMENT)
1130 name << "[" << pathComp->index << "]";
1138 int computeOffset (const BufferVarLayoutEntry& varLayout, const glu::TypeComponentVector& accessPath)
1140 const int topLevelNdx = (accessPath.size() > 1 && accessPath.front().type == glu::VarTypeComponent::ARRAY_ELEMENT) ? accessPath.front().index : 0;
1141 const int bottomLevelNdx = (!accessPath.empty() && accessPath.back().type == glu::VarTypeComponent::ARRAY_ELEMENT) ? accessPath.back().index : 0;
1143 return varLayout.offset + varLayout.topLevelArrayStride*topLevelNdx + varLayout.arrayStride*bottomLevelNdx;
// Recursively emits shader statements that compare one buffer variable (or the part of it
// selected by accessPath) against the reference values stored in blockPtr.
// Arrays and structs are expanded element by element / member by member; basic types produce
// "<resultVar> = <resultVar> && compare_<type>(<shaderName>, <immediate value>" statements.
// NOTE(review): the parameter lines for the output stream (src) and instanceNdx are not visible
// in this listing although both are used below.
1146 void generateCompareSrc (
1148 const char* resultVar,
1149 const BufferLayout& bufferLayout,
1150 const BufferBlock& block,
1152 const BlockDataPtr& blockPtr,
1153 const BufferVar& bufVar,
1154 const glu::SubTypeAccess& accessPath,
1155 MatrixLoadFlags matrixLoadFlag)
1157 const VarType curType = accessPath.getType();
1159 if (curType.isArrayType())
// Unsized arrays take their length from the block instance.
1161 const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();
// \note Nested elements are always compared with LOAD_FULL_MATRIX; the caller's matrixLoadFlag
//       only applies when the variable itself is a basic type.
1163 for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
1164 generateCompareSrc(src, resultVar, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.element(elemNdx), LOAD_FULL_MATRIX);
1166 else if (curType.isStructType())
1168 const int numMembers = curType.getStructPtr()->getNumMembers();
1170 for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
1171 generateCompareSrc(src, resultVar, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.member(memberNdx), LOAD_FULL_MATRIX);
1175 DE_ASSERT(curType.isBasicType());
// The layout entry is found by API name; the shader accesses the value by its shader name.
1177 const string apiName = getAPIName(block, bufVar, accessPath.getPath());
1178 const int varNdx = bufferLayout.getVariableIndex(apiName);
1180 DE_ASSERT(varNdx >= 0);
1182 const BufferVarLayoutEntry& varLayout = bufferLayout.bufferVars[varNdx];
1183 const string shaderName = getShaderName(block, instanceNdx, bufVar, accessPath.getPath());
1184 const glu::DataType basicType = curType.getBasicType();
1185 const bool isMatrix = glu::isDataTypeMatrix(basicType);
1186 const char* typeName = glu::getDataTypeName(basicType);
// Reference value for exactly this element inside the block's data.
1187 const void* valuePtr = (const deUint8*)blockPtr.ptr + computeOffset(varLayout, accessPath.getPath());
// LOAD_MATRIX_COMPONENTS selects the overload that emits its own compare statements; the
// other visible paths emit one compare call against an immediate matrix or scalar/vector.
// (Presumably the matrix paths are guarded by isMatrix - the branch lines are not visible.)
1192 if (matrixLoadFlag == LOAD_MATRIX_COMPONENTS)
1193 generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr, resultVar, typeName, shaderName);
1196 src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << "(" << shaderName << ", ";
1197 generateImmMatrixSrc (src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr);
1203 src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << "(" << shaderName << ", ";
1204 generateImmScalarVectorSrc(src, basicType, valuePtr);
// Emits comparison statements for every variable with the ACCESS_READ flag in every block
// instance of the interface. blockPointers is indexed by the block index that layout reports
// for the instance's name.
1211 void generateCompareSrc (std::ostream& src, const char* resultVar, const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers, MatrixLoadFlags matrixLoadFlag)
1213 for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1215 const BufferBlock& block = interface.getBlock(declNdx);
1216 const bool isArray = block.isArray();
1217 const int numInstances = isArray ? block.getArraySize() : 1;
// Array blocks must have an instance name.
1219 DE_ASSERT(!isArray || block.getInstanceName());
1221 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
// Array instances are looked up as "<blockName>[<instanceNdx>]", others by plain block name.
1223 const string instanceName = block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
1224 const int blockNdx = layout.getBlockIndex(instanceName);
// NOTE(review): blockNdx is used as an index without a visible check that the lookup succeeded.
1225 const BlockDataPtr& blockPtr = blockPointers[blockNdx];
1227 for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1229 const BufferVar& bufVar = *varIter;
1231 if ((bufVar.getFlags() & ACCESS_READ) == 0)
1232 continue; // Don't read from that variable.
1234 generateCompareSrc(src, resultVar, layout, block, instanceNdx, blockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()), matrixLoadFlag);
1240 // \todo [2013-10-14 pyry] Almost identical to generateCompareSrc - unify?
// Recursively emits shader statements "<shaderName> = <immediate value>" that write the values
// stored in blockPtr into one buffer variable (or the part of it selected by accessPath).
// Arrays and structs are expanded element by element / member by member.
// NOTE(review): the parameter lines for the output stream (src) and instanceNdx are not visible
// in this listing although both are used below.
1242 void generateWriteSrc (
1244 const BufferLayout& bufferLayout,
1245 const BufferBlock& block,
1247 const BlockDataPtr& blockPtr,
1248 const BufferVar& bufVar,
1249 const glu::SubTypeAccess& accessPath)
1251 const VarType curType = accessPath.getType();
1253 if (curType.isArrayType())
// Unsized arrays take their length from the block instance.
1255 const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();
1257 for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
1258 generateWriteSrc(src, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.element(elemNdx));
1260 else if (curType.isStructType())
1262 const int numMembers = curType.getStructPtr()->getNumMembers();
1264 for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
1265 generateWriteSrc(src, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.member(memberNdx));
1269 DE_ASSERT(curType.isBasicType());
// The layout entry is found by API name; the shader accesses the value by its shader name.
1271 const string apiName = getAPIName(block, bufVar, accessPath.getPath());
1272 const int varNdx = bufferLayout.getVariableIndex(apiName);
1274 DE_ASSERT(varNdx >= 0);
1276 const BufferVarLayoutEntry& varLayout = bufferLayout.bufferVars[varNdx];
1277 const string shaderName = getShaderName(block, instanceNdx, bufVar, accessPath.getPath());
1278 const glu::DataType basicType = curType.getBasicType();
1279 const bool isMatrix = glu::isDataTypeMatrix(basicType);
// Value to be written for exactly this element.
1280 const void* valuePtr = (const deUint8*)blockPtr.ptr + computeOffset(varLayout, accessPath.getPath());
1282 src << "\t" << shaderName << " = ";
// Right-hand side: a matrix constructor or a scalar/vector constructor (presumably selected by
// isMatrix - the branch lines are not visible here).
1285 generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr);
1287 generateImmScalarVectorSrc(src, basicType, valuePtr);
// Emits write statements for every variable with the ACCESS_WRITE flag in every block instance
// of the interface. blockPointers is indexed by the block index that layout reports for the
// instance's name.
1294 void generateWriteSrc (std::ostream& src, const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers)
1296 for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1298 const BufferBlock& block = interface.getBlock(declNdx);
1299 const bool isArray = block.isArray();
1300 const int numInstances = isArray ? block.getArraySize() : 1;
// Array blocks must have an instance name.
1302 DE_ASSERT(!isArray || block.getInstanceName());
1304 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
// Array instances are looked up as "<blockName>[<instanceNdx>]", others by plain block name.
1306 const string instanceName = block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
1307 const int blockNdx = layout.getBlockIndex(instanceName);
// NOTE(review): blockNdx is used as an index without a visible check that the lookup succeeded.
1308 const BlockDataPtr& blockPtr = blockPointers[blockNdx];
1310 for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1312 const BufferVar& bufVar = *varIter;
1314 if ((bufVar.getFlags() & ACCESS_WRITE) == 0)
1315 continue; // Don't write to that variable.
1317 generateWriteSrc(src, layout, block, instanceNdx, blockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()));
// Assembles the GLSL source of the compute shader used by the test and returns it as a string.
//   comparePtrs - per-block data the shader is expected to read (turned into compare statements).
//   writePtrs   - per-block data the shader writes back (turned into assignment statements).
//   matrixLoadFlag - forwarded to the compare-statement generator.
1323 string generateComputeShader (const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& comparePtrs, const vector<BlockDataPtr>& writePtrs, MatrixLoadFlags matrixLoadFlag)
1325 std::ostringstream src;
// The GLSL version depends on usesRelaxedLayout(): "#version 450" or "#version 310 es"
// (presumably if/else - the else line is not visible here).
1327 if (usesRelaxedLayout(interface))
1328 src << "#version 450\n";
1330 src << "#version 310 es\n";
1332 src << "layout(local_size_x = 1) in;\n";
1335 // Atomic counter for counting passed invocations.
// Binding 0 is taken by this counter block; the test blocks start at binding 1 below.
1336 src << "layout(std140, binding = 0) buffer AcBlock { highp uint ac_numPassed; };\n\n";
// Named struct types have to be declared before the blocks that use them.
1338 std::vector<const StructType*> namedStructs;
1339 interface.getNamedStructs(namedStructs);
1340 for (std::vector<const StructType*>::const_iterator structIter = namedStructs.begin(); structIter != namedStructs.end(); structIter++)
1341 src << glu::declare(*structIter) << ";\n";
1344 for (int blockNdx = 0; blockNdx < interface.getNumBlocks(); blockNdx++)
1346 const BufferBlock& block = interface.getBlock(blockNdx);
1347 generateDeclaration(src, block, 1 + blockNdx);
1351 // Comparison utilities.
1353 generateCompareFuncs(src, interface);
1356 "void main (void)\n"
1358 " bool allOk = true;\n";
// main(): compare all readable variables, bump the pass counter when everything matched, and
// then write the new values.
1361 generateCompareSrc(src, "allOk", interface, layout, comparePtrs, matrixLoadFlag);
1363 src << " if (allOk)\n"
1364 << " ac_numPassed++;\n"
1368 generateWriteSrc(src, interface, layout, writePtrs);
// Copies the data of one buffer variable from a source block to a destination block.
// The two layout entries may differ in offsets, strides and matrix majorness; the number of
// elements copied is taken from the destination entry, which must not exceed the source
// (asserted). The types of both entries must be equal (asserted).
1375 void copyBufferVarData (const BufferVarLayoutEntry& dstEntry, const BlockDataPtr& dstBlockPtr, const BufferVarLayoutEntry& srcEntry, const BlockDataPtr& srcBlockPtr)
1377 DE_ASSERT(dstEntry.arraySize <= srcEntry.arraySize);
1378 DE_ASSERT(dstEntry.topLevelArraySize <= srcEntry.topLevelArraySize);
1379 DE_ASSERT(dstBlockPtr.lastUnsizedArraySize <= srcBlockPtr.lastUnsizedArraySize);
1380 DE_ASSERT(dstEntry.type == srcEntry.type);
1382 deUint8* const dstBasePtr = (deUint8*)dstBlockPtr.ptr + dstEntry.offset;
1383 const deUint8* const srcBasePtr = (const deUint8*)srcBlockPtr.ptr + srcEntry.offset;
1384 const int scalarSize = glu::getDataTypeScalarSize(dstEntry.type);
1385 const bool isMatrix = glu::isDataTypeMatrix(dstEntry.type);
1386 const int compSize = sizeof(deUint32);
// An array size of 0 in a layout entry stands for an unsized array; its real length is stored
// in the block data pointer.
1387 const int dstArraySize = dstEntry.arraySize == 0 ? dstBlockPtr.lastUnsizedArraySize : dstEntry.arraySize;
1388 const int dstArrayStride = dstEntry.arrayStride;
1389 const int dstTopLevelSize = dstEntry.topLevelArraySize == 0 ? dstBlockPtr.lastUnsizedArraySize : dstEntry.topLevelArraySize;
1390 const int dstTopLevelStride = dstEntry.topLevelArrayStride;
1391 const int srcArraySize = srcEntry.arraySize == 0 ? srcBlockPtr.lastUnsizedArraySize : srcEntry.arraySize;
1392 const int srcArrayStride = srcEntry.arrayStride;
1393 const int srcTopLevelSize = srcEntry.topLevelArraySize == 0 ? srcBlockPtr.lastUnsizedArraySize : srcEntry.topLevelArraySize;
1394 const int srcTopLevelStride = srcEntry.topLevelArrayStride;
1396 DE_ASSERT(dstArraySize <= srcArraySize && dstTopLevelSize <= srcTopLevelSize);
// The source sizes are only needed by the assert above; this keeps them referenced otherwise.
1397 DE_UNREF(srcArraySize && srcTopLevelSize);
1399 for (int topElemNdx = 0; topElemNdx < dstTopLevelSize; topElemNdx++)
1401 deUint8* const dstTopPtr = dstBasePtr + topElemNdx*dstTopLevelStride;
1402 const deUint8* const srcTopPtr = srcBasePtr + topElemNdx*srcTopLevelStride;
1404 for (int elementNdx = 0; elementNdx < dstArraySize; elementNdx++)
1406 deUint8* const dstElemPtr = dstTopPtr + elementNdx*dstArrayStride;
1407 const deUint8* const srcElemPtr = srcTopPtr + elementNdx*srcArrayStride;
// Matrices are copied one component at a time because source and destination may use a
// different majorness and matrix stride (presumably guarded by isMatrix - not visible here).
1411 const int numRows = glu::getDataTypeMatrixNumRows(dstEntry.type);
1412 const int numCols = glu::getDataTypeMatrixNumColumns(dstEntry.type);
1414 for (int colNdx = 0; colNdx < numCols; colNdx++)
1416 for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
1418 deUint8* dstCompPtr = dstElemPtr + (dstEntry.isRowMajor ? rowNdx*dstEntry.matrixStride + colNdx*compSize
1419 : colNdx*dstEntry.matrixStride + rowNdx*compSize);
1420 const deUint8* srcCompPtr = srcElemPtr + (srcEntry.isRowMajor ? rowNdx*srcEntry.matrixStride + colNdx*compSize
1421 : colNdx*srcEntry.matrixStride + rowNdx*compSize);
// Both accesses must stay inside their block's data.
1423 DE_ASSERT((deIntptr)(srcCompPtr + compSize) - (deIntptr)srcBlockPtr.ptr <= (deIntptr)srcBlockPtr.size);
1424 DE_ASSERT((deIntptr)(dstCompPtr + compSize) - (deIntptr)dstBlockPtr.ptr <= (deIntptr)dstBlockPtr.size);
1425 deMemcpy(dstCompPtr, srcCompPtr, compSize);
// Scalars and vectors are tightly packed and copied in one go.
1431 DE_ASSERT((deIntptr)(srcElemPtr + scalarSize*compSize) - (deIntptr)srcBlockPtr.ptr <= (deIntptr)srcBlockPtr.size);
1432 DE_ASSERT((deIntptr)(dstElemPtr + scalarSize*compSize) - (deIntptr)dstBlockPtr.ptr <= (deIntptr)dstBlockPtr.size);
1433 deMemcpy(dstElemPtr, srcElemPtr, scalarSize*compSize);
// Copies the data of every active variable of every source block into the destination block
// of the same name. Blocks and variables are matched by name, so the two layouts may order
// and place them differently. Source blocks without a destination counterpart are skipped.
1439 void copyData (const BufferLayout& dstLayout, const vector<BlockDataPtr>& dstBlockPointers, const BufferLayout& srcLayout, const vector<BlockDataPtr>& srcBlockPointers)
1441 // \note Src layout is used as reference in case activeVarIndices happens to be incorrect in dstLayout blocks.
1442 int numBlocks = (int)srcLayout.blocks.size();
1444 for (int srcBlockNdx = 0; srcBlockNdx < numBlocks; srcBlockNdx++)
1446 const BlockLayoutEntry& srcBlock = srcLayout.blocks[srcBlockNdx];
1447 const BlockDataPtr& srcBlockPtr = srcBlockPointers[srcBlockNdx];
1448 int dstBlockNdx = dstLayout.getBlockIndex(srcBlock.name.c_str());
1450 if (dstBlockNdx >= 0)
1452 DE_ASSERT(de::inBounds(dstBlockNdx, 0, (int)dstBlockPointers.size()));
1454 const BlockDataPtr& dstBlockPtr = dstBlockPointers[dstBlockNdx];
1456 for (vector<int>::const_iterator srcVarNdxIter = srcBlock.activeVarIndices.begin(); srcVarNdxIter != srcBlock.activeVarIndices.end(); srcVarNdxIter++)
1458 const BufferVarLayoutEntry& srcEntry = srcLayout.bufferVars[*srcVarNdxIter];
1459 int dstVarNdx = dstLayout.getVariableIndex(srcEntry.name.c_str());
// NOTE(review): dstVarNdx indexes bufferVars directly; presumably a ">= 0" guard sits on a line
// that is not visible in this listing - confirm.
1462 copyBufferVarData(dstLayout.bufferVars[dstVarNdx], dstBlockPtr, srcEntry, srcBlockPtr);
// Recursively copies one buffer variable (or the part of it selected by accessPath) from
// srcBlockPtr to dstBlockPtr. Both blocks share the same layout, so the same layout entry is
// used for source and destination.
// NOTE(review): the instanceNdx parameter line is not visible in this listing although the
// name is used below.
1468 void copyNonWrittenData (
1469 const BufferLayout& layout,
1470 const BufferBlock& block,
1472 const BlockDataPtr& srcBlockPtr,
1473 const BlockDataPtr& dstBlockPtr,
1474 const BufferVar& bufVar,
1475 const glu::SubTypeAccess& accessPath)
1477 const VarType curType = accessPath.getType();
1479 if (curType.isArrayType())
// Unsized arrays take their length from the block instance.
1481 const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();
1483 for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
1484 copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, accessPath.element(elemNdx));
1486 else if (curType.isStructType())
1488 const int numMembers = curType.getStructPtr()->getNumMembers();
1490 for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
1491 copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, accessPath.member(memberNdx));
1495 DE_ASSERT(curType.isBasicType());
1497 const string apiName = getAPIName(block, bufVar, accessPath.getPath());
1498 const int varNdx = layout.getVariableIndex(apiName);
1500 DE_ASSERT(varNdx >= 0);
1502 const BufferVarLayoutEntry& varLayout = layout.bufferVars[varNdx];
// Same entry on both sides: only the block data pointers differ.
1503 copyBufferVarData(varLayout, dstBlockPtr, varLayout, srcBlockPtr);
// Walks every block instance of the interface and copies variables from srcPtrs to dstPtrs.
// srcPtrs and dstPtrs are both indexed by the block index that layout reports for the instance.
1508 void copyNonWrittenData (const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& srcPtrs, const vector<BlockDataPtr>& dstPtrs)
1510 for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1512 const BufferBlock& block = interface.getBlock(declNdx);
1513 const bool isArray = block.isArray();
1514 const int numInstances = isArray ? block.getArraySize() : 1;
// Array blocks must have an instance name.
1516 DE_ASSERT(!isArray || block.getInstanceName());
1518 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
// Array instances are looked up as "<blockName>[<instanceNdx>]", others by plain block name.
1520 const string instanceName = block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
1521 const int blockNdx = layout.getBlockIndex(instanceName);
1522 const BlockDataPtr& srcBlockPtr = srcPtrs[blockNdx];
1523 const BlockDataPtr& dstBlockPtr = dstPtrs[blockNdx];
1525 for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1527 const BufferVar& bufVar = *varIter;
// NOTE(review): going by the function name, variables with ACCESS_WRITE are presumably skipped
// here; the statement controlled by this test is not visible in this listing - confirm.
1529 if (bufVar.getFlags() & ACCESS_WRITE)
1532 copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()));
1538 bool compareComponents (glu::DataType scalarType, const void* ref, const void* res, int numComps)
1540 if (scalarType == glu::TYPE_FLOAT)
1542 const float threshold = 0.05f; // Same as used in shaders - should be fine for values being used.
1544 for (int ndx = 0; ndx < numComps; ndx++)
1546 const float refVal = *((const float*)ref + ndx);
1547 const float resVal = *((const float*)res + ndx);
1549 if (deFloatAbs(resVal - refVal) >= threshold)
1553 else if (scalarType == glu::TYPE_BOOL)
1555 for (int ndx = 0; ndx < numComps; ndx++)
1557 const deUint32 refVal = *((const deUint32*)ref + ndx);
1558 const deUint32 resVal = *((const deUint32*)res + ndx);
1560 if ((refVal != 0) != (resVal != 0))
1566 DE_ASSERT(scalarType == glu::TYPE_INT || scalarType == glu::TYPE_UINT);
1568 for (int ndx = 0; ndx < numComps; ndx++)
1570 const deUint32 refVal = *((const deUint32*)ref + ndx);
1571 const deUint32 resVal = *((const deUint32*)res + ndx);
1573 if (refVal != resVal)
// Compares the data of one buffer variable in a result block against a reference block and
// logs mismatches. The two layout entries may differ in offsets, strides and matrix
// majorness; the number of elements compared is taken from the result entry, which must not
// exceed the reference (asserted). Returns true when numFailed is 0 at the end.
// NOTE(review): the declaration and updates of numFailed (and of isOk in the matrix path) are
// on lines not visible in this listing.
1581 bool compareBufferVarData (tcu::TestLog& log, const BufferVarLayoutEntry& refEntry, const BlockDataPtr& refBlockPtr, const BufferVarLayoutEntry& resEntry, const BlockDataPtr& resBlockPtr)
1583 DE_ASSERT(resEntry.arraySize <= refEntry.arraySize);
1584 DE_ASSERT(resEntry.topLevelArraySize <= refEntry.topLevelArraySize);
1585 DE_ASSERT(resBlockPtr.lastUnsizedArraySize <= refBlockPtr.lastUnsizedArraySize);
1586 DE_ASSERT(resEntry.type == refEntry.type);
1588 deUint8* const resBasePtr = (deUint8*)resBlockPtr.ptr + resEntry.offset;
1589 const deUint8* const refBasePtr = (const deUint8*)refBlockPtr.ptr + refEntry.offset;
1590 const glu::DataType scalarType = glu::getDataTypeScalarType(refEntry.type);
1591 const int scalarSize = glu::getDataTypeScalarSize(resEntry.type);
1592 const bool isMatrix = glu::isDataTypeMatrix(resEntry.type);
1593 const int compSize = sizeof(deUint32);
// Detailed mismatch messages are only logged while fewer than this many failures were counted.
1594 const int maxPrints = 3;
// An array size of 0 in a layout entry stands for an unsized array; its real length is stored
// in the block data pointer.
1597 const int resArraySize = resEntry.arraySize == 0 ? resBlockPtr.lastUnsizedArraySize : resEntry.arraySize;
1598 const int resArrayStride = resEntry.arrayStride;
1599 const int resTopLevelSize = resEntry.topLevelArraySize == 0 ? resBlockPtr.lastUnsizedArraySize : resEntry.topLevelArraySize;
1600 const int resTopLevelStride = resEntry.topLevelArrayStride;
1601 const int refArraySize = refEntry.arraySize == 0 ? refBlockPtr.lastUnsizedArraySize : refEntry.arraySize;
1602 const int refArrayStride = refEntry.arrayStride;
1603 const int refTopLevelSize = refEntry.topLevelArraySize == 0 ? refBlockPtr.lastUnsizedArraySize : refEntry.topLevelArraySize;
1604 const int refTopLevelStride = refEntry.topLevelArrayStride;
1606 DE_ASSERT(resArraySize <= refArraySize && resTopLevelSize <= refTopLevelSize);
// The reference sizes are only needed by the assert above; this keeps them referenced otherwise.
1607 DE_UNREF(refArraySize && refTopLevelSize);
1609 for (int topElemNdx = 0; topElemNdx < resTopLevelSize; topElemNdx++)
1611 deUint8* const resTopPtr = resBasePtr + topElemNdx*resTopLevelStride;
1612 const deUint8* const refTopPtr = refBasePtr + topElemNdx*refTopLevelStride;
1614 for (int elementNdx = 0; elementNdx < resArraySize; elementNdx++)
1616 deUint8* const resElemPtr = resTopPtr + elementNdx*resArrayStride;
1617 const deUint8* const refElemPtr = refTopPtr + elementNdx*refArrayStride;
// Matrices are compared one component at a time because reference and result may use a
// different majorness and matrix stride (presumably guarded by isMatrix - not visible here).
1621 const int numRows = glu::getDataTypeMatrixNumRows(resEntry.type);
1622 const int numCols = glu::getDataTypeMatrixNumColumns(resEntry.type);
1625 for (int colNdx = 0; colNdx < numCols; colNdx++)
1627 for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
1629 deUint8* resCompPtr = resElemPtr + (resEntry.isRowMajor ? rowNdx*resEntry.matrixStride + colNdx*compSize
1630 : colNdx*resEntry.matrixStride + rowNdx*compSize);
1631 const deUint8* refCompPtr = refElemPtr + (refEntry.isRowMajor ? rowNdx*refEntry.matrixStride + colNdx*compSize
1632 : colNdx*refEntry.matrixStride + rowNdx*compSize);
// Both accesses must stay inside their block's data.
1634 DE_ASSERT((deIntptr)(refCompPtr + compSize) - (deIntptr)refBlockPtr.ptr <= (deIntptr)refBlockPtr.size);
1635 DE_ASSERT((deIntptr)(resCompPtr + compSize) - (deIntptr)resBlockPtr.ptr <= (deIntptr)resBlockPtr.size);
// \note Arguments are passed as (res, ref) although compareComponents() declares (ref, res);
//       harmless because every comparison it performs is symmetric.
1637 isOk = isOk && compareComponents(scalarType, resCompPtr, refCompPtr, 1);
1644 if (numFailed < maxPrints)
// Log the whole expected and actual matrix as GLSL constructor expressions.
1646 std::ostringstream expected, got;
1647 generateImmMatrixSrc(expected, refEntry.type, refEntry.matrixStride, refEntry.isRowMajor, refElemPtr);
1648 generateImmMatrixSrc(got, resEntry.type, resEntry.matrixStride, resEntry.isRowMajor, resElemPtr);
1649 log << TestLog::Message << "ERROR: mismatch in " << refEntry.name << ", top-level ndx " << topElemNdx << ", bottom-level ndx " << elementNdx << ":\n"
1650 << " expected " << expected.str() << "\n"
1651 << " got " << got.str()
1652 << TestLog::EndMessage;
// Scalars and vectors are tightly packed and compared in one call.
1658 DE_ASSERT((deIntptr)(refElemPtr + scalarSize*compSize) - (deIntptr)refBlockPtr.ptr <= (deIntptr)refBlockPtr.size);
1659 DE_ASSERT((deIntptr)(resElemPtr + scalarSize*compSize) - (deIntptr)resBlockPtr.ptr <= (deIntptr)resBlockPtr.size);
1661 const bool isOk = compareComponents(scalarType, resElemPtr, refElemPtr, scalarSize);
1666 if (numFailed < maxPrints)
1668 std::ostringstream expected, got;
1669 generateImmScalarVectorSrc(expected, refEntry.type, refElemPtr);
1670 generateImmScalarVectorSrc(got, resEntry.type, resElemPtr);
1671 log << TestLog::Message << "ERROR: mismatch in " << refEntry.name << ", top-level ndx " << topElemNdx << ", bottom-level ndx " << elementNdx << ":\n"
1672 << " expected " << expected.str() << "\n"
1673 << " got " << got.str()
1674 << TestLog::EndMessage;
// Once the print limit was reached, summarize the total number of failures instead.
1681 if (numFailed >= maxPrints)
1682 log << TestLog::Message << "... (" << numFailed << " failures for " << refEntry.name << " in total)" << TestLog::EndMessage;
1684 return numFailed == 0;
// Compares every active variable of every reference block against the result block of the
// same name, logging mismatches to log. Blocks and variables are matched by name. Reference
// blocks without a result counterpart are skipped.
// NOTE(review): the declaration and the return of allOk are on lines not visible in this listing.
1687 bool compareData (tcu::TestLog& log, const BufferLayout& refLayout, const vector<BlockDataPtr>& refBlockPointers, const BufferLayout& resLayout, const vector<BlockDataPtr>& resBlockPointers)
1689 const int numBlocks = (int)refLayout.blocks.size();
1692 for (int refBlockNdx = 0; refBlockNdx < numBlocks; refBlockNdx++)
1694 const BlockLayoutEntry& refBlock = refLayout.blocks[refBlockNdx];
1695 const BlockDataPtr& refBlockPtr = refBlockPointers[refBlockNdx];
1696 int resBlockNdx = resLayout.getBlockIndex(refBlock.name.c_str());
1698 if (resBlockNdx >= 0)
1700 DE_ASSERT(de::inBounds(resBlockNdx, 0, (int)resBlockPointers.size()));
1702 const BlockDataPtr& resBlockPtr = resBlockPointers[resBlockNdx];
1704 for (vector<int>::const_iterator refVarNdxIter = refBlock.activeVarIndices.begin(); refVarNdxIter != refBlock.activeVarIndices.end(); refVarNdxIter++)
1706 const BufferVarLayoutEntry& refEntry = refLayout.bufferVars[*refVarNdxIter];
1707 int resVarNdx = resLayout.getVariableIndex(refEntry.name.c_str());
// NOTE(review): resVarNdx indexes bufferVars directly; presumably a ">= 0" guard sits on a line
// that is not visible in this listing - confirm.
1711 const BufferVarLayoutEntry& resEntry = resLayout.bufferVars[resVarNdx];
// The comparison is the left operand so it always runs, even after an earlier failure, and
// every mismatching variable gets logged.
1712 allOk = compareBufferVarData(log, refEntry, refBlockPtr, resEntry, resBlockPtr) && allOk;
1721 string getBlockAPIName (const BufferBlock& block, int instanceNdx)
1723 DE_ASSERT(block.isArray() || instanceNdx == 0);
1724 return block.getBlockName() + (block.isArray() ? ("[" + de::toString(instanceNdx) + "]") : string());
1727 // \note Some implementations don't report block members in the order they are declared.
1728 // To check whether the size has to be adjusted by the actual size of a top-level array,
1729 // we only need to know a) whether there is an unsized top-level array, and b)
1730 // what the stride of that array is.
// Checks the active variables of a block for one that isUnsizedArray() accepts.
// NOTE(review): the return statements are not visible in this listing; presumably true on the
// first hit and false otherwise.
1732 static bool hasUnsizedArray (const BufferLayout& layout, const BlockLayoutEntry& entry)
1734 for (vector<int>::const_iterator varNdx = entry.activeVarIndices.begin(); varNdx != entry.activeVarIndices.end(); ++varNdx)
1736 if (isUnsizedArray(layout.bufferVars[*varNdx]))
// Returns the stride of the first active variable of the block that is marked as unsized:
// arrayStride when its arraySize is 0, otherwise topLevelArrayStride when its
// topLevelArraySize is 0.
// NOTE(review): the value returned when no variable qualifies is on a line not visible here.
1743 static int getUnsizedArrayStride (const BufferLayout& layout, const BlockLayoutEntry& entry)
1745 for (vector<int>::const_iterator varNdx = entry.activeVarIndices.begin(); varNdx != entry.activeVarIndices.end(); ++varNdx)
1747 const BufferVarLayoutEntry& varEntry = layout.bufferVars[*varNdx];
// The bottom-level array is checked first, then the top-level one.
1749 if (varEntry.arraySize == 0)
1750 return varEntry.arrayStride;
1751 else if (varEntry.topLevelArraySize == 0)
1752 return varEntry.topLevelArrayStride;
// Computes the buffer size needed for every block instance of the interface. The result has
// one entry per layout block and is indexed by layout block index.
1758 vector<int> computeBufferSizes (const ShaderInterface& interface, const BufferLayout& layout)
1760 vector<int> sizes(layout.blocks.size());
1762 for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1764 const BufferBlock& block = interface.getBlock(declNdx);
1765 const bool isArray = block.isArray();
1766 const int numInstances = isArray ? block.getArraySize() : 1;
1768 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1770 const string apiName = getBlockAPIName(block, instanceNdx);
1771 const int blockNdx = layout.getBlockIndex(apiName);
// NOTE(review): blockNdx indexes layout.blocks directly; presumably a guard for a failed
// lookup sits on a line that is not visible in this listing - confirm.
1775 const BlockLayoutEntry& blockLayout = layout.blocks[blockNdx];
1776 const int baseSize = blockLayout.size;
// A block with an unsized array needs room for its actual elements on top of the base size.
1777 const bool isLastUnsized = hasUnsizedArray(layout, blockLayout);
1778 const int lastArraySize = isLastUnsized ? block.getLastUnsizedArraySize(instanceNdx) : 0;
1779 const int stride = isLastUnsized ? getUnsizedArrayStride(layout, blockLayout) : 0;
1781 sizes[blockNdx] = baseSize + lastArraySize*stride;
// Wraps the memory at ptr (bufferSize bytes) into a BlockDataPtr for the given block. For a
// block with an unsized array the element count is derived from the bytes beyond the block's
// base size; otherwise the count is 0.
// NOTE(review): the branch on isLastUnsized that separates the two returns is not visible here.
1789 BlockDataPtr getBlockDataPtr (const BufferLayout& layout, const BlockLayoutEntry& blockLayout, void* ptr, int bufferSize)
1791 const bool isLastUnsized = hasUnsizedArray(layout, blockLayout);
1792 const int baseSize = blockLayout.size;
1796 const int lastArrayStride = getUnsizedArrayStride(layout, blockLayout);
// A zero stride is replaced by 1 to avoid dividing by zero.
1797 const int lastArraySize = (bufferSize-baseSize) / (lastArrayStride ? lastArrayStride : 1);
// The buffer must hold the base size plus a whole number of array elements.
1799 DE_ASSERT(baseSize + lastArraySize*lastArrayStride == bufferSize);
1801 return BlockDataPtr(ptr, bufferSize, lastArraySize);
1804 return BlockDataPtr(ptr, bufferSize, 0);
// Constructors of Buffer: store the given buffer id and size, or default to id 0 and size 0.
1812 Buffer (deUint32 buffer_, int size_) : buffer(buffer_), size(size_) {}
1813 Buffer (void) : buffer(0), size(0) {}
// Describes where the data of one block lives: blockLocationsToPtrs() uses index to select a
// buffer pointer, offset as the byte offset into it, and size as the size of the block's data.
// The default constructor sets all three to 0.
1816 struct BlockLocation
1822 BlockLocation (int index_, int offset_, int size_) : index(index_), offset(offset_), size(size_) {}
1823 BlockLocation (void) : index(0), offset(0), size(0) {}
// Allocates reference data storage for all blocks of the layout: storage.data is resized to
// the sum of all block buffer sizes and storage.pointers receives one BlockDataPtr per layout
// block. storage must be empty on entry (asserted).
// NOTE(review): the declarations of totalSize and curOffset are on lines not visible here.
1826 void initRefDataStorage (const ShaderInterface& interface, const BufferLayout& layout, RefDataStorage& storage)
1828 DE_ASSERT(storage.data.empty() && storage.pointers.empty());
1830 const vector<int> bufferSizes = computeBufferSizes(interface, layout);
1833 for (vector<int>::const_iterator sizeIter = bufferSizes.begin(); sizeIter != bufferSizes.end(); ++sizeIter)
1834 totalSize += *sizeIter;
1836 storage.data.resize(totalSize);
1838 // Pointers for each block.
// Taking &storage.data[0] is only valid for a non-empty vector, hence the DE_NULL fallback.
1840 deUint8* basePtr = storage.data.empty() ? DE_NULL : &storage.data[0];
1843 DE_ASSERT(bufferSizes.size() == layout.blocks.size());
1844 DE_ASSERT(totalSize == 0 || basePtr);
1846 storage.pointers.resize(layout.blocks.size());
// Blocks are placed back to back in layout order.
1848 for (int blockNdx = 0; blockNdx < (int)layout.blocks.size(); blockNdx++)
1850 const BlockLayoutEntry& blockLayout = layout.blocks[blockNdx];
1851 const int bufferSize = bufferSizes[blockNdx];
1853 storage.pointers[blockNdx] = getBlockDataPtr(layout, blockLayout, basePtr + curOffset, bufferSize);
1855 curOffset += bufferSize;
// Converts block locations (buffer index, offset, size) into BlockDataPtr objects that point
// into the mapped buffers given in bufPtrs. blockLocations must have one entry per layout
// block (asserted).
// NOTE(review): the return statement is on a line not visible here; presumably blockPtrs.
1861 vector<BlockDataPtr> blockLocationsToPtrs (const BufferLayout& layout, const vector<BlockLocation>& blockLocations, const vector<void*>& bufPtrs)
1863 vector<BlockDataPtr> blockPtrs(blockLocations.size());
1865 DE_ASSERT(layout.blocks.size() == blockLocations.size());
1867 for (int blockNdx = 0; blockNdx < (int)layout.blocks.size(); blockNdx++)
1869 const BlockLayoutEntry& blockLayout = layout.blocks[blockNdx];
1870 const BlockLocation& location = blockLocations[blockNdx];
// location.index selects the buffer, location.offset the start of the block inside it.
1872 blockPtrs[blockNdx] = getBlockDataPtr(layout, blockLayout, (deUint8*)bufPtrs[location.index] + location.offset, location.size);
1878 } // anonymous (utilities)
// Allocates memory that satisfies both the buffer's own requirements and memReqs from the
// context's default allocator, and binds it to buffer.
// NOTE(review): the return statement is on a line not visible here; presumably the allocation.
1880 de::MovePtr<vk::Allocation> allocateAndBindMemory (Context& context, vk::VkBuffer buffer, vk::MemoryRequirement memReqs)
1882 const vk::DeviceInterface& vkd = context.getDeviceInterface();
1883 const vk::VkMemoryRequirements bufReqs = vk::getBufferMemoryRequirements(vkd, context.getDevice(), buffer);
1884 de::MovePtr<vk::Allocation> memory = context.getDefaultAllocator().allocate(bufReqs, memReqs);
// NOTE(review): the result of bindBufferMemory() is not checked on this line - confirm whether
// a failure should be reported here.
1886 vkd.bindBufferMemory(context.getDevice(), buffer, memory->getMemory(), memory->getOffset());
1891 vk::Move<vk::VkBuffer> createBuffer (Context& context, vk::VkDeviceSize bufferSize, vk::VkBufferUsageFlags usageFlags)
1893 const vk::VkDevice vkDevice = context.getDevice();
1894 const vk::DeviceInterface& vk = context.getDeviceInterface();
1895 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
1897 const vk::VkBufferCreateInfo bufferInfo =
1899 vk::VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1900 DE_NULL, // const void* pNext;
1901 0u, // VkBufferCreateFlags flags;
1902 bufferSize, // VkDeviceSize size;
1903 usageFlags, // VkBufferUsageFlags usage;
1904 vk::VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1905 1u, // deUint32 queueFamilyCount;
1906 &queueFamilyIndex // const deUint32* pQueueFamilyIndices;
1909 return vk::createBuffer(vk, vkDevice, &bufferInfo);
1912 // SSBOLayoutCaseInstance
// Test instance of an SSBO layout case; the test itself is executed by iterate().
1914 class SSBOLayoutCaseInstance : public TestInstance
1917 SSBOLayoutCaseInstance (Context& context,
1918 SSBOLayoutCase::BufferMode bufferMode,
1919 const ShaderInterface& interface,
1920 const BufferLayout& refLayout,
1921 const RefDataStorage& initialData,
1922 const RefDataStorage& writeData);
1923 virtual ~SSBOLayoutCaseInstance (void);
1924 virtual tcu::TestStatus iterate (void);
1927 SSBOLayoutCase::BufferMode m_bufferMode;
// \note The following four members are references: the objects passed to the constructor
//       must outlive this instance.
1928 const ShaderInterface& m_interface;
1929 const BufferLayout& m_refLayout;
1930 const RefDataStorage& m_initialData; // Initial data stored in buffer.
1931 const RefDataStorage& m_writeData; // Data written by compute shader.
// Shared-pointer wrappers so that buffers and their allocations can be kept in vectors.
1934 typedef de::SharedPtr<vk::Unique<vk::VkBuffer> > VkBufferSp;
1935 typedef de::SharedPtr<vk::Allocation> AllocationSp;
1937 std::vector<VkBufferSp> m_uniformBuffers;
1938 std::vector<AllocationSp> m_uniformAllocs;
// Stores the case description for later use by iterate(). 'interface', 'refLayout', 'initialData'
// and 'writeData' are bound to reference members (see the class declaration), not copied, so the
// caller must keep them alive for as long as the instance exists.
1941 SSBOLayoutCaseInstance::SSBOLayoutCaseInstance (Context& context,
1942 SSBOLayoutCase::BufferMode bufferMode,
1943 const ShaderInterface& interface,
1944 const BufferLayout& refLayout,
1945 const RefDataStorage& initialData,
1946 const RefDataStorage& writeData)
1947 : TestInstance (context)
1948 , m_bufferMode (bufferMode)
1949 , m_interface (interface)
1950 , m_refLayout (refLayout)
1951 , m_initialData (initialData)
1952 , m_writeData (writeData)
// Destructor (declared virtual in the class). No statements of its body are visible in this excerpt.
1956 SSBOLayoutCaseInstance::~SSBOLayoutCaseInstance (void)
// Runs the whole test in one call:
//  1. creates a small result buffer (binding 0) plus one storage-buffer binding per interface block,
//  2. uploads the initial reference data into the block buffers,
//  3. builds a compute pipeline from the "compute" program and dispatches a single workgroup,
//  4. waits for completion, then checks the counter in the result buffer and compares the block
//     buffers against the expected written data.
// Returns pass only if both the counter check and the data comparison succeed.
1960 tcu::TestStatus SSBOLayoutCaseInstance::iterate (void)
1962 // todo: add compute stage availability check
1963 const vk::DeviceInterface& vk = m_context.getDeviceInterface();
1964 const vk::VkDevice device = m_context.getDevice();
1965 const vk::VkQueue queue = m_context.getUniversalQueue();
1966 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1968 // Create descriptor set
// acBuffer: 1024-byte host-visible storage buffer, zero-filled and flushed here. It is written to
// binding 0 below and its first int is read back after the dispatch (logged as ac_numPassed).
1969 const deUint32 acBufferSize = 1024;
1970 vk::Move<vk::VkBuffer> acBuffer (createBuffer(m_context, acBufferSize, vk:: VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
1971 de::UniquePtr<vk::Allocation> acBufferAlloc (allocateAndBindMemory(m_context, *acBuffer, vk::MemoryRequirement::HostVisible));
1973 deMemset(acBufferAlloc->getHostPtr(), 0, acBufferSize);
1974 flushMappedMemoryRange(vk, device, acBufferAlloc->getMemory(), acBufferAlloc->getOffset(), acBufferSize);
1976 vk::DescriptorSetLayoutBuilder setLayoutBuilder;
1977 vk::DescriptorPoolBuilder poolBuilder;
1980 .addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
// One layout binding per interface block: array blocks get an array binding and add
// getArraySize() to numBlocks, the other blocks get a single binding.
1983 const int numBindings = m_interface.getNumBlocks();
1984 for (int bindingNdx = 0; bindingNdx < numBindings; bindingNdx++)
1986 const BufferBlock& block = m_interface.getBlock(bindingNdx);
1987 if (block.isArray())
1990 .addArrayBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, block.getArraySize(), vk::VK_SHADER_STAGE_COMPUTE_BIT);
1991 numBlocks += block.getArraySize();
1996 .addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
2002 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)(1 + numBlocks));
// The pool above is sized for 1 + numBlocks storage-buffer descriptors (result buffer plus every
// block instance) and is built for a single descriptor set.
2004 const vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(setLayoutBuilder.build(vk, device));
2005 const vk::Unique<vk::VkDescriptorPool> descriptorPool(poolBuilder.build(vk, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2007 const vk::VkDescriptorSetAllocateInfo allocInfo =
2009 vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2013 &descriptorSetLayout.get(),
2016 const vk::Unique<vk::VkDescriptorSet> descriptorSet(allocateDescriptorSet(vk, device, &allocInfo));
2017 const vk::VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*acBuffer, 0ull, acBufferSize);
// Descriptor writes are only queued in setUpdateBuilder here; they are applied by update() further down.
// 'descriptors' holds one buffer info per block instance and is filled in by the upload code below.
2019 vk::DescriptorSetUpdateBuilder setUpdateBuilder;
2020 std::vector<vk::VkDescriptorBufferInfo> descriptors(numBlocks);
2023 .writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo);
2025 vector<BlockDataPtr> mappedBlockPtrs;
2027 // Upload base buffers
2028 const std::vector<int> bufferSizes = computeBufferSizes(m_interface, m_refLayout);
// mapPtrs: host pointer of each created buffer. blockLocations: for every block instance, the
// index of the buffer it lives in, its byte offset inside that buffer and its size.
2030 std::vector<void*> mapPtrs;
2031 std::vector<BlockLocation> blockLocations (numBlocks);
2033 DE_ASSERT(bufferSizes.size() == m_refLayout.blocks.size());
2035 if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
// PER_BLOCK mode: every block instance gets its own buffer and allocation, bound at offset 0.
2037 mapPtrs.resize(numBlocks);
2038 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2040 const deUint32 bufferSize = bufferSizes[blockNdx];
2041 DE_ASSERT(bufferSize > 0);
2043 blockLocations[blockNdx] = BlockLocation(blockNdx, 0, bufferSize);
2045 vk::Move<vk::VkBuffer> buffer = createBuffer(m_context, bufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
2046 de::MovePtr<vk::Allocation> alloc = allocateAndBindMemory(m_context, *buffer, vk::MemoryRequirement::HostVisible);
2048 descriptors[blockNdx] = makeDescriptorBufferInfo(*buffer, 0ull, bufferSize);
2050 mapPtrs[blockNdx] = alloc->getHostPtr();
// Ownership of the buffer and its memory moves into the member vectors.
2052 m_uniformBuffers.push_back(VkBufferSp(new vk::Unique<vk::VkBuffer>(buffer)));
2053 m_uniformAllocs.push_back(AllocationSp(alloc.release()));
// SINGLE mode: all block instances are packed into one buffer. Each block's start offset is
// rounded up to the device's minStorageBufferOffsetAlignment before the block is placed.
2058 DE_ASSERT(m_bufferMode == SSBOLayoutCase::BUFFERMODE_SINGLE);
2060 vk::VkPhysicalDeviceProperties properties;
2061 m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
2062 const int bindingAlignment = (int)properties.limits.minStorageBufferOffsetAlignment;
2064 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2066 const int bufferSize = bufferSizes[blockNdx];
2067 DE_ASSERT(bufferSize > 0);
2069 if (bindingAlignment > 0)
2070 curOffset = deRoundUp32(curOffset, bindingAlignment);
2072 blockLocations[blockNdx] = BlockLocation(0, curOffset, bufferSize);
2073 curOffset += bufferSize;
// curOffset now equals the end of the last block, i.e. the total size including alignment padding.
2076 const int totalBufferSize = curOffset;
2077 vk::Move<vk::VkBuffer> buffer = createBuffer(m_context, totalBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
2078 de::MovePtr<vk::Allocation> alloc = allocateAndBindMemory(m_context, *buffer, vk::MemoryRequirement::HostVisible);
2080 mapPtrs.push_back(alloc->getHostPtr());
2082 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2084 const deUint32 bufferSize = bufferSizes[blockNdx];
2085 const deUint32 offset = blockLocations[blockNdx].offset;
2087 descriptors[blockNdx] = makeDescriptorBufferInfo(*buffer, offset, bufferSize);
2090 m_uniformBuffers.push_back(VkBufferSp(new vk::Unique<vk::VkBuffer>(buffer)));
2091 m_uniformAllocs.push_back(AllocationSp(alloc.release()));
2094 // Update remaining bindings
// Block bindings start at binding 1 (binding 0 is the result buffer). blockNdx walks through
// 'descriptors' so that an array block consumes numBlocksInBinding consecutive entries.
2097 for (int bindingNdx = 0; bindingNdx < numBindings; ++bindingNdx)
2099 const BufferBlock& block = m_interface.getBlock(bindingNdx);
2100 const int numBlocksInBinding = (block.isArray() ? block.getArraySize() : 1);
2102 setUpdateBuilder.writeArray(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(bindingNdx + 1),
2103 vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBlocksInBinding, &descriptors[blockNdx]);
2105 blockNdx += numBlocksInBinding;
2109 // Copy the initial data to the storage buffers
2111 mappedBlockPtrs = blockLocationsToPtrs(m_refLayout, blockLocations, mapPtrs);
2112 copyData(m_refLayout, mappedBlockPtrs, m_refLayout, m_initialData.pointers);
// Flush the host writes made by copyData so the device can see them.
2114 if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
2116 DE_ASSERT(m_uniformAllocs.size() == bufferSizes.size());
2117 for (size_t allocNdx = 0; allocNdx < m_uniformAllocs.size(); allocNdx++)
2119 const int size = bufferSizes[allocNdx];
2120 vk::Allocation* alloc = m_uniformAllocs[allocNdx].get();
2121 flushMappedMemoryRange(vk, device, alloc->getMemory(), alloc->getOffset(), size);
2126 DE_ASSERT(m_bufferMode == SSBOLayoutCase::BUFFERMODE_SINGLE);
2127 DE_ASSERT(m_uniformAllocs.size() == 1);
// NOTE(review): totalSize is the plain sum of the block sizes, but the SINGLE-mode layout above
// pads block offsets up to bindingAlignment. When padding was inserted the flushed range ends
// before the last block's data; flushing the full buffer size (or the whole allocation) looks
// like the intended behavior. Confirm.
2129 for (size_t bufferNdx = 0; bufferNdx < bufferSizes.size(); bufferNdx++)
2131 totalSize += bufferSizes[bufferNdx];
2134 DE_ASSERT(totalSize > 0);
2135 vk::Allocation* alloc = m_uniformAllocs[0].get();
2136 flushMappedMemoryRange(vk, device, alloc->getMemory(), alloc->getOffset(), totalSize);
// Apply all descriptor writes queued in setUpdateBuilder.
2141 setUpdateBuilder.update(vk, device);
// Pipeline layout: the single descriptor set layout built above, no push constants.
2143 const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams =
2145 vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
2146 DE_NULL, // const void* pNext;
2147 (vk::VkPipelineLayoutCreateFlags)0,
2148 1u, // deUint32 descriptorSetCount;
2149 &*descriptorSetLayout, // const VkDescriptorSetLayout* pSetLayouts;
2150 0u, // deUint32 pushConstantRangeCount;
2151 DE_NULL, // const VkPushConstantRange* pPushConstantRanges;
2153 vk::Move<vk::VkPipelineLayout> pipelineLayout(createPipelineLayout(vk, device, &pipelineLayoutParams));
// Compute pipeline built from the "compute" binary registered by SSBOLayoutCase::initPrograms().
2155 vk::Move<vk::VkShaderModule> shaderModule (createShaderModule(vk, device, m_context.getBinaryCollection().get("compute"), 0));
2156 const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
2158 vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,// VkStructureType sType;
2159 DE_NULL, // const void* pNext;
2160 (vk::VkPipelineShaderStageCreateFlags)0,
2161 vk::VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStage stage;
2162 *shaderModule, // VkShader shader;
2164 DE_NULL, // const VkSpecializationInfo* pSpecializationInfo;
2166 const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
2168 vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
2169 DE_NULL, // const void* pNext;
2170 0, // VkPipelineCreateFlags flags;
2171 pipelineShaderStageParams, // VkPipelineShaderStageCreateInfo stage;
2172 *pipelineLayout, // VkPipelineLayout layout;
2173 DE_NULL, // VkPipeline basePipelineHandle;
2174 0, // deInt32 basePipelineIndex;
2176 vk::Move<vk::VkPipeline> pipeline(createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo));
// Command buffer: bind pipeline and descriptor set, dispatch one workgroup, then add barriers
// from shader writes to host reads.
2178 vk::Move<vk::VkCommandPool> cmdPool (createCommandPool(vk, device, vk::VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
2179 vk::Move<vk::VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vk, device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2181 const vk::VkCommandBufferBeginInfo cmdBufBeginParams =
2183 vk::VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // VkStructureType sType;
2184 DE_NULL, // const void* pNext;
2185 0u, // VkCmdBufferOptimizeFlags flags;
2186 (const vk::VkCommandBufferInheritanceInfo*)DE_NULL,
2188 VK_CHECK(vk.beginCommandBuffer(*cmdBuffer, &cmdBufBeginParams));
2190 vk.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2191 vk.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2193 vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
2195 // Add barriers for shader writes to storage buffers before host access
2196 std::vector<vk::VkBufferMemoryBarrier> barriers;
2197 if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
// PER_BLOCK mode: one barrier per block buffer, sized to that block.
2199 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2201 const vk::VkBuffer uniformBuffer = m_uniformBuffers[blockNdx].get()->get();
2203 const vk::VkBufferMemoryBarrier barrier =
2205 vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
2207 vk::VK_ACCESS_SHADER_WRITE_BIT,
2208 vk::VK_ACCESS_HOST_READ_BIT,
2209 VK_QUEUE_FAMILY_IGNORED,
2210 VK_QUEUE_FAMILY_IGNORED,
2213 static_cast<vk::VkDeviceSize>(bufferSizes[blockNdx])
2215 barriers.push_back(barrier);
// SINGLE mode: one barrier on the shared buffer.
// NOTE(review): totalSize below again sums the block sizes without the alignment padding used when
// the buffer was laid out; if it is used as the barrier size (that field is not visible in this
// excerpt) the barrier may not cover the last block. Confirm.
2220 const vk::VkBuffer uniformBuffer = m_uniformBuffers[0].get()->get();
2222 vk::VkDeviceSize totalSize = 0;
2223 for (size_t bufferNdx = 0; bufferNdx < bufferSizes.size(); bufferNdx++)
2224 totalSize += bufferSizes[bufferNdx];
2226 const vk::VkBufferMemoryBarrier barrier =
2228 vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
2230 vk::VK_ACCESS_SHADER_WRITE_BIT,
2231 vk::VK_ACCESS_HOST_READ_BIT,
2232 VK_QUEUE_FAMILY_IGNORED,
2233 VK_QUEUE_FAMILY_IGNORED,
2238 barriers.push_back(barrier);
// NOTE(review): &barriers[0] is evaluated even when 'barriers' is empty, so this relies on at
// least one barrier having been pushed above.
2240 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
2241 0u, DE_NULL, static_cast<deUint32>(barriers.size()), &barriers[0], 0u, DE_NULL);
2243 VK_CHECK(vk.endCommandBuffer(*cmdBuffer));
// Submit the command buffer and block until its fence signals (timeout ~0ull).
2245 vk::Move<vk::VkFence> fence (createFence(vk, device));
2247 const vk::VkSubmitInfo submitInfo =
2249 vk::VK_STRUCTURE_TYPE_SUBMIT_INFO,
2252 (const vk::VkSemaphore*)DE_NULL,
2253 (const vk::VkPipelineStageFlags*)DE_NULL,
2257 (const vk::VkSemaphore*)DE_NULL,
2260 VK_CHECK(vk.queueSubmit(queue, 1u, &submitInfo, *fence));
2261 VK_CHECK(vk.waitForFences(device, 1u, &fence.get(), DE_TRUE, ~0ull));
2263 // Read back ac_numPassed data
// The first int of the result buffer is expected to equal refCount (1).
// NOTE(review): no mapped-memory invalidate is visible in this excerpt before the host pointers
// are read (here and in compareData below), and no barrier for acBuffer is recorded above.
// Presumably this relies on the memory being host-coherent - confirm.
2266 const int refCount = 1;
2269 resCount = *((const int*)acBufferAlloc->getHostPtr());
2271 counterOk = (refCount == resCount);
2274 m_context.getTestContext().getLog() << TestLog::Message << "Error: ac_numPassed = " << resCount << ", expected " << refCount << TestLog::EndMessage;
// Compare the block buffers, through the host pointers mapped earlier, against the expected written data.
2279 const bool compareOk = compareData(m_context.getTestContext().getLog(), m_refLayout, m_writeData.pointers, m_refLayout, mappedBlockPtrs);
// Report which of the two checks failed, if any.
2281 if (compareOk && counterOk)
2282 return tcu::TestStatus::pass("Result comparison and counter values are OK");
2283 else if (!compareOk && counterOk)
2284 return tcu::TestStatus::fail("Result comparison failed");
2285 else if (compareOk && !counterOk)
2286 return tcu::TestStatus::fail("Counter value incorrect");
2288 return tcu::TestStatus::fail("Result comparison and counter values are incorrect");
// Constructs the test case: name and description go to the TestCase base, while the buffer mode
// and the matrix load flag are stored for later use by createInstance() and init().
2293 SSBOLayoutCase::SSBOLayoutCase (tcu::TestContext& testCtx, const char* name, const char* description, BufferMode bufferMode, MatrixLoadFlags matrixLoadFlag)
2294 : TestCase (testCtx, name, description)
2295 , m_bufferMode (bufferMode)
2296 , m_matrixLoadFlag (matrixLoadFlag)
// Destructor. No statements of its body are visible in this excerpt.
2300 SSBOLayoutCase::~SSBOLayoutCase (void)
// Registers the generated compute shader source under the program name "compute".
// Requires m_computeShaderSrc to be non-empty (asserted), i.e. init() must have run first.
// When the interface uses relaxed layout, the source is added with SPIR-V 1.0 build options and
// FLAG_ALLOW_RELAXED_OFFSETS; the last add() below attaches no build options.
2304 void SSBOLayoutCase::initPrograms (vk::SourceCollections& programCollection) const
2306 DE_ASSERT(!m_computeShaderSrc.empty());
2308 if (usesRelaxedLayout(m_interface))
2310 programCollection.glslSources.add("compute") << glu::ComputeSource(m_computeShaderSrc)
2311 << vk::GlslBuildOptions(vk::SPIRV_VERSION_1_0, vk::GlslBuildOptions::FLAG_ALLOW_RELAXED_OFFSETS);
2314 programCollection.glslSources.add("compute") << glu::ComputeSource(m_computeShaderSrc);
// Creates the instance that executes this case.
// Throws NotSupportedError when the interface uses relaxed layout and the device does not list
// the VK_KHR_relaxed_block_layout extension. The returned instance is allocated with new and
// receives this case's interface, reference layout and data objects (the instance constructor
// takes them by const reference).
2317 TestInstance* SSBOLayoutCase::createInstance (Context& context) const
2319 if (!de::contains(context.getDeviceExtensions().begin(), context.getDeviceExtensions().end(), "VK_KHR_relaxed_block_layout") && usesRelaxedLayout(m_interface))
2320 TCU_THROW(NotSupportedError, "VK_KHR_relaxed_block_layout not supported");
2321 return new SSBOLayoutCaseInstance(context, m_bufferMode, m_interface, m_refLayout, m_initialData, m_writeData);
// Prepares everything derived from m_interface: the reference layout, the initial and expected
// (written) data sets, and finally the compute shader source stored in m_computeShaderSrc.
// The two generateValues() calls receive different values derived from the case name hash
// (presumably random seeds - confirm against generateValues), and copyNonWrittenData() is then
// called with both data sets (presumably to carry over values the shader does not write - confirm).
2324 void SSBOLayoutCase::init ()
2326 computeReferenceLayout (m_refLayout, m_interface);
2327 initRefDataStorage (m_interface, m_refLayout, m_initialData);
2328 initRefDataStorage (m_interface, m_refLayout, m_writeData);
2329 generateValues (m_refLayout, m_initialData.pointers, deStringHash(getName()) ^ 0xad2f7214);
2330 generateValues (m_refLayout, m_writeData.pointers, deStringHash(getName()) ^ 0x25ca4e7);
2331 copyNonWrittenData (m_interface, m_refLayout, m_initialData.pointers, m_writeData.pointers);
2333 m_computeShaderSrc = generateComputeShader(m_interface, m_refLayout, m_initialData.pointers, m_writeData.pointers, m_matrixLoadFlag);