From 73a9fa0c8288c84d143e33b65ee1db2faed9e5c3 Mon Sep 17 00:00:00 2001 From: Ari Suonpaa Date: Tue, 29 May 2018 09:43:22 +0300 Subject: [PATCH] Fix invalid block layouts Upcoming update in SPIR-V validator checks correctness of block layouts. This change fixes the errors in 16bit storage tests found by the new validation rules. Affects: dEQP-VK.spirv_assembly.instruction.*.16bit_storage.* Components: Vulkan VK-GL-CTS issue: 1184 Change-Id: I1794fdefe044c6e256e189ab5a21c69a69eeb58e --- .../spirv_assembly/vktSpvAsm16bitStorageTests.cpp | 487 +++++++++++++-------- 1 file changed, 316 insertions(+), 171 deletions(-) diff --git a/external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsm16bitStorageTests.cpp b/external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsm16bitStorageTests.cpp index 2dbc069..cf47cea 100644 --- a/external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsm16bitStorageTests.cpp +++ b/external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsm16bitStorageTests.cpp @@ -183,7 +183,7 @@ int getStructSize(const ShaderTemplate shaderTemplate) template bool graphicsCheck16BitFloats (const std::vector& originalFloats, const vector& outputAllocs, - const std::vector& /* expectedOutputs */, + const std::vector& expectedOutputs, tcu::TestLog& log) { if (outputAllocs.size() != originalFloats.size()) @@ -196,10 +196,11 @@ bool graphicsCheck16BitFloats (const std::vector& originalFloats, const deUint16* returned = static_cast(outputAllocs[outputNdx]->getHostPtr()); const float* original = reinterpret_cast(&originalBytes.front()); - const deUint32 count = static_cast(originalBytes.size() / sizeof(float)); + const deUint32 count = static_cast(expectedOutputs[outputNdx].second->getByteSize() / sizeof(deUint16)); + const deUint32 inputStride = static_cast(originalBytes.size() / sizeof(float)) / count; for (deUint32 numNdx = 0; numNdx < count; ++numNdx) - if (!compare16BitFloat(original[numNdx], returned[numNdx], RoundingMode, log)) + if (!compare16BitFloat(original[numNdx * inputStride], returned[numNdx], RoundingMode, log)) return false; } @@ -234,7 +235,7 @@ bool computeCheckBuffersFloats (const std::vector& originalFloats, template bool computeCheck16BitFloats (const std::vector& originalFloats, const vector& outputAllocs, - const std::vector& /* expectedOutputs */, + const std::vector& expectedOutputs, tcu::TestLog& log) { if (outputAllocs.size() != originalFloats.size()) @@ -247,10 +248,11 @@ bool computeCheck16BitFloats (const std::vector& originalFloats, const deUint16* returned = static_cast(outputAllocs[outputNdx]->getHostPtr()); const float* original = reinterpret_cast(&originalBytes.front()); - const deUint32 count = static_cast(originalBytes.size() / sizeof(float)); + const deUint32 count = static_cast(expectedOutputs[outputNdx]->getByteSize() / sizeof(deUint16)); + const deUint32 inputStride = static_cast(originalBytes.size() / sizeof(float)) / count; for (deUint32 numNdx = 0; numNdx < count; ++numNdx) - if (!compare16BitFloat(original[numNdx], returned[numNdx], RoundingMode, log)) + if (!compare16BitFloat(original[numNdx * inputStride], returned[numNdx], RoundingMode, log)) return false; } @@ -1391,53 +1393,67 @@ void addCompute16bitStorageUniform16To32Group (tcu::TestCaseGroup* group) bool useConstantIndex; unsigned constantIndex; unsigned count; + unsigned inputStride; }; - const CompositeType cTypes[] = + const CompositeType cTypes[2][5] = { - {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", false, 0, numElements}, - {"scalar_const_idx_5", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", true, 5, numElements}, - {"scalar_const_idx_8", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", true, 8, numElements}, - {"vector", "v2f32", "v2f16", "OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 4\n", false, 0, numElements / 2}, - {"matrix", "v2f32", "v2f16", "OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n", false, 0, numElements / 8}, + { + {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", false, 0, numElements, 1}, + {"scalar_const_idx_5", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", true, 5, numElements, 1}, + {"scalar_const_idx_8", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", true, 8, numElements, 1}, + {"vector", "v2f32", "v2f16", "OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 4\n", false, 0, numElements / 2, 2}, + {"matrix", "v2f32", "v2f16", "OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n", false, 0, numElements / 8, 8} + }, + { + {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n", false, 0, numElements, 8}, + {"scalar_const_idx_5", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n", true, 5, numElements, 8}, + {"scalar_const_idx_8", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n", true, 8, numElements, 8}, + {"vector", "v2f32", "v2f16", "OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 16\n", false, 0, numElements / 2, 8}, + {"matrix", "v2f32", "v2f16", "OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n", false, 0, numElements / 8, 8} + } }; - vector float16Data = getFloat16s(rnd, numElements); - vector float32Data; - - float32Data.reserve(numElements); - for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx) - float32Data.push_back(deFloat16To32(float16Data[numIdx])); - for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx) - for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx) + for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx) { ComputeShaderSpec spec; map specs; - string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name + "_float"; + string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name + "_float"; specs["capability"] = CAPABILITIES[capIdx].cap; specs["storage"] = CAPABILITIES[capIdx].decor; - specs["stride"] = cTypes[tyIdx].stride; - specs["base32"] = cTypes[tyIdx].base32; - specs["base16"] = cTypes[tyIdx].base16; + specs["stride"] = cTypes[capIdx][tyIdx].stride; + specs["base32"] = cTypes[capIdx][tyIdx].base32; + specs["base16"] = cTypes[capIdx][tyIdx].base16; specs["types"] = floatTypes; specs["convert"] = "OpFConvert"; - specs["constarrayidx"] = de::toString(cTypes[tyIdx].constantIndex); - if (cTypes[tyIdx].useConstantIndex) + specs["constarrayidx"] = de::toString(cTypes[capIdx][tyIdx].constantIndex); + if (cTypes[capIdx][tyIdx].useConstantIndex) specs["arrayindex"] = "c_i32_ci"; else specs["arrayindex"] = "x"; + const deUint32 inputStride = cTypes[capIdx][tyIdx].inputStride; + const deUint32 count = cTypes[capIdx][tyIdx].count; + const deUint32 scalarsPerItem = numElements / count; + vector float16Data = getFloat16s(rnd, numElements * inputStride); + vector float32Data; + + float32Data.reserve(numElements); + for (deUint32 numIdx = 0; numIdx < count; ++numIdx) + for (deUint32 scalarIdx = 0; scalarIdx < scalarsPerItem; scalarIdx++) + float32Data.push_back(deFloat16To32(float16Data[numIdx * inputStride + scalarIdx])); + vector float32DataConstIdx; - if (cTypes[tyIdx].useConstantIndex) + if (cTypes[capIdx][tyIdx].useConstantIndex) { - const deUint32 numFloats = numElements / cTypes[tyIdx].count; + const deUint32 numFloats = numElements / cTypes[capIdx][tyIdx].count; for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx) - float32DataConstIdx.push_back(float32Data[cTypes[tyIdx].constantIndex * numFloats + numIdx % numFloats]); + float32DataConstIdx.push_back(float32Data[cTypes[capIdx][tyIdx].constantIndex * numFloats + numIdx % numFloats]); } - if (strcmp(cTypes[tyIdx].name, "matrix") == 0) + if (strcmp(cTypes[capIdx][tyIdx].name, "matrix") == 0) { specs["index0"] = "%zero"; specs["matrix_prefix"] = "m4"; @@ -1472,12 +1488,12 @@ void addCompute16bitStorageUniform16To32Group (tcu::TestCaseGroup* group) } spec.assembly = shaderTemplate.specialize(specs); - spec.numWorkGroups = IVec3(cTypes[tyIdx].count, 1, 1); + spec.numWorkGroups = IVec3(cTypes[capIdx][tyIdx].count, 1, 1); spec.verifyIO = check32BitFloats; spec.inputTypes[0] = CAPABILITIES[capIdx].dtype; spec.inputs.push_back(BufferSp(new Float16Buffer(float16Data))); - spec.outputs.push_back(BufferSp(new Float32Buffer(cTypes[tyIdx].useConstantIndex ? float32DataConstIdx : float32Data))); + spec.outputs.push_back(BufferSp(new Float32Buffer(cTypes[capIdx][tyIdx].useConstantIndex ? float32DataConstIdx : float32Data))); spec.extensions.push_back("VK_KHR_16bit_storage"); spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name); @@ -1522,55 +1538,72 @@ void addCompute16bitStorageUniform16To32Group (tcu::TestCaseGroup* group) bool useConstantIndex; unsigned constantIndex; unsigned count; + unsigned inputStride; }; - const CompositeType cTypes[] = + const CompositeType cTypes[2][8] = { - {"scalar_sint", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", false, 0, numElements}, - {"scalar_sint_const_idx_5", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 5, numElements}, - {"scalar_sint_const_idx_8", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 8, numElements}, - {"scalar_uint", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", false, 0, numElements}, - {"scalar_uint_const_idx_5", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 5, numElements}, - {"scalar_uint_const_idx_8", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 8, numElements}, - {"vector_sint", true, sintTypes, "v4i32", "v4i16", "OpSConvert", "OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 8\n", false, 0, numElements / 4}, - {"vector_uint", false, uintTypes, "v4u32", "v4u16", "OpUConvert", "OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 8\n", false, 0, numElements / 4} + { + {"scalar_sint", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", false, 0, numElements, 1}, + {"scalar_sint_const_idx_5", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 5, numElements, 1}, + {"scalar_sint_const_idx_8", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 8, numElements, 1}, + {"scalar_uint", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", false, 0, numElements, 1}, + {"scalar_uint_const_idx_5", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 5, numElements, 1}, + {"scalar_uint_const_idx_8", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 8, numElements, 1}, + {"vector_sint", true, sintTypes, "v4i32", "v4i16", "OpSConvert", "OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 8\n", false, 0, numElements / 4, 4}, + {"vector_uint", false, uintTypes, "v4u32", "v4u16", "OpUConvert", "OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 8\n", false, 0, numElements / 4, 4} + }, + { + {"scalar_sint", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n", false, 0, numElements, 8}, + {"scalar_sint_const_idx_5", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n", true, 5, numElements, 8}, + {"scalar_sint_const_idx_8", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n", true, 8, numElements, 8}, + {"scalar_uint", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n", false, 0, numElements, 8}, + {"scalar_uint_const_idx_5", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n", true, 5, numElements, 8}, + {"scalar_uint_const_idx_8", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n", true, 8, numElements, 8}, + {"vector_sint", true, sintTypes, "v4i32", "v4i16", "OpSConvert", "OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 16\n", false, 0, numElements / 4, 8}, + {"vector_uint", false, uintTypes, "v4u32", "v4u16", "OpUConvert", "OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 16\n", false, 0, numElements / 4, 8} + } }; - vector inputs = getInt16s(rnd, numElements); - vector sOutputs; - vector uOutputs; - const deUint16 signBitMask = 0x8000; - const deUint32 signExtendMask = 0xffff0000; - - sOutputs.reserve(inputs.size()); - uOutputs.reserve(inputs.size()); - - for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx) - { - uOutputs.push_back(static_cast(inputs[numNdx])); - if (inputs[numNdx] & signBitMask) - sOutputs.push_back(static_cast(inputs[numNdx] | signExtendMask)); - else - sOutputs.push_back(static_cast(inputs[numNdx])); - } - for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx) - for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx) + for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx) { ComputeShaderSpec spec; map specs; - string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name; + string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name; + const deUint32 inputStride = cTypes[capIdx][tyIdx].inputStride; + vector inputs = getInt16s(rnd, numElements * inputStride); + vector sOutputs; + vector uOutputs; + const deUint16 signBitMask = 0x8000; + const deUint32 signExtendMask = 0xffff0000; + const deUint32 count = cTypes[capIdx][tyIdx].count; + const deUint32 scalarsPerItem = numElements / count; + + sOutputs.reserve(numElements); + uOutputs.reserve(numElements); + + for (deUint32 numNdx = 0; numNdx < count; ++numNdx) + for (deUint32 scalarIdx = 0; scalarIdx < scalarsPerItem; ++scalarIdx) + { + const deInt16 input = inputs[numNdx * inputStride + scalarIdx]; + + uOutputs.push_back(static_cast(input)); + if (input & signBitMask) + sOutputs.push_back(static_cast(input | signExtendMask)); + else + sOutputs.push_back(static_cast(input)); + } + vector intDataConstIdx; - if (cTypes[tyIdx].useConstantIndex) + if (cTypes[capIdx][tyIdx].useConstantIndex) { - const deUint32 numInts = numElements / cTypes[tyIdx].count; - for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx) { - const deInt32 idx = cTypes[tyIdx].constantIndex * numInts + numIdx % numInts; + const deInt32 idx = cTypes[capIdx][tyIdx].constantIndex * scalarsPerItem + numIdx % scalarsPerItem; - if (cTypes[tyIdx].isSigned) + if (cTypes[capIdx][tyIdx].isSigned) intDataConstIdx.push_back(sOutputs[idx]); else intDataConstIdx.push_back(uOutputs[idx]); @@ -1579,25 +1612,25 @@ void addCompute16bitStorageUniform16To32Group (tcu::TestCaseGroup* group) specs["capability"] = CAPABILITIES[capIdx].cap; specs["storage"] = CAPABILITIES[capIdx].decor; - specs["stride"] = cTypes[tyIdx].stride; - specs["base32"] = cTypes[tyIdx].base32; - specs["base16"] = cTypes[tyIdx].base16; - specs["types"] = cTypes[tyIdx].types; - specs["convert"] = cTypes[tyIdx].opcode; - specs["constarrayidx"] = de::toString(cTypes[tyIdx].constantIndex); - if (cTypes[tyIdx].useConstantIndex) + specs["stride"] = cTypes[capIdx][tyIdx].stride; + specs["base32"] = cTypes[capIdx][tyIdx].base32; + specs["base16"] = cTypes[capIdx][tyIdx].base16; + specs["types"] = cTypes[capIdx][tyIdx].types; + specs["convert"] = cTypes[capIdx][tyIdx].opcode; + specs["constarrayidx"] = de::toString(cTypes[capIdx][tyIdx].constantIndex); + if (cTypes[capIdx][tyIdx].useConstantIndex) specs["arrayindex"] = "c_i32_ci"; else specs["arrayindex"] = "x"; spec.assembly = shaderTemplate.specialize(specs); - spec.numWorkGroups = IVec3(cTypes[tyIdx].count, 1, 1); + spec.numWorkGroups = IVec3(cTypes[capIdx][tyIdx].count, 1, 1); spec.inputTypes[0] = CAPABILITIES[capIdx].dtype; spec.inputs.push_back(BufferSp(new Int16Buffer(inputs))); - if (cTypes[tyIdx].useConstantIndex) + if (cTypes[capIdx][tyIdx].useConstantIndex) spec.outputs.push_back(BufferSp(new Int32Buffer(intDataConstIdx))); - else if (cTypes[tyIdx].isSigned) + else if (cTypes[capIdx][tyIdx].isSigned) spec.outputs.push_back(BufferSp(new Int32Buffer(sOutputs))); else spec.outputs.push_back(BufferSp(new Int32Buffer(uOutputs))); @@ -1613,7 +1646,7 @@ void addCompute16bitStorageUniform16To32ChainAccessGroup (tcu::TestCaseGroup* gr { tcu::TestContext& testCtx = group->getTestContext(); de::Random rnd (deStringHash(group->getName())); - const deUint32 structSize = 24; // In number of 16bit items. Includes padding. + const deUint32 structSize = 128; // In number of 16bit items. Includes padding. vector inputDataFloat = getFloat16s(rnd, structSize * 4); vector inputDataInt = getInt16s(rnd, structSize * 4); vector outputDataFloat; @@ -1653,13 +1686,13 @@ void addCompute16bitStorageUniform16To32ChainAccessGroup (tcu::TestCaseGroup* gr " OpDecorate %Output BufferBlock\n" " OpDecorate %dataOutput DescriptorSet 0\n" " OpDecorate %dataOutput Binding 1\n" - " OpDecorate %scalarArray ArrayStride 2\n" - " OpDecorate %scalarArray2D ArrayStride 8\n" + " OpDecorate %scalarArray ArrayStride 16\n" + " OpDecorate %scalarArray2D ArrayStride 48\n" " OpMemberDecorate %S 0 Offset 0\n" - " OpMemberDecorate %S 1 Offset 8\n" + " OpMemberDecorate %S 1 Offset 48\n" " ${decoration:opt}\n" - " OpMemberDecorate %S 2 Offset 40\n" - " OpDecorate %_arr_S_uint_4 ArrayStride 48\n" + " OpMemberDecorate %S 2 Offset 240\n" + " OpDecorate %_arr_S_uint_4 ArrayStride 256\n" " OpMemberDecorate %Input 0 Offset 0\n" " OpMemberDecorate %Output 0 Offset 0\n" " OpDecorate %Input ${storage}\n" @@ -1724,19 +1757,28 @@ void addCompute16bitStorageUniform16To32ChainAccessGroup (tcu::TestCaseGroup* gr const deUint32 signExtendMask = 0xffff0000; // Determine the selected output float for the selected indices. const tcu::UVec4 vec = indices[numIdx]; - // Offsets are in multiples of 16bits. - const deUint32 fieldOffsets[3][3] = + // Offsets are in multiples of 16bits. Floats are using matrix as the + // second field, which has different layout rules than 2D array. + // Therefore separate offset tables are needed. + const deUint32 fieldOffsetsFloat[3][3] = { - {0u, 1u, 0u}, - {4u, 4u, 1u}, - {20u, 1u, 0u} + {0u, 8u, 0u}, + {24, 24u, 1u}, + {120u, 1u, 0u} }; - const deUint32 offset = vec.x() * structSize + fieldOffsets[vec.y()][0] + fieldOffsets[vec.y()][1] * vec.z() + fieldOffsets[vec.y()][2] * vec.w(); - const bool hasSign = inputDataInt[offset] & signBitMask; + const deUint32 fieldOffsetsInt[3][3] = + { + {0u, 8u, 0u}, + {24, 24u, 8u}, + {120u, 1u, 0u} + }; + const deUint32 offsetFloat = vec.x() * structSize + fieldOffsetsFloat[vec.y()][0] + fieldOffsetsFloat[vec.y()][1] * vec.z() + fieldOffsetsFloat[vec.y()][2] * vec.w(); + const deUint32 offsetInt = vec.x() * structSize + fieldOffsetsInt[vec.y()][0] + fieldOffsetsInt[vec.y()][1] * vec.z() + fieldOffsetsInt[vec.y()][2] * vec.w(); + const bool hasSign = inputDataInt[offsetInt] & signBitMask; - outputDataFloat.push_back(deFloat16To32(inputDataFloat[offset])); - outputDataUInt.push_back((deUint16)inputDataInt[offset]); - outputDataSInt.push_back((deInt32)(inputDataInt[offset] | (hasSign ? signExtendMask : 0u))); + outputDataFloat.push_back(deFloat16To32(inputDataFloat[offsetFloat])); + outputDataUInt.push_back((deUint16)inputDataInt[offsetInt]); + outputDataSInt.push_back((deInt32)(inputDataInt[offsetInt] | (hasSign ? signExtendMask : 0u))); } for (deUint32 indicesIdx = 0; indicesIdx < (deUint32)indices.size(); ++indicesIdx) @@ -1790,7 +1832,7 @@ void addCompute16bitStorageUniform16To32ChainAccessGroup (tcu::TestCaseGroup* gr if (dataTypeIdx == 0) { spec.verifyIO = check32BitFloats; - specs["decoration"] = "OpMemberDecorate %S 1 ColMajor\nOpMemberDecorate %S 1 MatrixStride 8\n"; + specs["decoration"] = "OpMemberDecorate %S 1 ColMajor\nOpMemberDecorate %S 1 MatrixStride 48\n"; } spec.assembly = shaderTemplate.specialize(specs); @@ -1823,7 +1865,7 @@ void addCompute16bitStoragePushConstant16To32Group (tcu::TestCaseGroup* group) "${stride}" - "OpDecorate %PC16 Block\n" + "OpDecorate %PC16 BufferBlock\n" "OpMemberDecorate %PC16 0 Offset 0\n" "OpMemberDecorate %SSBO32 0 Offset 0\n" "OpDecorate %SSBO32 BufferBlock\n" @@ -2102,7 +2144,6 @@ void addGraphics16BitStorageUniformInt32To16Group (tcu::TestCaseGroup* testGroup map fragments; const deUint32 numDataPoints = 256; RGBA defaultColors[4]; - GraphicsResources resources; vector extensions; const StringTemplate capabilities ("OpCapability ${cap}\n"); // inputs and outputs are declared to be vectors of signed integers. @@ -2116,8 +2157,6 @@ void addGraphics16BitStorageUniformInt32To16Group (tcu::TestCaseGroup* testGroup for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx) outputs.push_back(static_cast(0xffff & inputs[numNdx])); - resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(inputs)))); - resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(outputs)))); extensions.push_back("VK_KHR_16bit_storage"); fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\""; @@ -2154,7 +2193,7 @@ void addGraphics16BitStorageUniformInt32To16Group (tcu::TestCaseGroup* testGroup " %ssbo16 = OpVariable %up_SSBO16 Uniform\n"); const StringTemplate scalarDecoration( - "OpDecorate %ra_i32 ArrayStride 4\n" + "OpDecorate %ra_i32 ArrayStride ${arraystride}\n" "OpDecorate %ra_i16 ArrayStride 2\n" "OpMemberDecorate %SSBO32 0 Offset 0\n" "OpMemberDecorate %SSBO16 0 Offset 0\n" @@ -2262,26 +2301,57 @@ void addGraphics16BitStorageUniformInt32To16Group (tcu::TestCaseGroup* testGroup "OpFunctionEnd\n"); - struct Category + // Scalar { - const char* name; - const StringTemplate& preMain; - const StringTemplate& decoration; - const StringTemplate& testFunction; - }; + const deUint32 arrayStrides[] = {4, 16}; + + for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx) + for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx) + { + map specs; + string name = string(CAPABILITIES[capIdx].name) + "_scalar_" + intFacts[factIdx].name; - const Category categories[] = + specs["cap"] = CAPABILITIES[capIdx].cap; + specs["indecor"] = CAPABILITIES[capIdx].decor; + specs["itype32"] = intFacts[factIdx].type32; + specs["v4itype32"] = "%v4" + string(intFacts[factIdx].type32).substr(1); + specs["itype16"] = intFacts[factIdx].type16; + specs["signed"] = intFacts[factIdx].isSigned; + specs["convert"] = intFacts[factIdx].opcode; + specs["arraystride"] = de::toString(arrayStrides[capIdx]); + + fragments["pre_main"] = scalarPreMain.specialize(specs); + fragments["testfun"] = scalarTestFunc.specialize(specs); + fragments["capability"] = capabilities.specialize(specs); + fragments["decoration"] = scalarDecoration.specialize(specs); + + vector inputsPadded; + for (size_t dataIdx = 0; dataIdx < inputs.size(); ++dataIdx) + { + inputsPadded.push_back(inputs[dataIdx]); + for (deUint32 padIdx = 0; padIdx < arrayStrides[capIdx] / 4 - 1; ++padIdx) + inputsPadded.push_back(0); + } + GraphicsResources resources; + resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(inputsPadded)))); + resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(outputs)))); + + resources.inputs.back().first = CAPABILITIES[capIdx].dtype; + + createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name)); + } + } + // Vector { - {"scalar", scalarPreMain, scalarDecoration, scalarTestFunc}, - {"vector", vecPreMain, vecDecoration, vecTestFunc}, - }; + GraphicsResources resources; + resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(inputs)))); + resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(outputs)))); - for (deUint32 catIdx = 0; catIdx < DE_LENGTH_OF_ARRAY(categories); ++catIdx) for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx) for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx) { map specs; - string name = string(CAPABILITIES[capIdx].name) + "_" + categories[catIdx].name + "_" + intFacts[factIdx].name; + string name = string(CAPABILITIES[capIdx].name) + "_vector_" + intFacts[factIdx].name; specs["cap"] = CAPABILITIES[capIdx].cap; specs["indecor"] = CAPABILITIES[capIdx].decor; @@ -2291,15 +2361,16 @@ void addGraphics16BitStorageUniformInt32To16Group (tcu::TestCaseGroup* testGroup specs["signed"] = intFacts[factIdx].isSigned; specs["convert"] = intFacts[factIdx].opcode; - fragments["pre_main"] = categories[catIdx].preMain.specialize(specs); - fragments["testfun"] = categories[catIdx].testFunction.specialize(specs); + fragments["pre_main"] = vecPreMain.specialize(specs); + fragments["testfun"] = vecTestFunc.specialize(specs); fragments["capability"] = capabilities.specialize(specs); - fragments["decoration"] = categories[catIdx].decoration.specialize(specs); + fragments["decoration"] = vecDecoration.specialize(specs); resources.inputs.back().first = CAPABILITIES[capIdx].dtype; createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name)); } + } } void addCompute16bitStorageUniform16To16Group (tcu::TestCaseGroup* group) @@ -2493,36 +2564,44 @@ void addCompute16bitStorageUniform32To16Group (tcu::TestCaseGroup* group) const char* base16; const char* stride; unsigned count; + unsigned inputStride; }; - const CompositeType cTypes[] = + const CompositeType cTypes[2][3] = { - {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", numElements}, - {"vector", "v4f32", "v4f16", "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n", numElements / 4}, - {"matrix", "v4f32", "v4f16", "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n", numElements / 8}, + { // BufferBlock + {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", numElements, 1}, + {"vector", "v4f32", "v4f16", "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n", numElements / 4, 1}, + {"matrix", "v4f32", "v4f16", "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n", numElements / 8, 1} + }, + { // Block + {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 16\nOpDecorate %f16arr ArrayStride 2\n", numElements, 4}, + {"vector", "v4f32", "v4f16", "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n", numElements / 4, 1}, + {"matrix", "v4f32", "v4f16", "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n", numElements / 8, 1} + } }; - vector float32Data = getFloat32s(rnd, numElements); vector float16DummyData (numElements, 0); for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx) - for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx) + for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx) for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx) { ComputeShaderSpec spec; map specs; - string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name + "_float_" + rndModes[rndModeIdx].name; + string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name + "_float_" + rndModes[rndModeIdx].name; + vector float32Data = getFloat32s(rnd, numElements * cTypes[capIdx][tyIdx].inputStride); specs["capability"] = CAPABILITIES[capIdx].cap; specs["storage"] = CAPABILITIES[capIdx].decor; - specs["stride"] = cTypes[tyIdx].stride; - specs["base32"] = cTypes[tyIdx].base32; - specs["base16"] = cTypes[tyIdx].base16; + specs["stride"] = cTypes[capIdx][tyIdx].stride; + specs["base32"] = cTypes[capIdx][tyIdx].base32; + specs["base16"] = cTypes[capIdx][tyIdx].base16; specs["rounding"] = rndModes[rndModeIdx].decor; specs["types"] = floatTypes; specs["convert"] = "OpFConvert"; - if (strcmp(cTypes[tyIdx].name, "matrix") == 0) + if (strcmp(cTypes[capIdx][tyIdx].name, "matrix") == 0) { if (strcmp(rndModes[rndModeIdx].name, "rtz") == 0) specs["rounding"] += "\nOpDecorate %val16_1 FPRoundingMode RTZ\n"; @@ -2550,7 +2629,7 @@ void addCompute16bitStorageUniform32To16Group (tcu::TestCaseGroup* group) } spec.assembly = shaderTemplate.specialize(specs); - spec.numWorkGroups = IVec3(cTypes[tyIdx].count, 1, 1); + spec.numWorkGroups = IVec3(cTypes[capIdx][tyIdx].count, 1, 1); spec.verifyIO = rndModes[rndModeIdx].func; spec.inputTypes[0] = CAPABILITIES[capIdx].dtype; @@ -2599,40 +2678,53 @@ void addCompute16bitStorageUniform32To16Group (tcu::TestCaseGroup* group) const char* opcode; const char* stride; unsigned count; + unsigned inputStride; }; - const CompositeType cTypes[] = + const CompositeType cTypes[2][4] = { - {"scalar_sint", sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", numElements}, - {"scalar_uint", uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", numElements}, - {"vector_sint", sintTypes, "v2i32", "v2i16", "OpSConvert", "OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n", numElements / 2}, - {"vector_uint", uintTypes, "v2u32", "v2u16", "OpUConvert", "OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n", numElements / 2}, + { + {"scalar_sint", sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", numElements, 1}, + {"scalar_uint", uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", numElements, 1}, + {"vector_sint", sintTypes, "v2i32", "v2i16", "OpSConvert", "OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n", numElements / 2, 2}, + {"vector_uint", uintTypes, "v2u32", "v2u16", "OpUConvert", "OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n", numElements / 2, 2} + }, + { + {"scalar_sint", sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 16\nOpDecorate %i16arr ArrayStride 2\n", numElements, 4}, + {"scalar_uint", uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 16\nOpDecorate %u16arr ArrayStride 2\n", numElements, 4}, + {"vector_sint", sintTypes, "v2i32", "v2i16", "OpSConvert", "OpDecorate %v2i32arr ArrayStride 16\nOpDecorate %v2i16arr ArrayStride 4\n", numElements / 2, 4}, + {"vector_uint", uintTypes, "v2u32", "v2u16", "OpUConvert", "OpDecorate %v2u32arr ArrayStride 16\nOpDecorate %v2u16arr ArrayStride 4\n", numElements / 2, 4} + } }; - vector inputs = getInt32s(rnd, numElements); - vector outputs; - - outputs.reserve(inputs.size()); - for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx) - outputs.push_back(static_cast(0xffff & inputs[numNdx])); - for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx) - for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx) + for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx) { ComputeShaderSpec spec; map specs; - string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name; + string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name; + const deUint32 inputStride = cTypes[capIdx][tyIdx].inputStride; + const deUint32 count = cTypes[capIdx][tyIdx].count; + const deUint32 scalarsPerItem = numElements / count; + + vector inputs = getInt32s(rnd, numElements * inputStride); + vector outputs; + + outputs.reserve(numElements); + for (deUint32 numNdx = 0; numNdx < count; ++numNdx) + for (deUint32 scalarIdx = 0; scalarIdx < scalarsPerItem; scalarIdx++) + outputs.push_back(static_cast(0xffff & inputs[numNdx * inputStride + scalarIdx])); specs["capability"] = CAPABILITIES[capIdx].cap; specs["storage"] = CAPABILITIES[capIdx].decor; - specs["stride"] = cTypes[tyIdx].stride; - specs["base32"] = cTypes[tyIdx].base32; - specs["base16"] = cTypes[tyIdx].base16; - specs["types"] = cTypes[tyIdx].types; - specs["convert"] = cTypes[tyIdx].opcode; + specs["stride"] = cTypes[capIdx][tyIdx].stride; + specs["base32"] = cTypes[capIdx][tyIdx].base32; + specs["base16"] = cTypes[capIdx][tyIdx].base16; + specs["types"] = cTypes[capIdx][tyIdx].types; + specs["convert"] = cTypes[capIdx][tyIdx].opcode; spec.assembly = shaderTemplate.specialize(specs); - spec.numWorkGroups = IVec3(cTypes[tyIdx].count, 1, 1); + spec.numWorkGroups = IVec3(cTypes[capIdx][tyIdx].count, 1, 1); spec.inputTypes[0] = CAPABILITIES[capIdx].dtype; spec.inputs.push_back(BufferSp(new Int32Buffer(inputs))); @@ -3300,17 +3392,21 @@ void addGraphics16BitStorageUniformFloat32To16Group (tcu::TestCaseGroup* testGro { de::Random rnd (deStringHash(testGroup->getName())); map fragments; - GraphicsResources resources; vector extensions; const deUint32 numDataPoints = 256; RGBA defaultColors[4]; - vector float32Data = getFloat32s(rnd, numDataPoints); + const vector float32Data = getFloat32s(rnd, numDataPoints); + vector float32DataPadded; vector float16DummyData (numDataPoints, 0); const StringTemplate capabilities ("OpCapability ${cap}\n"); - resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data)))); - // We use a custom verifyIO to check the result via computing directly from inputs; the contents in outputs do not matter. - resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16DummyData)))); + for (size_t dataIdx = 0; dataIdx < float32Data.size(); ++dataIdx) + { + float32DataPadded.push_back(float32Data[dataIdx]); + float32DataPadded.push_back(0.0f); + float32DataPadded.push_back(0.0f); + float32DataPadded.push_back(0.0f); + } extensions.push_back("VK_KHR_16bit_storage"); fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\""; @@ -3340,7 +3436,7 @@ void addGraphics16BitStorageUniformFloat32To16Group (tcu::TestCaseGroup* testGro " %ssbo16 = OpVariable %up_SSBO16 Uniform\n"; const StringTemplate decoration ( - "OpDecorate %ra_f32 ArrayStride 4\n" + "OpDecorate %ra_f32 ArrayStride ${arraystride}\n" "OpDecorate %ra_f16 ArrayStride 2\n" "OpMemberDecorate %SSBO32 0 Offset 0\n" "OpMemberDecorate %SSBO16 0 Offset 0\n" @@ -3387,21 +3483,29 @@ void addGraphics16BitStorageUniformFloat32To16Group (tcu::TestCaseGroup* testGro "OpFunctionEnd\n"; - const RndMode rndModes[] = + const RndMode rndModes[] = { {"rtz", "OpDecorate %val16 FPRoundingMode RTZ", graphicsCheck16BitFloats}, {"rte", "OpDecorate %val16 FPRoundingMode RTE", graphicsCheck16BitFloats}, {"unspecified_rnd_mode", "", graphicsCheck16BitFloats}, }; + const deUint32 arrayStrides[] = {4, 16}; + for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx) for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx) { map specs; string testName = string(CAPABILITIES[capIdx].name) + "_scalar_float_" + rndModes[rndModeIdx].name; + GraphicsResources resources; + resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(arrayStrides[capIdx] == 4 ? float32Data : float32DataPadded)))); + // We use a custom verifyIO to check the result via computing directly from inputs; the contents in outputs do not matter. + resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16DummyData)))); + specs["cap"] = CAPABILITIES[capIdx].cap; specs["indecor"] = CAPABILITIES[capIdx].decor; + specs["arraystride"] = de::toString(arrayStrides[capIdx]); specs["rounddecor"] = rndModes[rndModeIdx].decor; fragments["capability"] = capabilities.specialize(specs); @@ -3410,11 +3514,16 @@ void addGraphics16BitStorageUniformFloat32To16Group (tcu::TestCaseGroup* testGro resources.inputs.back().first = CAPABILITIES[capIdx].dtype; resources.verifyIO = rndModes[rndModeIdx].f; - createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name)); } } + // Non-scalar cases can use the same resources. + GraphicsResources resources; + resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data)))); + // We use a custom verifyIO to check the result via computing directly from inputs; the contents in outputs do not matter. + resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16DummyData)))); + { // vector cases fragments["pre_main"] = " %f16 = OpTypeFloat 16\n" @@ -3737,7 +3846,7 @@ void addGraphics16BitStorageInputOutputFloat32To16Group (tcu::TestCaseGroup* tes for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx) { - string testName = string(cases[caseIdx].name) + numberToString(caseNdx) + "_" + rndModes[rndModeIdx].name; + string testName = string(cases[caseIdx].name) + numberToString(caseNdx) + "_" + rndModes[rndModeIdx].name; for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx) { @@ -4457,7 +4566,7 @@ void addGraphics16BitStoragePushConstantFloat16To32Group (tcu::TestCaseGroup* te "OpDecorate %a64f32 ArrayStride 4\n" "OpDecorate %SSBO32 BufferBlock\n" "OpMemberDecorate %SSBO32 0 Offset 0\n" - "OpDecorate %PC16 Block\n" + "OpDecorate %PC16 BufferBlock\n" "OpMemberDecorate %PC16 0 Offset 0\n" "OpDecorate %ssbo32 DescriptorSet 0\n" "OpDecorate %ssbo32 Binding 0\n"; @@ -4526,7 +4635,7 @@ void addGraphics16BitStoragePushConstantFloat16To32Group (tcu::TestCaseGroup* te "OpDecorate %a16v4f32 ArrayStride 16\n" "OpDecorate %SSBO32 BufferBlock\n" "OpMemberDecorate %SSBO32 0 Offset 0\n" - "OpDecorate %PC16 Block\n" + "OpDecorate %PC16 BufferBlock\n" "OpMemberDecorate %PC16 0 Offset 0\n" "OpDecorate %ssbo32 DescriptorSet 0\n" "OpDecorate %ssbo32 Binding 0\n"; @@ -4599,7 +4708,7 @@ void addGraphics16BitStoragePushConstantFloat16To32Group (tcu::TestCaseGroup* te "OpMemberDecorate %SSBO32 0 Offset 0\n" "OpMemberDecorate %SSBO32 0 ColMajor\n" "OpMemberDecorate %SSBO32 0 MatrixStride 16\n" - "OpDecorate %PC16 Block\n" + "OpDecorate %PC16 BufferBlock\n" "OpMemberDecorate %PC16 0 Offset 0\n" "OpMemberDecorate %PC16 0 ColMajor\n" "OpMemberDecorate %PC16 0 MatrixStride 8\n" @@ -4766,7 +4875,7 @@ void addGraphics16BitStoragePushConstantInt16To32Group (tcu::TestCaseGroup* test "OpDecorate %a${count}${type32} ArrayStride 4\n" "OpDecorate %SSBO32 BufferBlock\n" "OpMemberDecorate %SSBO32 0 Offset 0\n" - "OpDecorate %PC16 Block\n" + "OpDecorate %PC16 BufferBlock\n" "OpMemberDecorate %PC16 0 Offset 0\n" "OpDecorate %ssbo32 DescriptorSet 0\n" "OpDecorate %ssbo32 Binding 0\n"); @@ -4881,7 +4990,7 @@ void addGraphics16BitStoragePushConstantInt16To32Group (tcu::TestCaseGroup* test "OpDecorate %a${count}${type32} ArrayStride 8\n" "OpDecorate %SSBO32 BufferBlock\n" "OpMemberDecorate %SSBO32 0 Offset 0\n" - "OpDecorate %PC16 Block\n" + "OpDecorate %PC16 BufferBlock\n" "OpMemberDecorate %PC16 0 Offset 0\n" "OpDecorate %ssbo32 DescriptorSet 0\n" "OpDecorate %ssbo32 Binding 0\n"); @@ -4986,7 +5095,6 @@ void addGraphics16BitStorageUniformInt16To32Group (tcu::TestCaseGroup* testGroup vector inputs = getInt16s(rnd, numDataPoints); vector sOutputs; vector uOutputs; - GraphicsResources resources; vector extensions; const deUint16 signBitMask = 0x8000; const deUint32 signExtendMask = 0xffff0000; @@ -5004,8 +5112,6 @@ void addGraphics16BitStorageUniformInt16To32Group (tcu::TestCaseGroup* testGroup sOutputs.push_back(static_cast(inputs[numNdx])); } - resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(inputs)))); - extensions.push_back("VK_KHR_16bit_storage"); fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\""; @@ -5057,7 +5163,7 @@ void addGraphics16BitStorageUniformInt16To32Group (tcu::TestCaseGroup* testGroup const StringTemplate scalarDecoration ( "OpDecorate %ra_i32 ArrayStride 4\n" - "OpDecorate %ra_i16 ArrayStride 2\n" + "OpDecorate %ra_i16 ArrayStride ${arraystride}\n" "OpMemberDecorate %SSBO32 0 Offset 0\n" "OpMemberDecorate %SSBO16 0 Offset 0\n" "OpDecorate %SSBO32 BufferBlock\n" @@ -5119,7 +5225,7 @@ void addGraphics16BitStorageUniformInt16To32Group (tcu::TestCaseGroup* testGroup const StringTemplate vecDecoration ( "OpDecorate %ra_v2i32 ArrayStride 8\n" - "OpDecorate %ra_v2i16 ArrayStride 4\n" + "OpDecorate %ra_v2i16 ArrayStride ${arraystride}\n" "OpMemberDecorate %SSBO32 0 Offset 0\n" "OpMemberDecorate %SSBO16 0 Offset 0\n" "OpDecorate %SSBO32 BufferBlock\n" @@ -5129,7 +5235,7 @@ void addGraphics16BitStorageUniformInt16To32Group (tcu::TestCaseGroup* testGroup "OpDecorate %ssbo32 Binding 1\n" "OpDecorate %ssbo16 Binding 0\n"); - const StringTemplate vecTestFunc ( + const StringTemplate vecTestFunc ( "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n" " %param = OpFunctionParameter %v4f32\n" @@ -5172,12 +5278,14 @@ void addGraphics16BitStorageUniformInt16To32Group (tcu::TestCaseGroup* testGroup const deUint32 numElements; }; - const Category categories[] = + const Category categories[] = { {"scalar", scalarPreMain, scalarDecoration, scalarTestFunc, 1}, {"vector", vecPreMain, vecDecoration, vecTestFunc, 2}, }; + const deUint32 minArrayStride[] = {2, 16}; + for (deUint32 catIdx = 0; catIdx < DE_LENGTH_OF_ARRAY(categories); ++catIdx) for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx) for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx) @@ -5187,9 +5295,12 @@ void addGraphics16BitStorageUniformInt16To32Group (tcu::TestCaseGroup* testGroup deUint32 constIdx = constantIndices[constIndexIdx].constantIndex; map specs; string name = string(CAPABILITIES[capIdx].name) + "_" + categories[catIdx].name + "_" + intFacts[factIdx].name; + const deUint32 numElements = categories[catIdx].numElements; + const deUint32 arrayStride = de::max(numElements * 2, minArrayStride[capIdx]); specs["cap"] = CAPABILITIES[capIdx].cap; specs["indecor"] = CAPABILITIES[capIdx].decor; + specs["arraystride"] = de::toString(arrayStride); specs["itype32"] = intFacts[factIdx].type32; specs["v2itype32"] = "%v2" + string(intFacts[factIdx].type32).substr(1); specs["v3itype32"] = "%v3" + string(intFacts[factIdx].type32).substr(1); @@ -5210,10 +5321,21 @@ void addGraphics16BitStorageUniformInt16To32Group (tcu::TestCaseGroup* testGroup fragments["capability"] = capabilities.specialize(specs); fragments["decoration"] = categories[catIdx].decoration.specialize(specs); + GraphicsResources resources; + vector inputsPadded; + for (size_t dataIdx = 0; dataIdx < inputs.size() / numElements; ++dataIdx) + { + for (deUint32 elementIdx = 0; elementIdx < numElements; ++elementIdx) + inputsPadded.push_back(inputs[dataIdx * numElements + elementIdx]); + for (deUint32 padIdx = 0; padIdx < arrayStride / 2 - numElements; ++padIdx) + inputsPadded.push_back(0); + } + + resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(inputsPadded)))); + vector constIdxOutputs; if (useConstIdx) { - const deUint32 numElements = categories[catIdx].numElements; name += string("_const_idx_") + de::toString(constIdx); for (deUint32 i = 0; i < numDataPoints; i++) { @@ -5282,7 +5404,7 @@ void addGraphics16BitStorageUniformFloat16To32Group (tcu::TestCaseGroup* testGro const StringTemplate decoration ( "OpDecorate %ra_f32 ArrayStride 4\n" - "OpDecorate %ra_f16 ArrayStride 2\n" + "OpDecorate %ra_f16 ArrayStride ${arraystride}\n" "OpMemberDecorate %SSBO32 0 Offset 0\n" "OpMemberDecorate %SSBO16 0 Offset 0\n" "OpDecorate %SSBO32 BufferBlock\n" @@ -5328,6 +5450,8 @@ void addGraphics16BitStorageUniformFloat16To32Group (tcu::TestCaseGroup* testGro "OpFunctionEnd\n"); + const deUint32 arrayStrides[] = {2, 16}; + for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx) { for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx) @@ -5340,6 +5464,7 @@ void addGraphics16BitStorageUniformFloat16To32Group (tcu::TestCaseGroup* testGro specs["cap"] = CAPABILITIES[capIdx].cap; specs["indecor"] = CAPABILITIES[capIdx].decor; + specs["arraystride"] = de::toString(arrayStrides[capIdx]); specs["constarrayidx"] = de::toString(constIdx); if (useConstIdx) specs["arrayindex"] = "c_i32_ci"; @@ -5351,12 +5476,20 @@ void addGraphics16BitStorageUniformFloat16To32Group (tcu::TestCaseGroup* testGro fragments["pre_main"] = preMain.specialize(specs); fragments["testfun"] = testFun.specialize(specs); + vector inputData; + for (size_t dataIdx = 0; dataIdx < float16Data.size(); ++dataIdx) + { + inputData.push_back(float16Data[dataIdx]); + for (deUint32 padIdx = 0; padIdx < arrayStrides[capIdx] / 2 - 1; ++padIdx) + inputData.push_back(deFloat16(0.0f)); + } + vector float32Data; float32Data.reserve(numDataPoints); for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx) float32Data.push_back(deFloat16To32(float16Data[useConstIdx ? constIdx : numIdx])); - resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16Data)))); + resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(inputData)))); resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data)))); resources.verifyIO = check32BitFloats; resources.inputs.back().first = CAPABILITIES[capIdx].dtype; @@ -5388,7 +5521,7 @@ void addGraphics16BitStorageUniformFloat16To32Group (tcu::TestCaseGroup* testGro const StringTemplate decoration ( "OpDecorate %ra_v2f32 ArrayStride 8\n" - "OpDecorate %ra_v2f16 ArrayStride 4\n" + "OpDecorate %ra_v2f16 ArrayStride ${arraystride}\n" "OpMemberDecorate %SSBO32 0 Offset 0\n" "OpMemberDecorate %SSBO16 0 Offset 0\n" "OpDecorate %SSBO32 BufferBlock\n" @@ -5434,6 +5567,8 @@ void addGraphics16BitStorageUniformFloat16To32Group (tcu::TestCaseGroup* testGro "OpFunctionEnd\n"); + const deUint32 arrayStrides[] = {4, 16}; + for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx) { for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx) @@ -5446,6 +5581,7 @@ void addGraphics16BitStorageUniformFloat16To32Group (tcu::TestCaseGroup* testGro specs["cap"] = CAPABILITIES[capIdx].cap; specs["indecor"] = CAPABILITIES[capIdx].decor; + specs["arraystride"] = de::toString(arrayStrides[capIdx]); specs["constarrayidx"] = de::toString(constIdx); if (useConstIdx) specs["arrayindex"] = "c_i32_ci"; @@ -5457,12 +5593,21 @@ void addGraphics16BitStorageUniformFloat16To32Group (tcu::TestCaseGroup* testGro fragments["pre_main"] = preMain.specialize(specs); fragments["testfun"] = testFun.specialize(specs); + vector inputData; + for (size_t dataIdx = 0; dataIdx < float16Data.size() / 2; ++dataIdx) + { + inputData.push_back(float16Data[dataIdx * 2]); + inputData.push_back(float16Data[dataIdx * 2 + 1]); + for (deUint32 padIdx = 0; padIdx < arrayStrides[capIdx] / 2 - 2; ++padIdx) + inputData.push_back(deFloat16(0.0f)); + } + vector float32Data; float32Data.reserve(numDataPoints); for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx) float32Data.push_back(deFloat16To32(float16Data[constantIndices[constIndexIdx].useConstantIndex ? (constantIndices[constIndexIdx].constantIndex * 2 + numIdx % 2) : numIdx])); - resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16Data)))); + resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(inputData)))); resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data)))); resources.verifyIO = check32BitFloats; resources.inputs.back().first = CAPABILITIES[capIdx].dtype; -- 2.7.4