template<RoundingModeFlags RoundingMode>
bool graphicsCheck16BitFloats (const std::vector<Resource>& originalFloats,
const vector<AllocationSp>& outputAllocs,
- const std::vector<Resource>& /* expectedOutputs */,
+ const std::vector<Resource>& expectedOutputs,
tcu::TestLog& log)
{
if (outputAllocs.size() != originalFloats.size())
const deUint16* returned = static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
const float* original = reinterpret_cast<const float*>(&originalBytes.front());
- const deUint32 count = static_cast<deUint32>(originalBytes.size() / sizeof(float));
+ const deUint32 count = static_cast<deUint32>(expectedOutputs[outputNdx].second->getByteSize() / sizeof(deUint16));
+ const deUint32 inputStride = static_cast<deUint32>(originalBytes.size() / sizeof(float)) / count;
for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
- if (!compare16BitFloat(original[numNdx], returned[numNdx], RoundingMode, log))
+ if (!compare16BitFloat(original[numNdx * inputStride], returned[numNdx], RoundingMode, log))
return false;
}
template<RoundingModeFlags RoundingMode>
bool computeCheck16BitFloats (const std::vector<BufferSp>& originalFloats,
const vector<AllocationSp>& outputAllocs,
- const std::vector<BufferSp>& /* expectedOutputs */,
+ const std::vector<BufferSp>& expectedOutputs,
tcu::TestLog& log)
{
if (outputAllocs.size() != originalFloats.size())
const deUint16* returned = static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
const float* original = reinterpret_cast<const float*>(&originalBytes.front());
- const deUint32 count = static_cast<deUint32>(originalBytes.size() / sizeof(float));
+ const deUint32 count = static_cast<deUint32>(expectedOutputs[outputNdx]->getByteSize() / sizeof(deUint16));
+ const deUint32 inputStride = static_cast<deUint32>(originalBytes.size() / sizeof(float)) / count;
for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
- if (!compare16BitFloat(original[numNdx], returned[numNdx], RoundingMode, log))
+ if (!compare16BitFloat(original[numNdx * inputStride], returned[numNdx], RoundingMode, log))
return false;
}
bool useConstantIndex;
unsigned constantIndex;
unsigned count;
+ unsigned inputStride;
};
- const CompositeType cTypes[] =
+ const CompositeType cTypes[2][5] =
{
- {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", false, 0, numElements},
- {"scalar_const_idx_5", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", true, 5, numElements},
- {"scalar_const_idx_8", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", true, 8, numElements},
- {"vector", "v2f32", "v2f16", "OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 4\n", false, 0, numElements / 2},
- {"matrix", "v2f32", "v2f16", "OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n", false, 0, numElements / 8},
+ {
+ {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", false, 0, numElements, 1},
+ {"scalar_const_idx_5", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", true, 5, numElements, 1},
+ {"scalar_const_idx_8", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", true, 8, numElements, 1},
+ {"vector", "v2f32", "v2f16", "OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 4\n", false, 0, numElements / 2, 2},
+ {"matrix", "v2f32", "v2f16", "OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n", false, 0, numElements / 8, 8}
+ },
+ {
+ {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n", false, 0, numElements, 8},
+ {"scalar_const_idx_5", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n", true, 5, numElements, 8},
+ {"scalar_const_idx_8", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n", true, 8, numElements, 8},
+ {"vector", "v2f32", "v2f16", "OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 16\n", false, 0, numElements / 2, 8},
+ {"matrix", "v2f32", "v2f16", "OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n", false, 0, numElements / 8, 8}
+ }
};
- vector<deFloat16> float16Data = getFloat16s(rnd, numElements);
- vector<float> float32Data;
-
- float32Data.reserve(numElements);
- for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
- float32Data.push_back(deFloat16To32(float16Data[numIdx]));
-
for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
- for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
+ for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx)
{
ComputeShaderSpec spec;
map<string, string> specs;
- string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name + "_float";
+ string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name + "_float";
specs["capability"] = CAPABILITIES[capIdx].cap;
specs["storage"] = CAPABILITIES[capIdx].decor;
- specs["stride"] = cTypes[tyIdx].stride;
- specs["base32"] = cTypes[tyIdx].base32;
- specs["base16"] = cTypes[tyIdx].base16;
+ specs["stride"] = cTypes[capIdx][tyIdx].stride;
+ specs["base32"] = cTypes[capIdx][tyIdx].base32;
+ specs["base16"] = cTypes[capIdx][tyIdx].base16;
specs["types"] = floatTypes;
specs["convert"] = "OpFConvert";
- specs["constarrayidx"] = de::toString(cTypes[tyIdx].constantIndex);
- if (cTypes[tyIdx].useConstantIndex)
+ specs["constarrayidx"] = de::toString(cTypes[capIdx][tyIdx].constantIndex);
+ if (cTypes[capIdx][tyIdx].useConstantIndex)
specs["arrayindex"] = "c_i32_ci";
else
specs["arrayindex"] = "x";
+ const deUint32 inputStride = cTypes[capIdx][tyIdx].inputStride;
+ const deUint32 count = cTypes[capIdx][tyIdx].count;
+ const deUint32 scalarsPerItem = numElements / count;
+ vector<deFloat16> float16Data = getFloat16s(rnd, numElements * inputStride);
+ vector<float> float32Data;
+
+ float32Data.reserve(numElements);
+ for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
+ for (deUint32 scalarIdx = 0; scalarIdx < scalarsPerItem; scalarIdx++)
+ float32Data.push_back(deFloat16To32(float16Data[numIdx * inputStride + scalarIdx]));
+
vector<float> float32DataConstIdx;
- if (cTypes[tyIdx].useConstantIndex)
+ if (cTypes[capIdx][tyIdx].useConstantIndex)
{
- const deUint32 numFloats = numElements / cTypes[tyIdx].count;
+ const deUint32 numFloats = numElements / cTypes[capIdx][tyIdx].count;
for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
- float32DataConstIdx.push_back(float32Data[cTypes[tyIdx].constantIndex * numFloats + numIdx % numFloats]);
+ float32DataConstIdx.push_back(float32Data[cTypes[capIdx][tyIdx].constantIndex * numFloats + numIdx % numFloats]);
}
- if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
+ if (strcmp(cTypes[capIdx][tyIdx].name, "matrix") == 0)
{
specs["index0"] = "%zero";
specs["matrix_prefix"] = "m4";
}
spec.assembly = shaderTemplate.specialize(specs);
- spec.numWorkGroups = IVec3(cTypes[tyIdx].count, 1, 1);
+ spec.numWorkGroups = IVec3(cTypes[capIdx][tyIdx].count, 1, 1);
spec.verifyIO = check32BitFloats;
spec.inputTypes[0] = CAPABILITIES[capIdx].dtype;
spec.inputs.push_back(BufferSp(new Float16Buffer(float16Data)));
- spec.outputs.push_back(BufferSp(new Float32Buffer(cTypes[tyIdx].useConstantIndex ? float32DataConstIdx : float32Data)));
+ spec.outputs.push_back(BufferSp(new Float32Buffer(cTypes[capIdx][tyIdx].useConstantIndex ? float32DataConstIdx : float32Data)));
spec.extensions.push_back("VK_KHR_16bit_storage");
spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
bool useConstantIndex;
unsigned constantIndex;
unsigned count;
+ unsigned inputStride;
};
- const CompositeType cTypes[] =
+ const CompositeType cTypes[2][8] =
{
- {"scalar_sint", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", false, 0, numElements},
- {"scalar_sint_const_idx_5", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 5, numElements},
- {"scalar_sint_const_idx_8", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 8, numElements},
- {"scalar_uint", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", false, 0, numElements},
- {"scalar_uint_const_idx_5", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 5, numElements},
- {"scalar_uint_const_idx_8", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 8, numElements},
- {"vector_sint", true, sintTypes, "v4i32", "v4i16", "OpSConvert", "OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 8\n", false, 0, numElements / 4},
- {"vector_uint", false, uintTypes, "v4u32", "v4u16", "OpUConvert", "OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 8\n", false, 0, numElements / 4}
+ {
+ {"scalar_sint", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", false, 0, numElements, 1},
+ {"scalar_sint_const_idx_5", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 5, numElements, 1},
+ {"scalar_sint_const_idx_8", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 8, numElements, 1},
+ {"scalar_uint", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", false, 0, numElements, 1},
+ {"scalar_uint_const_idx_5", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 5, numElements, 1},
+ {"scalar_uint_const_idx_8", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 8, numElements, 1},
+ {"vector_sint", true, sintTypes, "v4i32", "v4i16", "OpSConvert", "OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 8\n", false, 0, numElements / 4, 4},
+ {"vector_uint", false, uintTypes, "v4u32", "v4u16", "OpUConvert", "OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 8\n", false, 0, numElements / 4, 4}
+ },
+ {
+ {"scalar_sint", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n", false, 0, numElements, 8},
+ {"scalar_sint_const_idx_5", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n", true, 5, numElements, 8},
+ {"scalar_sint_const_idx_8", true, sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n", true, 8, numElements, 8},
+ {"scalar_uint", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n", false, 0, numElements, 8},
+ {"scalar_uint_const_idx_5", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n", true, 5, numElements, 8},
+ {"scalar_uint_const_idx_8", false, uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n", true, 8, numElements, 8},
+ {"vector_sint", true, sintTypes, "v4i32", "v4i16", "OpSConvert", "OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 16\n", false, 0, numElements / 4, 8},
+ {"vector_uint", false, uintTypes, "v4u32", "v4u16", "OpUConvert", "OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 16\n", false, 0, numElements / 4, 8}
+ }
};
- vector<deInt16> inputs = getInt16s(rnd, numElements);
- vector<deInt32> sOutputs;
- vector<deInt32> uOutputs;
- const deUint16 signBitMask = 0x8000;
- const deUint32 signExtendMask = 0xffff0000;
-
- sOutputs.reserve(inputs.size());
- uOutputs.reserve(inputs.size());
-
- for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
- {
- uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
- if (inputs[numNdx] & signBitMask)
- sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
- else
- sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
- }
-
for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
- for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
+ for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx)
{
ComputeShaderSpec spec;
map<string, string> specs;
- string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name;
+ string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name;
+ const deUint32 inputStride = cTypes[capIdx][tyIdx].inputStride;
+ vector<deInt16> inputs = getInt16s(rnd, numElements * inputStride);
+ vector<deInt32> sOutputs;
+ vector<deInt32> uOutputs;
+ const deUint16 signBitMask = 0x8000;
+ const deUint32 signExtendMask = 0xffff0000;
+ const deUint32 count = cTypes[capIdx][tyIdx].count;
+ const deUint32 scalarsPerItem = numElements / count;
+
+ sOutputs.reserve(numElements);
+ uOutputs.reserve(numElements);
+
+ for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
+ for (deUint32 scalarIdx = 0; scalarIdx < scalarsPerItem; ++scalarIdx)
+ {
+ const deInt16 input = inputs[numNdx * inputStride + scalarIdx];
+
+ uOutputs.push_back(static_cast<deUint16>(input));
+ if (input & signBitMask)
+ sOutputs.push_back(static_cast<deInt32>(input | signExtendMask));
+ else
+ sOutputs.push_back(static_cast<deInt32>(input));
+ }
+
vector<deInt32> intDataConstIdx;
- if (cTypes[tyIdx].useConstantIndex)
+ if (cTypes[capIdx][tyIdx].useConstantIndex)
{
- const deUint32 numInts = numElements / cTypes[tyIdx].count;
-
for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
{
- const deInt32 idx = cTypes[tyIdx].constantIndex * numInts + numIdx % numInts;
+ const deInt32 idx = cTypes[capIdx][tyIdx].constantIndex * scalarsPerItem + numIdx % scalarsPerItem;
- if (cTypes[tyIdx].isSigned)
+ if (cTypes[capIdx][tyIdx].isSigned)
intDataConstIdx.push_back(sOutputs[idx]);
else
intDataConstIdx.push_back(uOutputs[idx]);
specs["capability"] = CAPABILITIES[capIdx].cap;
specs["storage"] = CAPABILITIES[capIdx].decor;
- specs["stride"] = cTypes[tyIdx].stride;
- specs["base32"] = cTypes[tyIdx].base32;
- specs["base16"] = cTypes[tyIdx].base16;
- specs["types"] = cTypes[tyIdx].types;
- specs["convert"] = cTypes[tyIdx].opcode;
- specs["constarrayidx"] = de::toString(cTypes[tyIdx].constantIndex);
- if (cTypes[tyIdx].useConstantIndex)
+ specs["stride"] = cTypes[capIdx][tyIdx].stride;
+ specs["base32"] = cTypes[capIdx][tyIdx].base32;
+ specs["base16"] = cTypes[capIdx][tyIdx].base16;
+ specs["types"] = cTypes[capIdx][tyIdx].types;
+ specs["convert"] = cTypes[capIdx][tyIdx].opcode;
+ specs["constarrayidx"] = de::toString(cTypes[capIdx][tyIdx].constantIndex);
+ if (cTypes[capIdx][tyIdx].useConstantIndex)
specs["arrayindex"] = "c_i32_ci";
else
specs["arrayindex"] = "x";
spec.assembly = shaderTemplate.specialize(specs);
- spec.numWorkGroups = IVec3(cTypes[tyIdx].count, 1, 1);
+ spec.numWorkGroups = IVec3(cTypes[capIdx][tyIdx].count, 1, 1);
spec.inputTypes[0] = CAPABILITIES[capIdx].dtype;
spec.inputs.push_back(BufferSp(new Int16Buffer(inputs)));
- if (cTypes[tyIdx].useConstantIndex)
+ if (cTypes[capIdx][tyIdx].useConstantIndex)
spec.outputs.push_back(BufferSp(new Int32Buffer(intDataConstIdx)));
- else if (cTypes[tyIdx].isSigned)
+ else if (cTypes[capIdx][tyIdx].isSigned)
spec.outputs.push_back(BufferSp(new Int32Buffer(sOutputs)));
else
spec.outputs.push_back(BufferSp(new Int32Buffer(uOutputs)));
{
tcu::TestContext& testCtx = group->getTestContext();
de::Random rnd (deStringHash(group->getName()));
- const deUint32 structSize = 24; // In number of 16bit items. Includes padding.
+ const deUint32 structSize = 128; // In number of 16bit items. Includes padding.
vector<deFloat16> inputDataFloat = getFloat16s(rnd, structSize * 4);
vector<deInt16> inputDataInt = getInt16s(rnd, structSize * 4);
vector<float> outputDataFloat;
" OpDecorate %Output BufferBlock\n"
" OpDecorate %dataOutput DescriptorSet 0\n"
" OpDecorate %dataOutput Binding 1\n"
- " OpDecorate %scalarArray ArrayStride 2\n"
- " OpDecorate %scalarArray2D ArrayStride 8\n"
+ " OpDecorate %scalarArray ArrayStride 16\n"
+ " OpDecorate %scalarArray2D ArrayStride 48\n"
" OpMemberDecorate %S 0 Offset 0\n"
- " OpMemberDecorate %S 1 Offset 8\n"
+ " OpMemberDecorate %S 1 Offset 48\n"
" ${decoration:opt}\n"
- " OpMemberDecorate %S 2 Offset 40\n"
- " OpDecorate %_arr_S_uint_4 ArrayStride 48\n"
+ " OpMemberDecorate %S 2 Offset 240\n"
+ " OpDecorate %_arr_S_uint_4 ArrayStride 256\n"
" OpMemberDecorate %Input 0 Offset 0\n"
" OpMemberDecorate %Output 0 Offset 0\n"
" OpDecorate %Input ${storage}\n"
const deUint32 signExtendMask = 0xffff0000;
// Determine the selected output float for the selected indices.
const tcu::UVec4 vec = indices[numIdx];
- // Offsets are in multiples of 16bits.
- const deUint32 fieldOffsets[3][3] =
+ // Offsets are in multiples of 16 bits. The float variant uses a matrix as
+ // the second field, which has different layout rules than a 2D array,
+ // so separate offset tables are needed for floats and integers.
+ const deUint32 fieldOffsetsFloat[3][3] =
{
- {0u, 1u, 0u},
- {4u, 4u, 1u},
- {20u, 1u, 0u}
+ {0u, 8u, 0u},
+ {24, 24u, 1u},
+ {120u, 1u, 0u}
};
- const deUint32 offset = vec.x() * structSize + fieldOffsets[vec.y()][0] + fieldOffsets[vec.y()][1] * vec.z() + fieldOffsets[vec.y()][2] * vec.w();
- const bool hasSign = inputDataInt[offset] & signBitMask;
+ const deUint32 fieldOffsetsInt[3][3] =
+ {
+ {0u, 8u, 0u},
+ {24, 24u, 8u},
+ {120u, 1u, 0u}
+ };
+ const deUint32 offsetFloat = vec.x() * structSize + fieldOffsetsFloat[vec.y()][0] + fieldOffsetsFloat[vec.y()][1] * vec.z() + fieldOffsetsFloat[vec.y()][2] * vec.w();
+ const deUint32 offsetInt = vec.x() * structSize + fieldOffsetsInt[vec.y()][0] + fieldOffsetsInt[vec.y()][1] * vec.z() + fieldOffsetsInt[vec.y()][2] * vec.w();
+ const bool hasSign = inputDataInt[offsetInt] & signBitMask;
- outputDataFloat.push_back(deFloat16To32(inputDataFloat[offset]));
- outputDataUInt.push_back((deUint16)inputDataInt[offset]);
- outputDataSInt.push_back((deInt32)(inputDataInt[offset] | (hasSign ? signExtendMask : 0u)));
+ outputDataFloat.push_back(deFloat16To32(inputDataFloat[offsetFloat]));
+ outputDataUInt.push_back((deUint16)inputDataInt[offsetInt]);
+ outputDataSInt.push_back((deInt32)(inputDataInt[offsetInt] | (hasSign ? signExtendMask : 0u)));
}
for (deUint32 indicesIdx = 0; indicesIdx < (deUint32)indices.size(); ++indicesIdx)
if (dataTypeIdx == 0)
{
spec.verifyIO = check32BitFloats;
- specs["decoration"] = "OpMemberDecorate %S 1 ColMajor\nOpMemberDecorate %S 1 MatrixStride 8\n";
+ specs["decoration"] = "OpMemberDecorate %S 1 ColMajor\nOpMemberDecorate %S 1 MatrixStride 48\n";
}
spec.assembly = shaderTemplate.specialize(specs);
"${stride}"
+ // NOTE(review): %PC16 appears to be the push-constant block (variable declared outside this hunk - confirm). Push-constant blocks must stay decorated Block, not BufferBlock.
"OpDecorate %PC16 Block\n"
"OpMemberDecorate %PC16 0 Offset 0\n"
"OpMemberDecorate %SSBO32 0 Offset 0\n"
"OpDecorate %SSBO32 BufferBlock\n"
map<string, string> fragments;
const deUint32 numDataPoints = 256;
RGBA defaultColors[4];
- GraphicsResources resources;
vector<string> extensions;
const StringTemplate capabilities ("OpCapability ${cap}\n");
// inputs and outputs are declared to be vectors of signed integers.
for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx]));
- resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(inputs))));
- resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(outputs))));
extensions.push_back("VK_KHR_16bit_storage");
fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\"";
" %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
const StringTemplate scalarDecoration(
- "OpDecorate %ra_i32 ArrayStride 4\n"
+ "OpDecorate %ra_i32 ArrayStride ${arraystride}\n"
"OpDecorate %ra_i16 ArrayStride 2\n"
"OpMemberDecorate %SSBO32 0 Offset 0\n"
"OpMemberDecorate %SSBO16 0 Offset 0\n"
"OpFunctionEnd\n");
- struct Category
+ // Scalar
{
- const char* name;
- const StringTemplate& preMain;
- const StringTemplate& decoration;
- const StringTemplate& testFunction;
- };
+ const deUint32 arrayStrides[] = {4, 16};
+
+ for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
+ for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
+ {
+ map<string, string> specs;
+ string name = string(CAPABILITIES[capIdx].name) + "_scalar_" + intFacts[factIdx].name;
- const Category categories[] =
+ specs["cap"] = CAPABILITIES[capIdx].cap;
+ specs["indecor"] = CAPABILITIES[capIdx].decor;
+ specs["itype32"] = intFacts[factIdx].type32;
+ specs["v4itype32"] = "%v4" + string(intFacts[factIdx].type32).substr(1);
+ specs["itype16"] = intFacts[factIdx].type16;
+ specs["signed"] = intFacts[factIdx].isSigned;
+ specs["convert"] = intFacts[factIdx].opcode;
+ specs["arraystride"] = de::toString(arrayStrides[capIdx]);
+
+ fragments["pre_main"] = scalarPreMain.specialize(specs);
+ fragments["testfun"] = scalarTestFunc.specialize(specs);
+ fragments["capability"] = capabilities.specialize(specs);
+ fragments["decoration"] = scalarDecoration.specialize(specs);
+
+ vector<deInt32> inputsPadded;
+ for (size_t dataIdx = 0; dataIdx < inputs.size(); ++dataIdx)
+ {
+ inputsPadded.push_back(inputs[dataIdx]);
+ for (deUint32 padIdx = 0; padIdx < arrayStrides[capIdx] / 4 - 1; ++padIdx)
+ inputsPadded.push_back(0);
+ }
+ GraphicsResources resources;
+ resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(inputsPadded))));
+ resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(outputs))));
+
+ resources.inputs.back().first = CAPABILITIES[capIdx].dtype;
+
+ createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
+ }
+ }
+ // Vector
{
- {"scalar", scalarPreMain, scalarDecoration, scalarTestFunc},
- {"vector", vecPreMain, vecDecoration, vecTestFunc},
- };
+ GraphicsResources resources;
+ resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(inputs))));
+ resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(outputs))));
- for (deUint32 catIdx = 0; catIdx < DE_LENGTH_OF_ARRAY(categories); ++catIdx)
for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
{
map<string, string> specs;
- string name = string(CAPABILITIES[capIdx].name) + "_" + categories[catIdx].name + "_" + intFacts[factIdx].name;
+ string name = string(CAPABILITIES[capIdx].name) + "_vector_" + intFacts[factIdx].name;
specs["cap"] = CAPABILITIES[capIdx].cap;
specs["indecor"] = CAPABILITIES[capIdx].decor;
specs["signed"] = intFacts[factIdx].isSigned;
specs["convert"] = intFacts[factIdx].opcode;
- fragments["pre_main"] = categories[catIdx].preMain.specialize(specs);
- fragments["testfun"] = categories[catIdx].testFunction.specialize(specs);
+ fragments["pre_main"] = vecPreMain.specialize(specs);
+ fragments["testfun"] = vecTestFunc.specialize(specs);
fragments["capability"] = capabilities.specialize(specs);
- fragments["decoration"] = categories[catIdx].decoration.specialize(specs);
+ fragments["decoration"] = vecDecoration.specialize(specs);
resources.inputs.back().first = CAPABILITIES[capIdx].dtype;
createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
}
+ }
}
void addCompute16bitStorageUniform16To16Group (tcu::TestCaseGroup* group)
const char* base16;
const char* stride;
unsigned count;
+ unsigned inputStride;
};
- const CompositeType cTypes[] =
+ const CompositeType cTypes[2][3] =
{
- {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", numElements},
- {"vector", "v4f32", "v4f16", "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n", numElements / 4},
- {"matrix", "v4f32", "v4f16", "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n", numElements / 8},
+ { // BufferBlock
+ {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", numElements, 1},
+ {"vector", "v4f32", "v4f16", "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n", numElements / 4, 1},
+ {"matrix", "v4f32", "v4f16", "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n", numElements / 8, 1}
+ },
+ { // Block
+ {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 16\nOpDecorate %f16arr ArrayStride 2\n", numElements, 4},
+ {"vector", "v4f32", "v4f16", "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n", numElements / 4, 1},
+ {"matrix", "v4f32", "v4f16", "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n", numElements / 8, 1}
+ }
};
- vector<float> float32Data = getFloat32s(rnd, numElements);
vector<deFloat16> float16DummyData (numElements, 0);
for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
- for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
+ for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx)
for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
{
ComputeShaderSpec spec;
map<string, string> specs;
- string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name + "_float_" + rndModes[rndModeIdx].name;
+ string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name + "_float_" + rndModes[rndModeIdx].name;
+ vector<float> float32Data = getFloat32s(rnd, numElements * cTypes[capIdx][tyIdx].inputStride);
specs["capability"] = CAPABILITIES[capIdx].cap;
specs["storage"] = CAPABILITIES[capIdx].decor;
- specs["stride"] = cTypes[tyIdx].stride;
- specs["base32"] = cTypes[tyIdx].base32;
- specs["base16"] = cTypes[tyIdx].base16;
+ specs["stride"] = cTypes[capIdx][tyIdx].stride;
+ specs["base32"] = cTypes[capIdx][tyIdx].base32;
+ specs["base16"] = cTypes[capIdx][tyIdx].base16;
specs["rounding"] = rndModes[rndModeIdx].decor;
specs["types"] = floatTypes;
specs["convert"] = "OpFConvert";
- if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
+ if (strcmp(cTypes[capIdx][tyIdx].name, "matrix") == 0)
{
if (strcmp(rndModes[rndModeIdx].name, "rtz") == 0)
specs["rounding"] += "\nOpDecorate %val16_1 FPRoundingMode RTZ\n";
}
spec.assembly = shaderTemplate.specialize(specs);
- spec.numWorkGroups = IVec3(cTypes[tyIdx].count, 1, 1);
+ spec.numWorkGroups = IVec3(cTypes[capIdx][tyIdx].count, 1, 1);
spec.verifyIO = rndModes[rndModeIdx].func;
spec.inputTypes[0] = CAPABILITIES[capIdx].dtype;
const char* opcode;
const char* stride;
unsigned count;
+ unsigned inputStride;
};
- const CompositeType cTypes[] =
+ const CompositeType cTypes[2][4] =
{
- {"scalar_sint", sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", numElements},
- {"scalar_uint", uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", numElements},
- {"vector_sint", sintTypes, "v2i32", "v2i16", "OpSConvert", "OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n", numElements / 2},
- {"vector_uint", uintTypes, "v2u32", "v2u16", "OpUConvert", "OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n", numElements / 2},
+ {
+ {"scalar_sint", sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", numElements, 1},
+ {"scalar_uint", uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", numElements, 1},
+ {"vector_sint", sintTypes, "v2i32", "v2i16", "OpSConvert", "OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n", numElements / 2, 2},
+ {"vector_uint", uintTypes, "v2u32", "v2u16", "OpUConvert", "OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n", numElements / 2, 2}
+ },
+ {
+ {"scalar_sint", sintTypes, "i32", "i16", "OpSConvert", "OpDecorate %i32arr ArrayStride 16\nOpDecorate %i16arr ArrayStride 2\n", numElements, 4},
+ {"scalar_uint", uintTypes, "u32", "u16", "OpUConvert", "OpDecorate %u32arr ArrayStride 16\nOpDecorate %u16arr ArrayStride 2\n", numElements, 4},
+ {"vector_sint", sintTypes, "v2i32", "v2i16", "OpSConvert", "OpDecorate %v2i32arr ArrayStride 16\nOpDecorate %v2i16arr ArrayStride 4\n", numElements / 2, 4},
+ {"vector_uint", uintTypes, "v2u32", "v2u16", "OpUConvert", "OpDecorate %v2u32arr ArrayStride 16\nOpDecorate %v2u16arr ArrayStride 4\n", numElements / 2, 4}
+ }
};
- vector<deInt32> inputs = getInt32s(rnd, numElements);
- vector<deInt16> outputs;
-
- outputs.reserve(inputs.size());
- for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
- outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx]));
-
for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
- for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
+ for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx)
{
ComputeShaderSpec spec;
map<string, string> specs;
- string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name;
+ string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name;
+ const deUint32 inputStride = cTypes[capIdx][tyIdx].inputStride;
+ const deUint32 count = cTypes[capIdx][tyIdx].count;
+ const deUint32 scalarsPerItem = numElements / count;
+
+ vector<deInt32> inputs = getInt32s(rnd, numElements * inputStride);
+ vector<deInt16> outputs;
+
+ outputs.reserve(numElements);
+ for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
+ for (deUint32 scalarIdx = 0; scalarIdx < scalarsPerItem; scalarIdx++)
+ outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx * inputStride + scalarIdx]));
specs["capability"] = CAPABILITIES[capIdx].cap;
specs["storage"] = CAPABILITIES[capIdx].decor;
- specs["stride"] = cTypes[tyIdx].stride;
- specs["base32"] = cTypes[tyIdx].base32;
- specs["base16"] = cTypes[tyIdx].base16;
- specs["types"] = cTypes[tyIdx].types;
- specs["convert"] = cTypes[tyIdx].opcode;
+ specs["stride"] = cTypes[capIdx][tyIdx].stride;
+ specs["base32"] = cTypes[capIdx][tyIdx].base32;
+ specs["base16"] = cTypes[capIdx][tyIdx].base16;
+ specs["types"] = cTypes[capIdx][tyIdx].types;
+ specs["convert"] = cTypes[capIdx][tyIdx].opcode;
spec.assembly = shaderTemplate.specialize(specs);
- spec.numWorkGroups = IVec3(cTypes[tyIdx].count, 1, 1);
+ spec.numWorkGroups = IVec3(cTypes[capIdx][tyIdx].count, 1, 1);
spec.inputTypes[0] = CAPABILITIES[capIdx].dtype;
spec.inputs.push_back(BufferSp(new Int32Buffer(inputs)));
{
de::Random rnd (deStringHash(testGroup->getName()));
map<string, string> fragments;
- GraphicsResources resources;
vector<string> extensions;
const deUint32 numDataPoints = 256;
RGBA defaultColors[4];
- vector<float> float32Data = getFloat32s(rnd, numDataPoints);
+ const vector<float> float32Data = getFloat32s(rnd, numDataPoints);
+ vector<float> float32DataPadded;
vector<deFloat16> float16DummyData (numDataPoints, 0);
const StringTemplate capabilities ("OpCapability ${cap}\n");
- resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data))));
- // We use a custom verifyIO to check the result via computing directly from inputs; the contents in outputs do not matter.
- resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16DummyData))));
+ for (size_t dataIdx = 0; dataIdx < float32Data.size(); ++dataIdx)
+ {
+ float32DataPadded.push_back(float32Data[dataIdx]);
+ float32DataPadded.push_back(0.0f);
+ float32DataPadded.push_back(0.0f);
+ float32DataPadded.push_back(0.0f);
+ }
extensions.push_back("VK_KHR_16bit_storage");
fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\"";
" %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
const StringTemplate decoration (
- "OpDecorate %ra_f32 ArrayStride 4\n"
+ "OpDecorate %ra_f32 ArrayStride ${arraystride}\n"
"OpDecorate %ra_f16 ArrayStride 2\n"
"OpMemberDecorate %SSBO32 0 Offset 0\n"
"OpMemberDecorate %SSBO16 0 Offset 0\n"
"OpFunctionEnd\n";
- const RndMode rndModes[] =
+ const RndMode rndModes[] =
{
{"rtz", "OpDecorate %val16 FPRoundingMode RTZ", graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
{"rte", "OpDecorate %val16 FPRoundingMode RTE", graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
{"unspecified_rnd_mode", "", graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
};
+ const deUint32 arrayStrides[] = {4, 16};
+
for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
{
map<string, string> specs;
string testName = string(CAPABILITIES[capIdx].name) + "_scalar_float_" + rndModes[rndModeIdx].name;
+ GraphicsResources resources;
+ resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(arrayStrides[capIdx] == 4 ? float32Data : float32DataPadded))));
+ // We use a custom verifyIO that checks the result by computing it directly from the inputs; the contents of the outputs do not matter, only their size, which determines how many values are checked.
+ resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16DummyData))));
+
specs["cap"] = CAPABILITIES[capIdx].cap;
specs["indecor"] = CAPABILITIES[capIdx].decor;
+ specs["arraystride"] = de::toString(arrayStrides[capIdx]);
specs["rounddecor"] = rndModes[rndModeIdx].decor;
fragments["capability"] = capabilities.specialize(specs);
resources.inputs.back().first = CAPABILITIES[capIdx].dtype;
resources.verifyIO = rndModes[rndModeIdx].f;
-
createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
}
}
+ // Non-scalar cases can use the same resources.
+ GraphicsResources resources;
+ resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data))));
+ // We use a custom verifyIO that checks the result by computing it directly from the inputs; the contents of the outputs do not matter.
+ resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16DummyData))));
+
{ // vector cases
fragments["pre_main"] =
" %f16 = OpTypeFloat 16\n"
for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
{
- string testName = string(cases[caseIdx].name) + numberToString(caseNdx) + "_" + rndModes[rndModeIdx].name;
+ string testName = string(cases[caseIdx].name) + numberToString(caseNdx) + "_" + rndModes[rndModeIdx].name;
for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
{
"OpDecorate %a64f32 ArrayStride 4\n"
"OpDecorate %SSBO32 BufferBlock\n"
"OpMemberDecorate %SSBO32 0 Offset 0\n"
- "OpDecorate %PC16 Block\n"
+ "OpDecorate %PC16 BufferBlock\n"
"OpMemberDecorate %PC16 0 Offset 0\n"
"OpDecorate %ssbo32 DescriptorSet 0\n"
"OpDecorate %ssbo32 Binding 0\n";
"OpDecorate %a16v4f32 ArrayStride 16\n"
"OpDecorate %SSBO32 BufferBlock\n"
"OpMemberDecorate %SSBO32 0 Offset 0\n"
- "OpDecorate %PC16 Block\n"
+ "OpDecorate %PC16 BufferBlock\n"
"OpMemberDecorate %PC16 0 Offset 0\n"
"OpDecorate %ssbo32 DescriptorSet 0\n"
"OpDecorate %ssbo32 Binding 0\n";
"OpMemberDecorate %SSBO32 0 Offset 0\n"
"OpMemberDecorate %SSBO32 0 ColMajor\n"
"OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
- "OpDecorate %PC16 Block\n"
+ "OpDecorate %PC16 BufferBlock\n"
"OpMemberDecorate %PC16 0 Offset 0\n"
"OpMemberDecorate %PC16 0 ColMajor\n"
"OpMemberDecorate %PC16 0 MatrixStride 8\n"
"OpDecorate %a${count}${type32} ArrayStride 4\n"
"OpDecorate %SSBO32 BufferBlock\n"
"OpMemberDecorate %SSBO32 0 Offset 0\n"
- "OpDecorate %PC16 Block\n"
+ "OpDecorate %PC16 BufferBlock\n"
"OpMemberDecorate %PC16 0 Offset 0\n"
"OpDecorate %ssbo32 DescriptorSet 0\n"
"OpDecorate %ssbo32 Binding 0\n");
"OpDecorate %a${count}${type32} ArrayStride 8\n"
"OpDecorate %SSBO32 BufferBlock\n"
"OpMemberDecorate %SSBO32 0 Offset 0\n"
- "OpDecorate %PC16 Block\n"
+ "OpDecorate %PC16 BufferBlock\n"
"OpMemberDecorate %PC16 0 Offset 0\n"
"OpDecorate %ssbo32 DescriptorSet 0\n"
"OpDecorate %ssbo32 Binding 0\n");
vector<deInt16> inputs = getInt16s(rnd, numDataPoints);
vector<deInt32> sOutputs;
vector<deInt32> uOutputs;
- GraphicsResources resources;
vector<string> extensions;
const deUint16 signBitMask = 0x8000;
const deUint32 signExtendMask = 0xffff0000;
sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
}
- resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(inputs))));
-
extensions.push_back("VK_KHR_16bit_storage");
fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\"";
const StringTemplate scalarDecoration (
"OpDecorate %ra_i32 ArrayStride 4\n"
- "OpDecorate %ra_i16 ArrayStride 2\n"
+ "OpDecorate %ra_i16 ArrayStride ${arraystride}\n"
"OpMemberDecorate %SSBO32 0 Offset 0\n"
"OpMemberDecorate %SSBO16 0 Offset 0\n"
"OpDecorate %SSBO32 BufferBlock\n"
const StringTemplate vecDecoration (
"OpDecorate %ra_v2i32 ArrayStride 8\n"
- "OpDecorate %ra_v2i16 ArrayStride 4\n"
+ "OpDecorate %ra_v2i16 ArrayStride ${arraystride}\n"
"OpMemberDecorate %SSBO32 0 Offset 0\n"
"OpMemberDecorate %SSBO16 0 Offset 0\n"
"OpDecorate %SSBO32 BufferBlock\n"
"OpDecorate %ssbo32 Binding 1\n"
"OpDecorate %ssbo16 Binding 0\n");
- const StringTemplate vecTestFunc (
+ const StringTemplate vecTestFunc (
"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
" %param = OpFunctionParameter %v4f32\n"
const deUint32 numElements;
};
- const Category categories[] =
+ const Category categories[] =
{
{"scalar", scalarPreMain, scalarDecoration, scalarTestFunc, 1},
{"vector", vecPreMain, vecDecoration, vecTestFunc, 2},
};
+ const deUint32 minArrayStride[] = {2, 16};
+
for (deUint32 catIdx = 0; catIdx < DE_LENGTH_OF_ARRAY(categories); ++catIdx)
for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
deUint32 constIdx = constantIndices[constIndexIdx].constantIndex;
map<string, string> specs;
string name = string(CAPABILITIES[capIdx].name) + "_" + categories[catIdx].name + "_" + intFacts[factIdx].name;
+ const deUint32 numElements = categories[catIdx].numElements;
+ const deUint32 arrayStride = de::max(numElements * 2, minArrayStride[capIdx]);
specs["cap"] = CAPABILITIES[capIdx].cap;
specs["indecor"] = CAPABILITIES[capIdx].decor;
+ specs["arraystride"] = de::toString(arrayStride);
specs["itype32"] = intFacts[factIdx].type32;
specs["v2itype32"] = "%v2" + string(intFacts[factIdx].type32).substr(1);
specs["v3itype32"] = "%v3" + string(intFacts[factIdx].type32).substr(1);
fragments["capability"] = capabilities.specialize(specs);
fragments["decoration"] = categories[catIdx].decoration.specialize(specs);
+ GraphicsResources resources;
+ vector<deInt16> inputsPadded;
+ for (size_t dataIdx = 0; dataIdx < inputs.size() / numElements; ++dataIdx)
+ {
+ for (deUint32 elementIdx = 0; elementIdx < numElements; ++elementIdx)
+ inputsPadded.push_back(inputs[dataIdx * numElements + elementIdx]);
+ for (deUint32 padIdx = 0; padIdx < arrayStride / 2 - numElements; ++padIdx)
+ inputsPadded.push_back(0);
+ }
+
+ resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(inputsPadded))));
+
vector<deInt32> constIdxOutputs;
if (useConstIdx)
{
- const deUint32 numElements = categories[catIdx].numElements;
name += string("_const_idx_") + de::toString(constIdx);
for (deUint32 i = 0; i < numDataPoints; i++)
{
const StringTemplate decoration (
"OpDecorate %ra_f32 ArrayStride 4\n"
- "OpDecorate %ra_f16 ArrayStride 2\n"
+ "OpDecorate %ra_f16 ArrayStride ${arraystride}\n"
"OpMemberDecorate %SSBO32 0 Offset 0\n"
"OpMemberDecorate %SSBO16 0 Offset 0\n"
"OpDecorate %SSBO32 BufferBlock\n"
"OpFunctionEnd\n");
+ const deUint32 arrayStrides[] = {2, 16};
+
for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
{
for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
specs["cap"] = CAPABILITIES[capIdx].cap;
specs["indecor"] = CAPABILITIES[capIdx].decor;
+ specs["arraystride"] = de::toString(arrayStrides[capIdx]);
specs["constarrayidx"] = de::toString(constIdx);
if (useConstIdx)
specs["arrayindex"] = "c_i32_ci";
fragments["pre_main"] = preMain.specialize(specs);
fragments["testfun"] = testFun.specialize(specs);
+ vector<deFloat16> inputData;
+ for (size_t dataIdx = 0; dataIdx < float16Data.size(); ++dataIdx)
+ {
+ inputData.push_back(float16Data[dataIdx]);
+ for (deUint32 padIdx = 0; padIdx < arrayStrides[capIdx] / 2 - 1; ++padIdx)
+ inputData.push_back(deFloat16(0.0f));
+ }
+
vector<float> float32Data;
float32Data.reserve(numDataPoints);
for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
float32Data.push_back(deFloat16To32(float16Data[useConstIdx ? constIdx : numIdx]));
- resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16Data))));
+ resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(inputData))));
resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data))));
resources.verifyIO = check32BitFloats;
resources.inputs.back().first = CAPABILITIES[capIdx].dtype;
const StringTemplate decoration (
"OpDecorate %ra_v2f32 ArrayStride 8\n"
- "OpDecorate %ra_v2f16 ArrayStride 4\n"
+ "OpDecorate %ra_v2f16 ArrayStride ${arraystride}\n"
"OpMemberDecorate %SSBO32 0 Offset 0\n"
"OpMemberDecorate %SSBO16 0 Offset 0\n"
"OpDecorate %SSBO32 BufferBlock\n"
"OpFunctionEnd\n");
+ const deUint32 arrayStrides[] = {4, 16};
+
for (deUint32 constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
{
for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
specs["cap"] = CAPABILITIES[capIdx].cap;
specs["indecor"] = CAPABILITIES[capIdx].decor;
+ specs["arraystride"] = de::toString(arrayStrides[capIdx]);
specs["constarrayidx"] = de::toString(constIdx);
if (useConstIdx)
specs["arrayindex"] = "c_i32_ci";
fragments["pre_main"] = preMain.specialize(specs);
fragments["testfun"] = testFun.specialize(specs);
+ vector<deFloat16> inputData;
+ for (size_t dataIdx = 0; dataIdx < float16Data.size() / 2; ++dataIdx)
+ {
+ inputData.push_back(float16Data[dataIdx * 2]);
+ inputData.push_back(float16Data[dataIdx * 2 + 1]);
+ for (deUint32 padIdx = 0; padIdx < arrayStrides[capIdx] / 2 - 2; ++padIdx)
+ inputData.push_back(deFloat16(0.0f));
+ }
+
vector<float> float32Data;
float32Data.reserve(numDataPoints);
for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
float32Data.push_back(deFloat16To32(float16Data[constantIndices[constIndexIdx].useConstantIndex ? (constantIndices[constIndexIdx].constantIndex * 2 + numIdx % 2) : numIdx]));
- resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16Data))));
+ resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(inputData))));
resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data))));
resources.verifyIO = check32BitFloats;
resources.inputs.back().first = CAPABILITIES[capIdx].dtype;