#include "tcuStringTemplate.hpp"
#include "tcuTestLog.hpp"
#include "tcuVectorUtil.hpp"
+#include "tcuInterval.hpp"
#include "vkDefs.hpp"
#include "vkDeviceUtil.hpp"
#include "vkStrUtil.hpp"
#include "vkTypeUtil.hpp"
-#include "deRandom.hpp"
#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"
+#include "deMath.h"
#include "tcuStringTemplate.hpp"
#include "vktSpvAsm16bitStorageTests.hpp"
+#include "vktSpvAsmUboMatrixPaddingTests.hpp"
+#include "vktSpvAsmConditionalBranchTests.hpp"
+#include "vktSpvAsmIndexingTests.hpp"
+#include "vktSpvAsmImageSamplerTests.hpp"
#include "vktSpvAsmComputeShaderCase.hpp"
#include "vktSpvAsmComputeShaderTestUtil.hpp"
#include "vktSpvAsmGraphicsShaderTestUtil.hpp"
#include <string>
#include <sstream>
#include <utility>
+#include <stack>
namespace vkt
{
}
}
+// Gets a 64-bit integer with a more logarithmic distribution
+deInt64 randomInt64LogDistributed (de::Random& rnd)
+{
+ deInt64 val = rnd.getUint64();
+ val &= (1ull << rnd.getInt(1, 63)) - 1;
+ if (rnd.getBool())
+ val = -val;
+ return val;
+}
+
+static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues)
+{
+ for (int ndx = 0; ndx < numValues; ndx++)
+ dst[ndx] = randomInt64LogDistributed(rnd);
+}
+
+template<typename FilterT>
+static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues, FilterT filter)
+{
+ for (int ndx = 0; ndx < numValues; ndx++)
+ {
+ deInt64 value;
+ do {
+ value = randomInt64LogDistributed(rnd);
+ } while (!filter(value));
+ dst[ndx] = value;
+ }
+}
+
+inline bool filterNonNegative (const deInt64 value)
+{
+ return value >= 0;
+}
+
+inline bool filterPositive (const deInt64 value)
+{
+ return value > 0;
+}
+
+inline bool filterNotZero (const deInt64 value)
+{
+ return value != 0;
+}
+
static void floorAll (vector<float>& values)
{
for (size_t i = 0; i < values.size(); i++)
CaseParameter (const char* case_, const string& param_) : name(case_), param(param_) {}
};
-// Assembly code used for testing OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
+// Assembly code used for testing LocalSize, OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
//
// #version 430
//
// output_data.elements[x] = -input_data.elements[x];
// }
+static string getAsmForLocalSizeTest(bool useLiteralLocalSize, bool useSpecConstantWorkgroupSize, IVec3 workGroupSize, deUint32 ndx)
+{
+ std::ostringstream out;
+ out << getComputeAsmShaderPreambleWithoutLocalSize();
+
+ if (useLiteralLocalSize)
+ {
+ out << "OpExecutionMode %main LocalSize "
+ << workGroupSize.x() << " " << workGroupSize.y() << " " << workGroupSize.z() << "\n";
+ }
+
+ out << "OpSource GLSL 430\n"
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+ "OpDecorate %id BuiltIn GlobalInvocationId\n";
+
+ if (useSpecConstantWorkgroupSize)
+ {
+ out << "OpDecorate %spec_0 SpecId 100\n"
+ << "OpDecorate %spec_1 SpecId 101\n"
+ << "OpDecorate %spec_2 SpecId 102\n"
+ << "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n";
+ }
+
+ out << getComputeAsmInputOutputBufferTraits()
+ << getComputeAsmCommonTypes()
+ << getComputeAsmInputOutputBuffer()
+ << "%id = OpVariable %uvec3ptr Input\n"
+ << "%zero = OpConstant %i32 0 \n";
+
+ if (useSpecConstantWorkgroupSize)
+ {
+ out << "%spec_0 = OpSpecConstant %u32 "<< workGroupSize.x() << "\n"
+ << "%spec_1 = OpSpecConstant %u32 "<< workGroupSize.y() << "\n"
+ << "%spec_2 = OpSpecConstant %u32 "<< workGroupSize.z() << "\n"
+ << "%gl_WorkGroupSize = OpSpecConstantComposite %uvec3 %spec_0 %spec_1 %spec_2\n";
+ }
+
+ out << "%main = OpFunction %void None %voidf\n"
+ << "%label = OpLabel\n"
+ << "%idval = OpLoad %uvec3 %id\n"
+ << "%ndx = OpCompositeExtract %u32 %idval " << ndx << "\n"
+
+ "%inloc = OpAccessChain %f32ptr %indata %zero %ndx\n"
+ "%inval = OpLoad %f32 %inloc\n"
+ "%neg = OpFNegate %f32 %inval\n"
+ "%outloc = OpAccessChain %f32ptr %outdata %zero %ndx\n"
+ " OpStore %outloc %neg\n"
+ " OpReturn\n"
+ " OpFunctionEnd\n";
+ return out.str();
+}
+
+tcu::TestCaseGroup* createLocalSizeGroup (tcu::TestContext& testCtx)
+{
+ de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "localsize", ""));
+ ComputeShaderSpec spec;
+ de::Random rnd (deStringHash(group->getName()));
+ const deUint32 numElements = 64u;
+ vector<float> positiveFloats (numElements, 0);
+ vector<float> negativeFloats (numElements, 0);
+
+ fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
+
+ for (size_t ndx = 0; ndx < numElements; ++ndx)
+ negativeFloats[ndx] = -positiveFloats[ndx];
+
+ spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
+ spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
+
+ spec.numWorkGroups = IVec3(numElements, 1, 1);
+
+ spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, 1, 1), 0u);
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize", "", spec));
+
+ spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, 1, 1), 0u);
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize", "", spec));
+
+ spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, 1, 1), 0u);
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize", "", spec));
+
+ spec.numWorkGroups = IVec3(1, 1, 1);
+
+ spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(numElements, 1, 1), 0u);
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_x", "", spec));
+
+ spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(numElements, 1, 1), 0u);
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_x", "", spec));
+
+ spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(numElements, 1, 1), 0u);
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_x", "", spec));
+
+ spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, numElements, 1), 1u);
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_y", "", spec));
+
+ spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, numElements, 1), 1u);
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_y", "", spec));
+
+ spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, numElements, 1), 1u);
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_y", "", spec));
+
+ spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, 1, numElements), 2u);
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_z", "", spec));
+
+ spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, 1, numElements), 2u);
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_z", "", spec));
+
+ spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, 1, numElements), 2u);
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_z", "", spec));
+
+ return group.release();
+}
+
tcu::TestCaseGroup* createOpNopGroup (tcu::TestContext& testCtx)
{
de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnop", "Test the OpNop instruction"));
if (outputAllocs.size() != 1)
return false;
- const BufferSp& expectedOutput = expectedOutputs[0];
- const deInt32* expectedOutputAsInt = static_cast<const deInt32*>(expectedOutputs[0]->data());
- const deInt32* outputAsInt = static_cast<const deInt32*>(outputAllocs[0]->getHostPtr());
- const float* input1AsFloat = static_cast<const float*>(inputs[0]->data());
- const float* input2AsFloat = static_cast<const float*>(inputs[1]->data());
- bool returnValue = true;
+ vector<deUint8> input1Bytes;
+ vector<deUint8> input2Bytes;
+ vector<deUint8> expectedBytes;
+
+ inputs[0]->getBytes(input1Bytes);
+ inputs[1]->getBytes(input2Bytes);
+ expectedOutputs[0]->getBytes(expectedBytes);
+
+ const deInt32* const expectedOutputAsInt = reinterpret_cast<const deInt32* const>(&expectedBytes.front());
+ const deInt32* const outputAsInt = static_cast<const deInt32* const>(outputAllocs[0]->getHostPtr());
+ const float* const input1AsFloat = reinterpret_cast<const float* const>(&input1Bytes.front());
+ const float* const input2AsFloat = reinterpret_cast<const float* const>(&input2Bytes.front());
+ bool returnValue = true;
- for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(deInt32); ++idx)
+ for (size_t idx = 0; idx < expectedBytes.size() / sizeof(deInt32); ++idx)
{
if (outputAsInt[idx] != expectedOutputAsInt[idx])
{
{
const char* name;
const char* assembly;
- void (*calculateExpected)(deInt32&, deInt32);
+ OpAtomicType opAtomic;
deInt32 numOutputElements;
- OpAtomicCase (const char* _name, const char* _assembly, void (*_calculateExpected)(deInt32&, deInt32), deInt32 _numOutputElements)
+ OpAtomicCase (const char* _name, const char* _assembly, OpAtomicType _opAtomic, deInt32 _numOutputElements)
: name (_name)
, assembly (_assembly)
- , calculateExpected (_calculateExpected)
- , numOutputElements (_numOutputElements) {}
+ , opAtomic (_opAtomic)
+ , numOutputElements (_numOutputElements) {}
};
tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStorageBuffer)
de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx,
useStorageBuffer ? "opatomic_storage_buffer" : "opatomic",
"Test the OpAtomic* opcodes"));
- de::Random rnd (deStringHash(group->getName()));
const int numElements = 65535;
vector<OpAtomicCase> cases;
"OpMemberDecorate %sumbuf 0 Coherent\n"
"OpMemberDecorate %sumbuf 0 Offset 0\n"
- "%void = OpTypeVoid\n"
- "%voidf = OpTypeFunction %void\n"
- "%u32 = OpTypeInt 32 0\n"
- "%i32 = OpTypeInt 32 1\n"
- "%uvec3 = OpTypeVector %u32 3\n"
- "%uvec3ptr = OpTypePointer Input %uvec3\n"
- "%i32ptr = OpTypePointer ${BLOCK_POINTER_TYPE} %i32\n"
- "%i32arr = OpTypeRuntimeArray %i32\n"
+ + getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
"%buf = OpTypeStruct %i32arr\n"
"%bufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
" OpReturn\n"
" OpFunctionEnd\n");
- #define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, CALCULATE_EXPECTED, NUM_OUTPUT_ELEMENTS) \
+ #define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS) \
do { \
DE_STATIC_ASSERT((NUM_OUTPUT_ELEMENTS) == 1 || (NUM_OUTPUT_ELEMENTS) == numElements); \
- struct calculateExpected_##NAME { static void calculateExpected(deInt32& expected, deInt32 input) CALCULATE_EXPECTED }; /* NOLINT(CALCULATE_EXPECTED) */ \
- cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, calculateExpected_##NAME::calculateExpected, NUM_OUTPUT_ELEMENTS)); \
+ cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)); \
} while (deGetFalse())
- #define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, CALCULATE_EXPECTED) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, CALCULATE_EXPECTED, 1)
- #define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, CALCULATE_EXPECTED) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, CALCULATE_EXPECTED, numElements)
+ #define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, OPATOMIC, 1)
+ #define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, OPATOMIC, numElements)
- ADD_OPATOMIC_CASE_1(iadd, "%unused = OpAtomicIAdd %i32 %outloc %one %zero %inval\n", { expected += input; } );
- ADD_OPATOMIC_CASE_1(isub, "%unused = OpAtomicISub %i32 %outloc %one %zero %inval\n", { expected -= input; } );
- ADD_OPATOMIC_CASE_1(iinc, "%unused = OpAtomicIIncrement %i32 %outloc %one %zero\n", { ++expected; (void)input;} );
- ADD_OPATOMIC_CASE_1(idec, "%unused = OpAtomicIDecrement %i32 %outloc %one %zero\n", { --expected; (void)input;} );
+ ADD_OPATOMIC_CASE_1(iadd, "%unused = OpAtomicIAdd %i32 %outloc %one %zero %inval\n", OPATOMIC_IADD );
+ ADD_OPATOMIC_CASE_1(isub, "%unused = OpAtomicISub %i32 %outloc %one %zero %inval\n", OPATOMIC_ISUB );
+ ADD_OPATOMIC_CASE_1(iinc, "%unused = OpAtomicIIncrement %i32 %outloc %one %zero\n", OPATOMIC_IINC );
+ ADD_OPATOMIC_CASE_1(idec, "%unused = OpAtomicIDecrement %i32 %outloc %one %zero\n", OPATOMIC_IDEC );
ADD_OPATOMIC_CASE_N(load, "%inval2 = OpAtomicLoad %i32 %inloc %zero %zero\n"
- " OpStore %outloc %inval2\n", { expected = input;} );
- ADD_OPATOMIC_CASE_N(store, " OpAtomicStore %outloc %zero %zero %inval\n", { expected = input;} );
+ " OpStore %outloc %inval2\n", OPATOMIC_LOAD );
+ ADD_OPATOMIC_CASE_N(store, " OpAtomicStore %outloc %zero %zero %inval\n", OPATOMIC_STORE );
ADD_OPATOMIC_CASE_N(compex, "%even = OpSMod %i32 %inval %two\n"
" OpStore %outloc %even\n"
- "%unused = OpAtomicCompareExchange %i32 %outloc %one %zero %zero %minusone %zero\n", { expected = (input % 2) == 0 ? -1 : 1;} );
+ "%unused = OpAtomicCompareExchange %i32 %outloc %one %zero %zero %minusone %zero\n", OPATOMIC_COMPEX );
#undef ADD_OPATOMIC_CASE
#undef ADD_OPATOMIC_CASE_1
if (useStorageBuffer)
spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
- fillRandomScalars(rnd, 1, 100, &inputInts[0], numElements);
- for (size_t ndx = 0; ndx < numElements; ++ndx)
- {
- cases[caseNdx].calculateExpected((cases[caseNdx].numOutputElements == 1) ? expected[0] : expected[ndx], inputInts[ndx]);
- }
-
- spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
- spec.outputs.push_back(BufferSp(new Int32Buffer(expected)));
+ spec.inputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_INPUT)));
+ spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_EXPECTED)));
spec.numWorkGroups = IVec3(numElements, 1, 1);
group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
}
if (outputAllocs.size() != 1)
return false;
- // We really just need this for size because we are not comparing the exact values.
- const BufferSp& expectedOutput = expectedOutputs[0];
- const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+ // Only size is needed because we are not comparing the exact values.
+ size_t byteSize = expectedOutputs[0]->getByteSize();
- for(size_t i = 0; i < expectedOutput->getNumBytes() / sizeof(float); ++i) {
+ const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
+
+ for(size_t i = 0; i < byteSize / sizeof(float); ++i) {
if (outputAsFloat[i] != 0.f &&
outputAsFloat[i] != -ldexp(1, -24)) {
return false;
if (outputAllocs.size() != 1)
return false;
- const BufferSp& expectedOutput = expectedOutputs[0];
- const float *expectedOutputAsFloat = static_cast<const float*>(expectedOutput->data());
- const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+ vector<deUint8> expectedBytes;
+ expectedOutputs[0]->getBytes(expectedBytes);
+
+ const float* expectedOutputAsFloat = reinterpret_cast<const float*>(&expectedBytes.front());
+ const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
- for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float); ++idx)
+ for (size_t idx = 0; idx < expectedBytes.size() / sizeof(float); ++idx)
{
const float f0 = expectedOutputAsFloat[idx];
const float f1 = outputAsFloat[idx];
"OpDecorate %f32arr ArrayStride 4\n"
"OpMemberDecorate %buf 0 Offset 0\n"
- + string(getComputeAsmCommonTypes()) +
+ + string(getComputeAsmCommonTypes()) +
+
+ "%buf = OpTypeStruct %f32arr\n"
+ "%bufptr = OpTypePointer Uniform %buf\n"
+ "%indata1 = OpVariable %bufptr Uniform\n"
+ "%indata2 = OpVariable %bufptr Uniform\n"
+ "%outdata = OpVariable %bufptr Uniform\n"
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%label = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
+ "%inval1 = OpLoad %f32 %inloc1\n"
+ "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
+ "%inval2 = OpLoad %f32 %inloc2\n"
+ "%rem = OpFRem %f32 %inval1 %inval2\n"
+ "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+ " OpStore %outloc %rem\n"
+ " OpReturn\n"
+ " OpFunctionEnd\n";
+
+ spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
+ spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
+ spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+ spec.numWorkGroups = IVec3(numElements, 1, 1);
+ spec.verifyIO = &compareFRem;
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+
+ return group.release();
+}
+
+bool compareNMin (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
+{
+ if (outputAllocs.size() != 1)
+ return false;
+
+ const BufferSp& expectedOutput (expectedOutputs[0]);
+ std::vector<deUint8> data;
+ expectedOutput->getBytes(data);
+
+ const float* const expectedOutputAsFloat = reinterpret_cast<const float*>(&data.front());
+ const float* const outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
+
+ for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
+ {
+ const float f0 = expectedOutputAsFloat[idx];
+ const float f1 = outputAsFloat[idx];
+
+ // For NMin, we accept NaN as output if both inputs were NaN.
+ // Otherwise the NaN is the wrong choise, as on architectures that
+ // do not handle NaN, those are huge values.
+ if (!(tcu::Float32(f1).isNaN() && tcu::Float32(f0).isNaN()) && deFloatAbs(f1 - f0) > 0.00001f)
+ return false;
+ }
+
+ return true;
+}
+
+tcu::TestCaseGroup* createOpNMinGroup (tcu::TestContext& testCtx)
+{
+ de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnmin", "Test the OpNMin instruction"));
+ ComputeShaderSpec spec;
+ de::Random rnd (deStringHash(group->getName()));
+ const int numElements = 200;
+ vector<float> inputFloats1 (numElements, 0);
+ vector<float> inputFloats2 (numElements, 0);
+ vector<float> outputFloats (numElements, 0);
+
+ fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
+ fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
+
+ // Make the first case a full-NAN case.
+ inputFloats1[0] = TCU_NAN;
+ inputFloats2[0] = TCU_NAN;
+
+ for (size_t ndx = 0; ndx < numElements; ++ndx)
+ {
+ // By default, pick the smallest
+ outputFloats[ndx] = std::min(inputFloats1[ndx], inputFloats2[ndx]);
+
+ // Make half of the cases NaN cases
+ if ((ndx & 1) == 0)
+ {
+ // Alternate between the NaN operand
+ if ((ndx & 2) == 0)
+ {
+ outputFloats[ndx] = inputFloats2[ndx];
+ inputFloats1[ndx] = TCU_NAN;
+ }
+ else
+ {
+ outputFloats[ndx] = inputFloats1[ndx];
+ inputFloats2[ndx] = TCU_NAN;
+ }
+ }
+ }
+
+ spec.assembly =
+ "OpCapability Shader\n"
+ "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint GLCompute %main \"main\" %id\n"
+ "OpExecutionMode %main LocalSize 1 1 1\n"
+
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ "OpDecorate %buf BufferBlock\n"
+ "OpDecorate %indata1 DescriptorSet 0\n"
+ "OpDecorate %indata1 Binding 0\n"
+ "OpDecorate %indata2 DescriptorSet 0\n"
+ "OpDecorate %indata2 Binding 1\n"
+ "OpDecorate %outdata DescriptorSet 0\n"
+ "OpDecorate %outdata Binding 2\n"
+ "OpDecorate %f32arr ArrayStride 4\n"
+ "OpMemberDecorate %buf 0 Offset 0\n"
+
+ + string(getComputeAsmCommonTypes()) +
+
+ "%buf = OpTypeStruct %f32arr\n"
+ "%bufptr = OpTypePointer Uniform %buf\n"
+ "%indata1 = OpVariable %bufptr Uniform\n"
+ "%indata2 = OpVariable %bufptr Uniform\n"
+ "%outdata = OpVariable %bufptr Uniform\n"
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%label = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
+ "%inval1 = OpLoad %f32 %inloc1\n"
+ "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
+ "%inval2 = OpLoad %f32 %inloc2\n"
+ "%rem = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
+ "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+ " OpStore %outloc %rem\n"
+ " OpReturn\n"
+ " OpFunctionEnd\n";
+
+ spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
+ spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
+ spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+ spec.numWorkGroups = IVec3(numElements, 1, 1);
+ spec.verifyIO = &compareNMin;
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+
+ return group.release();
+}
+
+bool compareNMax (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
+{
+ if (outputAllocs.size() != 1)
+ return false;
+
+ const BufferSp& expectedOutput = expectedOutputs[0];
+ std::vector<deUint8> data;
+ expectedOutput->getBytes(data);
+
+ const float* const expectedOutputAsFloat = reinterpret_cast<const float*>(&data.front());
+ const float* const outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
+
+ for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
+ {
+ const float f0 = expectedOutputAsFloat[idx];
+ const float f1 = outputAsFloat[idx];
+
+ // For NMax, NaN is considered acceptable result, since in
+ // architectures that do not handle NaNs, those are huge values.
+ if (!tcu::Float32(f1).isNaN() && deFloatAbs(f1 - f0) > 0.00001f)
+ return false;
+ }
+
+ return true;
+}
+
+tcu::TestCaseGroup* createOpNMaxGroup (tcu::TestContext& testCtx)
+{
+ de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnmax", "Test the OpNMax instruction"));
+ ComputeShaderSpec spec;
+ de::Random rnd (deStringHash(group->getName()));
+ const int numElements = 200;
+ vector<float> inputFloats1 (numElements, 0);
+ vector<float> inputFloats2 (numElements, 0);
+ vector<float> outputFloats (numElements, 0);
+
+ fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
+ fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
+
+ // Make the first case a full-NAN case.
+ inputFloats1[0] = TCU_NAN;
+ inputFloats2[0] = TCU_NAN;
+
+ for (size_t ndx = 0; ndx < numElements; ++ndx)
+ {
+ // By default, pick the biggest
+ outputFloats[ndx] = std::max(inputFloats1[ndx], inputFloats2[ndx]);
+
+ // Make half of the cases NaN cases
+ if ((ndx & 1) == 0)
+ {
+ // Alternate between the NaN operand
+ if ((ndx & 2) == 0)
+ {
+ outputFloats[ndx] = inputFloats2[ndx];
+ inputFloats1[ndx] = TCU_NAN;
+ }
+ else
+ {
+ outputFloats[ndx] = inputFloats1[ndx];
+ inputFloats2[ndx] = TCU_NAN;
+ }
+ }
+ }
+
+ spec.assembly =
+ "OpCapability Shader\n"
+ "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint GLCompute %main \"main\" %id\n"
+ "OpExecutionMode %main LocalSize 1 1 1\n"
+
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ "OpDecorate %buf BufferBlock\n"
+ "OpDecorate %indata1 DescriptorSet 0\n"
+ "OpDecorate %indata1 Binding 0\n"
+ "OpDecorate %indata2 DescriptorSet 0\n"
+ "OpDecorate %indata2 Binding 1\n"
+ "OpDecorate %outdata DescriptorSet 0\n"
+ "OpDecorate %outdata Binding 2\n"
+ "OpDecorate %f32arr ArrayStride 4\n"
+ "OpMemberDecorate %buf 0 Offset 0\n"
+
+ + string(getComputeAsmCommonTypes()) +
+
+ "%buf = OpTypeStruct %f32arr\n"
+ "%bufptr = OpTypePointer Uniform %buf\n"
+ "%indata1 = OpVariable %bufptr Uniform\n"
+ "%indata2 = OpVariable %bufptr Uniform\n"
+ "%outdata = OpVariable %bufptr Uniform\n"
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%label = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
+ "%inval1 = OpLoad %f32 %inloc1\n"
+ "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
+ "%inval2 = OpLoad %f32 %inloc2\n"
+ "%rem = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
+ "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+ " OpStore %outloc %rem\n"
+ " OpReturn\n"
+ " OpFunctionEnd\n";
+
+ spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
+ spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
+ spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+ spec.numWorkGroups = IVec3(numElements, 1, 1);
+ spec.verifyIO = &compareNMax;
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+
+ return group.release();
+}
+
+bool compareNClamp (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
+{
+ if (outputAllocs.size() != 1)
+ return false;
+
+ const BufferSp& expectedOutput = expectedOutputs[0];
+ std::vector<deUint8> data;
+ expectedOutput->getBytes(data);
+
+ const float* const expectedOutputAsFloat = reinterpret_cast<const float*>(&data.front());
+ const float* const outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
+
+ for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float) / 2; ++idx)
+ {
+ const float e0 = expectedOutputAsFloat[idx * 2];
+ const float e1 = expectedOutputAsFloat[idx * 2 + 1];
+ const float res = outputAsFloat[idx];
+
+ // For NClamp, we have two possible outcomes based on
+ // whether NaNs are handled or not.
+ // If either min or max value is NaN, the result is undefined,
+ // so this test doesn't stress those. If the clamped value is
+ // NaN, and NaNs are handled, the result is min; if NaNs are not
+ // handled, they are big values that result in max.
+ // If all three parameters are NaN, the result should be NaN.
+ if (!((tcu::Float32(e0).isNaN() && tcu::Float32(res).isNaN()) ||
+ (deFloatAbs(e0 - res) < 0.00001f) ||
+ (deFloatAbs(e1 - res) < 0.00001f)))
+ return false;
+ }
+
+ return true;
+}
+
+tcu::TestCaseGroup* createOpNClampGroup (tcu::TestContext& testCtx)
+{
+ de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnclamp", "Test the OpNClamp instruction"));
+ ComputeShaderSpec spec;
+ de::Random rnd (deStringHash(group->getName()));
+ const int numElements = 200;
+ vector<float> inputFloats1 (numElements, 0);
+ vector<float> inputFloats2 (numElements, 0);
+ vector<float> inputFloats3 (numElements, 0);
+ vector<float> outputFloats (numElements * 2, 0);
+
+ fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
+ fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
+ fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats3[0], numElements);
+
+ for (size_t ndx = 0; ndx < numElements; ++ndx)
+ {
+ // Results are only defined if max value is bigger than min value.
+ if (inputFloats2[ndx] > inputFloats3[ndx])
+ {
+ float t = inputFloats2[ndx];
+ inputFloats2[ndx] = inputFloats3[ndx];
+ inputFloats3[ndx] = t;
+ }
+
+ // By default, do the clamp, setting both possible answers
+ float defaultRes = std::min(std::max(inputFloats1[ndx], inputFloats2[ndx]), inputFloats3[ndx]);
+
+ float maxResA = std::max(inputFloats1[ndx], inputFloats2[ndx]);
+ float maxResB = maxResA;
+
+ // Alternate between the NaN cases
+ if (ndx & 1)
+ {
+ inputFloats1[ndx] = TCU_NAN;
+ // If NaN is handled, the result should be same as the clamp minimum.
+ // If NaN is not handled, the result should clamp to the clamp maximum.
+ maxResA = inputFloats2[ndx];
+ maxResB = inputFloats3[ndx];
+ }
+ else
+ {
+ // Not a NaN case - only one legal result.
+ maxResA = defaultRes;
+ maxResB = defaultRes;
+ }
+
+ outputFloats[ndx * 2] = maxResA;
+ outputFloats[ndx * 2 + 1] = maxResB;
+ }
+
+ // Make the first case a full-NAN case.
+ inputFloats1[0] = TCU_NAN;
+ inputFloats2[0] = TCU_NAN;
+ inputFloats3[0] = TCU_NAN;
+ outputFloats[0] = TCU_NAN;
+ outputFloats[1] = TCU_NAN;
+
+ spec.assembly =
+ "OpCapability Shader\n"
+ "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint GLCompute %main \"main\" %id\n"
+ "OpExecutionMode %main LocalSize 1 1 1\n"
+
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ "OpDecorate %buf BufferBlock\n"
+ "OpDecorate %indata1 DescriptorSet 0\n"
+ "OpDecorate %indata1 Binding 0\n"
+ "OpDecorate %indata2 DescriptorSet 0\n"
+ "OpDecorate %indata2 Binding 1\n"
+ "OpDecorate %indata3 DescriptorSet 0\n"
+ "OpDecorate %indata3 Binding 2\n"
+ "OpDecorate %outdata DescriptorSet 0\n"
+ "OpDecorate %outdata Binding 3\n"
+ "OpDecorate %f32arr ArrayStride 4\n"
+ "OpMemberDecorate %buf 0 Offset 0\n"
+
+ + string(getComputeAsmCommonTypes()) +
+
+ "%buf = OpTypeStruct %f32arr\n"
+ "%bufptr = OpTypePointer Uniform %buf\n"
+ "%indata1 = OpVariable %bufptr Uniform\n"
+ "%indata2 = OpVariable %bufptr Uniform\n"
+ "%indata3 = OpVariable %bufptr Uniform\n"
+ "%outdata = OpVariable %bufptr Uniform\n"
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%label = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
+ "%inval1 = OpLoad %f32 %inloc1\n"
+ "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
+ "%inval2 = OpLoad %f32 %inloc2\n"
+ "%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
+ "%inval3 = OpLoad %f32 %inloc3\n"
+ "%rem = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
+ "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+ " OpStore %outloc %rem\n"
+ " OpReturn\n"
+ " OpFunctionEnd\n";
+
+ spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
+ spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
+ spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
+ spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+ spec.numWorkGroups = IVec3(numElements, 1, 1);
+ spec.verifyIO = &compareNClamp;
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+
+ return group.release();
+}
+
+tcu::TestCaseGroup* createOpSRemComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+ de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsrem", "Test the OpSRem instruction"));
+ de::Random rnd (deStringHash(group->getName()));
+ const int numElements = 200;
+
+ const struct CaseParams
+ {
+ const char* name;
+ const char* failMessage; // customized status message
+ qpTestResult failResult; // override status on failure
+ int op1Min, op1Max; // operand ranges
+ int op2Min, op2Max;
+ } cases[] =
+ {
+ { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, 0, 65536, 0, 100 },
+ { "all", "Inconsistent results, but within specification", negFailResult, -65536, 65536, -100, 100 }, // see below
+ };
+ // If either operand is negative the result is undefined. Some implementations may still return correct values.
+
+ for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+ {
+ const CaseParams& params = cases[caseNdx];
+ ComputeShaderSpec spec;
+ vector<deInt32> inputInts1 (numElements, 0);
+ vector<deInt32> inputInts2 (numElements, 0);
+ vector<deInt32> outputInts (numElements, 0);
+
+ fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
+ fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
+
+ for (int ndx = 0; ndx < numElements; ++ndx)
+ {
+ // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
+ outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
+ }
+
+ spec.assembly =
+ string(getComputeAsmShaderPreamble()) +
+
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ "OpDecorate %buf BufferBlock\n"
+ "OpDecorate %indata1 DescriptorSet 0\n"
+ "OpDecorate %indata1 Binding 0\n"
+ "OpDecorate %indata2 DescriptorSet 0\n"
+ "OpDecorate %indata2 Binding 1\n"
+ "OpDecorate %outdata DescriptorSet 0\n"
+ "OpDecorate %outdata Binding 2\n"
+ "OpDecorate %i32arr ArrayStride 4\n"
+ "OpMemberDecorate %buf 0 Offset 0\n"
+
+ + string(getComputeAsmCommonTypes()) +
+
+ "%buf = OpTypeStruct %i32arr\n"
+ "%bufptr = OpTypePointer Uniform %buf\n"
+ "%indata1 = OpVariable %bufptr Uniform\n"
+ "%indata2 = OpVariable %bufptr Uniform\n"
+ "%outdata = OpVariable %bufptr Uniform\n"
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%label = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc1 = OpAccessChain %i32ptr %indata1 %zero %x\n"
+ "%inval1 = OpLoad %i32 %inloc1\n"
+ "%inloc2 = OpAccessChain %i32ptr %indata2 %zero %x\n"
+ "%inval2 = OpLoad %i32 %inloc2\n"
+ "%rem = OpSRem %i32 %inval1 %inval2\n"
+ "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
+ " OpStore %outloc %rem\n"
+ " OpReturn\n"
+ " OpFunctionEnd\n";
+
+ spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts1)));
+ spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts2)));
+ spec.outputs.push_back (BufferSp(new Int32Buffer(outputInts)));
+ spec.numWorkGroups = IVec3(numElements, 1, 1);
+ spec.failResult = params.failResult;
+ spec.failMessage = params.failMessage;
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
+ }
+
+ return group.release();
+}
+
+tcu::TestCaseGroup* createOpSRemComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+ de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsrem64", "Test the 64-bit OpSRem instruction"));
+ de::Random rnd (deStringHash(group->getName()));
+ const int numElements = 200;
+
+ const struct CaseParams
+ {
+ const char* name;
+ const char* failMessage; // customized status message
+ qpTestResult failResult; // override status on failure
+ bool positive;
+ } cases[] =
+ {
+ { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, true },
+ { "all", "Inconsistent results, but within specification", negFailResult, false }, // see below
+ };
+ // If either operand is negative the result is undefined. Some implementations may still return correct values.
+
+ for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+ {
+ const CaseParams& params = cases[caseNdx];
+ ComputeShaderSpec spec;
+ vector<deInt64> inputInts1 (numElements, 0);
+ vector<deInt64> inputInts2 (numElements, 0);
+ vector<deInt64> outputInts (numElements, 0);
+
+ if (params.positive)
+ {
+ fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
+ fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
+ }
+ else
+ {
+ fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
+ fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
+ }
+
+ for (int ndx = 0; ndx < numElements; ++ndx)
+ {
+ // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
+ outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
+ }
+
+ spec.assembly =
+ "OpCapability Int64\n"
+
+ + string(getComputeAsmShaderPreamble()) +
+
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ "OpDecorate %buf BufferBlock\n"
+ "OpDecorate %indata1 DescriptorSet 0\n"
+ "OpDecorate %indata1 Binding 0\n"
+ "OpDecorate %indata2 DescriptorSet 0\n"
+ "OpDecorate %indata2 Binding 1\n"
+ "OpDecorate %outdata DescriptorSet 0\n"
+ "OpDecorate %outdata Binding 2\n"
+ "OpDecorate %i64arr ArrayStride 8\n"
+ "OpMemberDecorate %buf 0 Offset 0\n"
+
+ + string(getComputeAsmCommonTypes())
+ + string(getComputeAsmCommonInt64Types()) +
+
+ "%buf = OpTypeStruct %i64arr\n"
+ "%bufptr = OpTypePointer Uniform %buf\n"
+ "%indata1 = OpVariable %bufptr Uniform\n"
+ "%indata2 = OpVariable %bufptr Uniform\n"
+ "%outdata = OpVariable %bufptr Uniform\n"
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i64 0\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%label = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc1 = OpAccessChain %i64ptr %indata1 %zero %x\n"
+ "%inval1 = OpLoad %i64 %inloc1\n"
+ "%inloc2 = OpAccessChain %i64ptr %indata2 %zero %x\n"
+ "%inval2 = OpLoad %i64 %inloc2\n"
+ "%rem = OpSRem %i64 %inval1 %inval2\n"
+ "%outloc = OpAccessChain %i64ptr %outdata %zero %x\n"
+ " OpStore %outloc %rem\n"
+ " OpReturn\n"
+ " OpFunctionEnd\n";
+
+ spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts1)));
+ spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts2)));
+ spec.outputs.push_back (BufferSp(new Int64Buffer(outputInts)));
+ spec.numWorkGroups = IVec3(numElements, 1, 1);
+ spec.failResult = params.failResult;
+ spec.failMessage = params.failMessage;
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec, COMPUTE_TEST_USES_INT64));
+ }
+
+ return group.release();
+}
+
+tcu::TestCaseGroup* createOpSModComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+ de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsmod", "Test the OpSMod instruction"));
+ de::Random rnd (deStringHash(group->getName()));
+ const int numElements = 200;
+
+ const struct CaseParams
+ {
+ const char* name;
+ const char* failMessage; // customized status message
+ qpTestResult failResult; // override status on failure
+ int op1Min, op1Max; // operand ranges
+ int op2Min, op2Max;
+ } cases[] =
+ {
+ { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, 0, 65536, 0, 100 },
+ { "all", "Inconsistent results, but within specification", negFailResult, -65536, 65536, -100, 100 }, // see below
+ };
+ // If either operand is negative the result is undefined. Some implementations may still return correct values.
+
+ for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+ {
+ const CaseParams& params = cases[caseNdx];
+
+ ComputeShaderSpec spec;
+ vector<deInt32> inputInts1 (numElements, 0);
+ vector<deInt32> inputInts2 (numElements, 0);
+ vector<deInt32> outputInts (numElements, 0);
+
+ fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
+ fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
+
+ for (int ndx = 0; ndx < numElements; ++ndx)
+ {
+ deInt32 rem = inputInts1[ndx] % inputInts2[ndx];
+ if (rem == 0)
+ {
+ outputInts[ndx] = 0;
+ }
+ else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
+ {
+ // They have the same sign
+ outputInts[ndx] = rem;
+ }
+ else
+ {
+ // They have opposite sign. The remainder operation takes the
+ // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
+ // of inputInts2[ndx]. Adding inputInts2[ndx] will ensure that
+ // the result has the correct sign and that it is still
+ // congruent to inputInts1[ndx] modulo inputInts2[ndx]
+ //
+ // See also http://mathforum.org/library/drmath/view/52343.html
+ outputInts[ndx] = rem + inputInts2[ndx];
+ }
+ }
+
+ spec.assembly =
+ string(getComputeAsmShaderPreamble()) +
+
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ "OpDecorate %buf BufferBlock\n"
+ "OpDecorate %indata1 DescriptorSet 0\n"
+ "OpDecorate %indata1 Binding 0\n"
+ "OpDecorate %indata2 DescriptorSet 0\n"
+ "OpDecorate %indata2 Binding 1\n"
+ "OpDecorate %outdata DescriptorSet 0\n"
+ "OpDecorate %outdata Binding 2\n"
+ "OpDecorate %i32arr ArrayStride 4\n"
+ "OpMemberDecorate %buf 0 Offset 0\n"
+
+ + string(getComputeAsmCommonTypes()) +
+
+ "%buf = OpTypeStruct %i32arr\n"
+ "%bufptr = OpTypePointer Uniform %buf\n"
+ "%indata1 = OpVariable %bufptr Uniform\n"
+ "%indata2 = OpVariable %bufptr Uniform\n"
+ "%outdata = OpVariable %bufptr Uniform\n"
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%label = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc1 = OpAccessChain %i32ptr %indata1 %zero %x\n"
+ "%inval1 = OpLoad %i32 %inloc1\n"
+ "%inloc2 = OpAccessChain %i32ptr %indata2 %zero %x\n"
+ "%inval2 = OpLoad %i32 %inloc2\n"
+ "%rem = OpSMod %i32 %inval1 %inval2\n"
+ "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
+ " OpStore %outloc %rem\n"
+ " OpReturn\n"
+ " OpFunctionEnd\n";
+
+ spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts1)));
+ spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts2)));
+ spec.outputs.push_back (BufferSp(new Int32Buffer(outputInts)));
+ spec.numWorkGroups = IVec3(numElements, 1, 1);
+ spec.failResult = params.failResult;
+ spec.failMessage = params.failMessage;
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
+ }
+
+ return group.release();
+}
+
+tcu::TestCaseGroup* createOpSModComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+ de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsmod64", "Test the OpSMod instruction"));
+ de::Random rnd (deStringHash(group->getName()));
+ const int numElements = 200;
+
+ const struct CaseParams
+ {
+ const char* name;
+ const char* failMessage; // customized status message
+ qpTestResult failResult; // override status on failure
+ bool positive;
+ } cases[] =
+ {
+ { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, true },
+ { "all", "Inconsistent results, but within specification", negFailResult, false }, // see below
+ };
+ // If either operand is negative the result is undefined. Some implementations may still return correct values.
+
+ for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+ {
+ const CaseParams& params = cases[caseNdx];
- "%buf = OpTypeStruct %f32arr\n"
- "%bufptr = OpTypePointer Uniform %buf\n"
- "%indata1 = OpVariable %bufptr Uniform\n"
- "%indata2 = OpVariable %bufptr Uniform\n"
- "%outdata = OpVariable %bufptr Uniform\n"
+ ComputeShaderSpec spec;
+ vector<deInt64> inputInts1 (numElements, 0);
+ vector<deInt64> inputInts2 (numElements, 0);
+ vector<deInt64> outputInts (numElements, 0);
- "%id = OpVariable %uvec3ptr Input\n"
- "%zero = OpConstant %i32 0\n"
- "%main = OpFunction %void None %voidf\n"
- "%label = OpLabel\n"
- "%idval = OpLoad %uvec3 %id\n"
- "%x = OpCompositeExtract %u32 %idval 0\n"
- "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
- "%inval1 = OpLoad %f32 %inloc1\n"
- "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
- "%inval2 = OpLoad %f32 %inloc2\n"
- "%rem = OpFRem %f32 %inval1 %inval2\n"
- "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
- " OpStore %outloc %rem\n"
- " OpReturn\n"
- " OpFunctionEnd\n";
+ if (params.positive)
+ {
+ fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
+ fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
+ }
+ else
+ {
+ fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
+ fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
+ }
- spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
- spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
- spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
- spec.numWorkGroups = IVec3(numElements, 1, 1);
- spec.verifyIO = &compareFRem;
+ for (int ndx = 0; ndx < numElements; ++ndx)
+ {
+ deInt64 rem = inputInts1[ndx] % inputInts2[ndx];
+ if (rem == 0)
+ {
+ outputInts[ndx] = 0;
+ }
+ else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
+ {
+ // They have the same sign
+ outputInts[ndx] = rem;
+ }
+ else
+ {
+ // They have opposite sign. The remainder operation takes the
+ // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
+ // of inputInts2[ndx]. Adding inputInts2[ndx] will ensure that
+ // the result has the correct sign and that it is still
+ // congruent to inputInts1[ndx] modulo inputInts2[ndx]
+ //
+ // See also http://mathforum.org/library/drmath/view/52343.html
+ outputInts[ndx] = rem + inputInts2[ndx];
+ }
+ }
- group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+ spec.assembly =
+ "OpCapability Int64\n"
+
+ + string(getComputeAsmShaderPreamble()) +
+
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ "OpDecorate %buf BufferBlock\n"
+ "OpDecorate %indata1 DescriptorSet 0\n"
+ "OpDecorate %indata1 Binding 0\n"
+ "OpDecorate %indata2 DescriptorSet 0\n"
+ "OpDecorate %indata2 Binding 1\n"
+ "OpDecorate %outdata DescriptorSet 0\n"
+ "OpDecorate %outdata Binding 2\n"
+ "OpDecorate %i64arr ArrayStride 8\n"
+ "OpMemberDecorate %buf 0 Offset 0\n"
+
+ + string(getComputeAsmCommonTypes())
+ + string(getComputeAsmCommonInt64Types()) +
+
+ "%buf = OpTypeStruct %i64arr\n"
+ "%bufptr = OpTypePointer Uniform %buf\n"
+ "%indata1 = OpVariable %bufptr Uniform\n"
+ "%indata2 = OpVariable %bufptr Uniform\n"
+ "%outdata = OpVariable %bufptr Uniform\n"
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i64 0\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%label = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc1 = OpAccessChain %i64ptr %indata1 %zero %x\n"
+ "%inval1 = OpLoad %i64 %inloc1\n"
+ "%inloc2 = OpAccessChain %i64ptr %indata2 %zero %x\n"
+ "%inval2 = OpLoad %i64 %inloc2\n"
+ "%rem = OpSMod %i64 %inval1 %inval2\n"
+ "%outloc = OpAccessChain %i64ptr %outdata %zero %x\n"
+ " OpStore %outloc %rem\n"
+ " OpReturn\n"
+ " OpFunctionEnd\n";
+
+ spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts1)));
+ spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts2)));
+ spec.outputs.push_back (BufferSp(new Int64Buffer(outputInts)));
+ spec.numWorkGroups = IVec3(numElements, 1, 1);
+ spec.failResult = params.failResult;
+ spec.failMessage = params.failMessage;
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec, COMPUTE_TEST_USES_INT64));
+ }
return group.release();
}
vector<deInt32> outputInts3 (numElements, 0);
vector<deInt32> outputInts4 (numElements, 0);
const StringTemplate shaderTemplate (
- string(getComputeAsmShaderPreamble()) +
+ "${CAPABILITIES:opt}"
+ + string(getComputeAsmShaderPreamble()) +
"OpName %main \"main\"\n"
"OpName %id \"gl_GlobalInvocationID\"\n"
+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
+ "${OPTYPE_DEFINITIONS:opt}"
"%buf = OpTypeStruct %i32arr\n"
"%bufptr = OpTypePointer Uniform %buf\n"
"%indata = OpVariable %bufptr Uniform\n"
"%main = OpFunction %void None %voidf\n"
"%label = OpLabel\n"
+ "${TYPE_CONVERT:opt}"
"%idval = OpLoad %uvec3 %id\n"
"%x = OpCompositeExtract %u32 %idval 0\n"
"%inloc = OpAccessChain %i32ptr %indata %zero %x\n"
}
const char addScToInput[] = "OpIAdd %i32 %inval %sc_final";
+ const char addSc32ToInput[] = "OpIAdd %i32 %inval %sc_final32";
const char selectTrueUsingSc[] = "OpSelect %i32 %sc_final %inval %zero";
const char selectFalseUsingSc[] = "OpSelect %i32 %sc_final %zero %inval";
cases.push_back(SpecConstantTwoIntCase("not", " %i32 0", " %i32 0", "%i32", "Not %sc_0", -43, 0, addScToInput, outputInts1));
cases.push_back(SpecConstantTwoIntCase("logicalnot", "False %bool", "False %bool", "%bool", "LogicalNot %sc_0", 1, 0, selectFalseUsingSc, outputInts2));
cases.push_back(SpecConstantTwoIntCase("select", "False %bool", " %i32 0", "%i32", "Select %sc_0 %sc_1 %zero", 1, 42, addScToInput, outputInts1));
- // OpSConvert, OpFConvert: these two instructions involve ints/floats of different bitwidths.
+ cases.push_back(SpecConstantTwoIntCase("sconvert", " %i32 0", " %i32 0", "%i16", "SConvert %sc_0", -11200, 0, addSc32ToInput, outputInts3));
+ // -969998336 stored as 32-bit two's complement is the binary representation of -11200 as IEEE-754 Float
+ cases.push_back(SpecConstantTwoIntCase("fconvert", " %f32 0", " %f32 0", "%f64", "FConvert %sc_0", -969998336, 0, addSc32ToInput, outputInts3));
for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
{
map<string, string> specializations;
ComputeShaderSpec spec;
+ ComputeTestFeatures features = COMPUTE_TEST_USES_NONE;
specializations["SC_DEF0"] = cases[caseNdx].scDefinition0;
specializations["SC_DEF1"] = cases[caseNdx].scDefinition1;
specializations["SC_OP"] = cases[caseNdx].scOperation;
specializations["GEN_RESULT"] = cases[caseNdx].resultOperation;
+ // Special SPIR-V code for SConvert-case
+ if (strcmp(cases[caseNdx].caseName, "sconvert") == 0)
+ {
+ features = COMPUTE_TEST_USES_INT16;
+ specializations["CAPABILITIES"] = "OpCapability Int16\n"; // Adds 16-bit integer capability
+ specializations["OPTYPE_DEFINITIONS"] = "%i16 = OpTypeInt 16 1\n"; // Adds 16-bit integer type
+ specializations["TYPE_CONVERT"] = "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 16-bit integer to 32-bit integer
+ }
+
+ // Special SPIR-V code for FConvert-case
+ if (strcmp(cases[caseNdx].caseName, "fconvert") == 0)
+ {
+ features = COMPUTE_TEST_USES_FLOAT64;
+ specializations["CAPABILITIES"] = "OpCapability Float64\n"; // Adds 64-bit float capability
+ specializations["OPTYPE_DEFINITIONS"] = "%f64 = OpTypeFloat 64\n"; // Adds 64-bit float type
+ specializations["TYPE_CONVERT"] = "%sc_final32 = OpConvertFToS %i32 %sc_final\n"; // Converts 64-bit float to 32-bit integer
+ }
+
spec.assembly = shaderTemplate.specialize(specializations);
spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
spec.specConstants.push_back(cases[caseNdx].scActualValue0);
spec.specConstants.push_back(cases[caseNdx].scActualValue1);
- group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName, cases[caseNdx].caseName, spec));
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName, cases[caseNdx].caseName, spec, features));
}
ComputeShaderSpec spec;
+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
- "%ivec3 = OpTypeVector %i32 3\n"
- "%buf = OpTypeStruct %i32arr\n"
- "%bufptr = OpTypePointer Uniform %buf\n"
- "%indata = OpVariable %bufptr Uniform\n"
- "%outdata = OpVariable %bufptr Uniform\n"
+ "%ivec3 = OpTypeVector %i32 3\n"
+ "%buf = OpTypeStruct %i32arr\n"
+ "%bufptr = OpTypePointer Uniform %buf\n"
+ "%indata = OpVariable %bufptr Uniform\n"
+ "%outdata = OpVariable %bufptr Uniform\n"
- "%id = OpVariable %uvec3ptr Input\n"
- "%zero = OpConstant %i32 0\n"
- "%ivec3_0 = OpConstantComposite %ivec3 %zero %zero %zero\n"
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+ "%ivec3_0 = OpConstantComposite %ivec3 %zero %zero %zero\n"
+ "%vec3_undef = OpUndef %ivec3\n"
"%sc_0 = OpSpecConstant %i32 0\n"
"%sc_1 = OpSpecConstant %i32 0\n"
"%sc_2 = OpSpecConstant %i32 0\n"
- "%sc_vec3_0 = OpSpecConstantOp %ivec3 CompositeInsert %sc_0 %ivec3_0 0\n" // (sc_0, 0, 0)
- "%sc_vec3_1 = OpSpecConstantOp %ivec3 CompositeInsert %sc_1 %ivec3_0 1\n" // (0, sc_1, 0)
- "%sc_vec3_2 = OpSpecConstantOp %ivec3 CompositeInsert %sc_2 %ivec3_0 2\n" // (0, 0, sc_2)
- "%sc_vec3_01 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0 %sc_vec3_1 1 0 4\n" // (0, sc_0, sc_1)
- "%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_01 %sc_vec3_2 5 1 2\n" // (sc_2, sc_0, sc_1)
- "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
- "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
- "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
- "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
- "%sc_final = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n" // (sc_2 - sc_0) * sc_1
+ "%sc_vec3_0 = OpSpecConstantOp %ivec3 CompositeInsert %sc_0 %ivec3_0 0\n" // (sc_0, 0, 0)
+ "%sc_vec3_1 = OpSpecConstantOp %ivec3 CompositeInsert %sc_1 %ivec3_0 1\n" // (0, sc_1, 0)
+ "%sc_vec3_2 = OpSpecConstantOp %ivec3 CompositeInsert %sc_2 %ivec3_0 2\n" // (0, 0, sc_2)
+ "%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0 %vec3_undef 0 0xFFFFFFFF 2\n" // (sc_0, ???, 0)
+ "%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_1 %vec3_undef 0xFFFFFFFF 1 0\n" // (???, sc_1, 0)
+ "%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle %vec3_undef %sc_vec3_2 5 0xFFFFFFFF 5\n" // (sc_2, ???, sc_2)
+ "%sc_vec3_01 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n" // (0, sc_0, sc_1)
+ "%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_01 %sc_vec3_2_s 5 1 2\n" // (sc_2, sc_0, sc_1)
+ "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
+ "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
+ "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
+ "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
+ "%sc_final = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n" // (sc_2 - sc_0) * sc_1
"%main = OpFunction %void None %voidf\n"
"%label = OpLabel\n"
spec.specConstants.push_back(56);
spec.specConstants.push_back(-77);
- group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", "VectorShuffle, CompositeExtract, & CompositeInsert", spec));
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", "VectorShuffle, CompositeExtract, & CompositeInsert", spec));
+
+ return group.release();
+}
+
+void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestContext& testCtx)
+{
+ ComputeShaderSpec specInt;
+ ComputeShaderSpec specFloat;
+ ComputeShaderSpec specVec3;
+ ComputeShaderSpec specMat4;
+ ComputeShaderSpec specArray;
+ ComputeShaderSpec specStruct;
+ de::Random rnd (deStringHash(group->getName()));
+ const int numElements = 100;
+ vector<float> inputFloats (numElements, 0);
+ vector<float> outputFloats (numElements, 0);
+
+ fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
+
+ // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
+ floorAll(inputFloats);
+
+ for (size_t ndx = 0; ndx < numElements; ++ndx)
+ {
+ // Just check if the value is positive or not
+ outputFloats[ndx] = (inputFloats[ndx] > 0) ? 1.0f : -1.0f;
+ }
+
+ // All of the tests are of the form:
+ //
+ // testtype r
+ //
+ // if (inputdata > 0)
+ // r = 1
+ // else
+ // r = -1
+ //
+ // return (float)r
+
+ specFloat.assembly =
+ string(getComputeAsmShaderPreamble()) +
+
+ "OpSource GLSL 430\n"
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+ "%float_0 = OpConstant %f32 0.0\n"
+ "%float_1 = OpConstant %f32 1.0\n"
+ "%float_n1 = OpConstant %f32 -1.0\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%entry = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
+ "%inval = OpLoad %f32 %inloc\n"
+
+ "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
+ " OpSelectionMerge %cm None\n"
+ " OpBranchConditional %comp %tb %fb\n"
+ "%tb = OpLabel\n"
+ " OpBranch %cm\n"
+ "%fb = OpLabel\n"
+ " OpBranch %cm\n"
+ "%cm = OpLabel\n"
+ "%res = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
+
+ "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+ " OpStore %outloc %res\n"
+ " OpReturn\n"
+
+ " OpFunctionEnd\n";
+ specFloat.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+ specFloat.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+ specFloat.numWorkGroups = IVec3(numElements, 1, 1);
+
+ specMat4.assembly =
+ string(getComputeAsmShaderPreamble()) +
+
+ "OpSource GLSL 430\n"
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%v4f32 = OpTypeVector %f32 4\n"
+ "%mat4v4f32 = OpTypeMatrix %v4f32 4\n"
+ "%zero = OpConstant %i32 0\n"
+ "%float_0 = OpConstant %f32 0.0\n"
+ "%float_1 = OpConstant %f32 1.0\n"
+ "%float_n1 = OpConstant %f32 -1.0\n"
+ "%m11 = OpConstantComposite %v4f32 %float_1 %float_0 %float_0 %float_0\n"
+ "%m12 = OpConstantComposite %v4f32 %float_0 %float_1 %float_0 %float_0\n"
+ "%m13 = OpConstantComposite %v4f32 %float_0 %float_0 %float_1 %float_0\n"
+ "%m14 = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_1\n"
+ "%m1 = OpConstantComposite %mat4v4f32 %m11 %m12 %m13 %m14\n"
+ "%m21 = OpConstantComposite %v4f32 %float_n1 %float_0 %float_0 %float_0\n"
+ "%m22 = OpConstantComposite %v4f32 %float_0 %float_n1 %float_0 %float_0\n"
+ "%m23 = OpConstantComposite %v4f32 %float_0 %float_0 %float_n1 %float_0\n"
+ "%m24 = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_n1\n"
+ "%m2 = OpConstantComposite %mat4v4f32 %m21 %m22 %m23 %m24\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%entry = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
+ "%inval = OpLoad %f32 %inloc\n"
+
+ "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
+ " OpSelectionMerge %cm None\n"
+ " OpBranchConditional %comp %tb %fb\n"
+ "%tb = OpLabel\n"
+ " OpBranch %cm\n"
+ "%fb = OpLabel\n"
+ " OpBranch %cm\n"
+ "%cm = OpLabel\n"
+ "%mres = OpPhi %mat4v4f32 %m1 %tb %m2 %fb\n"
+ "%res = OpCompositeExtract %f32 %mres 2 2\n"
+
+ "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+ " OpStore %outloc %res\n"
+ " OpReturn\n"
+
+ " OpFunctionEnd\n";
+ specMat4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+ specMat4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+ specMat4.numWorkGroups = IVec3(numElements, 1, 1);
+
+ specVec3.assembly =
+ string(getComputeAsmShaderPreamble()) +
+
+ "OpSource GLSL 430\n"
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+ "%float_0 = OpConstant %f32 0.0\n"
+ "%float_1 = OpConstant %f32 1.0\n"
+ "%float_n1 = OpConstant %f32 -1.0\n"
+ "%v1 = OpConstantComposite %fvec3 %float_1 %float_1 %float_1\n"
+ "%v2 = OpConstantComposite %fvec3 %float_n1 %float_n1 %float_n1\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%entry = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
+ "%inval = OpLoad %f32 %inloc\n"
+
+ "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
+ " OpSelectionMerge %cm None\n"
+ " OpBranchConditional %comp %tb %fb\n"
+ "%tb = OpLabel\n"
+ " OpBranch %cm\n"
+ "%fb = OpLabel\n"
+ " OpBranch %cm\n"
+ "%cm = OpLabel\n"
+ "%vres = OpPhi %fvec3 %v1 %tb %v2 %fb\n"
+ "%res = OpCompositeExtract %f32 %vres 2\n"
+
+ "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+ " OpStore %outloc %res\n"
+ " OpReturn\n"
+
+ " OpFunctionEnd\n";
+ specVec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+ specVec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+ specVec3.numWorkGroups = IVec3(numElements, 1, 1);
+
+ specInt.assembly =
+ string(getComputeAsmShaderPreamble()) +
+
+ "OpSource GLSL 430\n"
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+ "%float_0 = OpConstant %f32 0.0\n"
+ "%i1 = OpConstant %i32 1\n"
+ "%i2 = OpConstant %i32 -1\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%entry = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
+ "%inval = OpLoad %f32 %inloc\n"
+
+ "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
+ " OpSelectionMerge %cm None\n"
+ " OpBranchConditional %comp %tb %fb\n"
+ "%tb = OpLabel\n"
+ " OpBranch %cm\n"
+ "%fb = OpLabel\n"
+ " OpBranch %cm\n"
+ "%cm = OpLabel\n"
+ "%ires = OpPhi %i32 %i1 %tb %i2 %fb\n"
+ "%res = OpConvertSToF %f32 %ires\n"
+
+ "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+ " OpStore %outloc %res\n"
+ " OpReturn\n"
+
+ " OpFunctionEnd\n";
+ specInt.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+ specInt.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+ specInt.numWorkGroups = IVec3(numElements, 1, 1);
+
+ specArray.assembly =
+ string(getComputeAsmShaderPreamble()) +
+
+ "OpSource GLSL 430\n"
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+ "%u7 = OpConstant %u32 7\n"
+ "%float_0 = OpConstant %f32 0.0\n"
+ "%float_1 = OpConstant %f32 1.0\n"
+ "%float_n1 = OpConstant %f32 -1.0\n"
+ "%f32a7 = OpTypeArray %f32 %u7\n"
+ "%a1 = OpConstantComposite %f32a7 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1\n"
+ "%a2 = OpConstantComposite %f32a7 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1\n"
+ "%main = OpFunction %void None %voidf\n"
+ "%entry = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
+ "%inval = OpLoad %f32 %inloc\n"
+
+ "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
+ " OpSelectionMerge %cm None\n"
+ " OpBranchConditional %comp %tb %fb\n"
+ "%tb = OpLabel\n"
+ " OpBranch %cm\n"
+ "%fb = OpLabel\n"
+ " OpBranch %cm\n"
+ "%cm = OpLabel\n"
+ "%ares = OpPhi %f32a7 %a1 %tb %a2 %fb\n"
+ "%res = OpCompositeExtract %f32 %ares 5\n"
+
+ "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+ " OpStore %outloc %res\n"
+ " OpReturn\n"
+
+ " OpFunctionEnd\n";
+ specArray.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+ specArray.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+ specArray.numWorkGroups = IVec3(numElements, 1, 1);
+
+ specStruct.assembly =
+ string(getComputeAsmShaderPreamble()) +
+
+ "OpSource GLSL 430\n"
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+ "%float_0 = OpConstant %f32 0.0\n"
+ "%float_1 = OpConstant %f32 1.0\n"
+ "%float_n1 = OpConstant %f32 -1.0\n"
+
+ "%v2f32 = OpTypeVector %f32 2\n"
+ "%Data2 = OpTypeStruct %f32 %v2f32\n"
+ "%Data = OpTypeStruct %Data2 %f32\n"
+
+ "%in1a = OpConstantComposite %v2f32 %float_1 %float_1\n"
+ "%in1b = OpConstantComposite %Data2 %float_1 %in1a\n"
+ "%s1 = OpConstantComposite %Data %in1b %float_1\n"
+ "%in2a = OpConstantComposite %v2f32 %float_n1 %float_n1\n"
+ "%in2b = OpConstantComposite %Data2 %float_n1 %in2a\n"
+ "%s2 = OpConstantComposite %Data %in2b %float_n1\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%entry = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
+ "%inval = OpLoad %f32 %inloc\n"
+
+ "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
+ " OpSelectionMerge %cm None\n"
+ " OpBranchConditional %comp %tb %fb\n"
+ "%tb = OpLabel\n"
+ " OpBranch %cm\n"
+ "%fb = OpLabel\n"
+ " OpBranch %cm\n"
+ "%cm = OpLabel\n"
+ "%sres = OpPhi %Data %s1 %tb %s2 %fb\n"
+ "%res = OpCompositeExtract %f32 %sres 0 0\n"
+
+ "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+ " OpStore %outloc %res\n"
+ " OpReturn\n"
+
+ " OpFunctionEnd\n";
+ specStruct.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+ specStruct.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+ specStruct.numWorkGroups = IVec3(numElements, 1, 1);
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_int", "OpPhi with int variables", specInt));
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float", "OpPhi with float variables", specFloat));
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_vec3", "OpPhi with vec3 variables", specVec3));
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_mat4", "OpPhi with mat4 variables", specMat4));
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_array", "OpPhi with array variables", specArray));
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_struct", "OpPhi with struct variables", specStruct));
+}
+
+string generateConstantDefinitions (int count)
+{
+ std::ostringstream r;
+ for (int i = 0; i < count; i++)
+ r << "%cf" << (i * 10 + 5) << " = OpConstant %f32 " <<(i * 10 + 5) << ".0\n";
+ r << "\n";
+ return r.str();
+}
+
+string generateSwitchCases (int count)
+{
+ std::ostringstream r;
+ for (int i = 0; i < count; i++)
+ r << " " << i << " %case" << i;
+ r << "\n";
+ return r.str();
+}
+
+string generateSwitchTargets (int count)
+{
+ std::ostringstream r;
+ for (int i = 0; i < count; i++)
+ r << "%case" << i << " = OpLabel\n OpBranch %phi\n";
+ r << "\n";
+ return r.str();
+}
+
+string generateOpPhiParams (int count)
+{
+ std::ostringstream r;
+ for (int i = 0; i < count; i++)
+ r << " %cf" << (i * 10 + 5) << " %case" << i;
+ r << "\n";
+ return r.str();
+}
+
+string generateIntWidth (int value)
+{
+ std::ostringstream r;
+ r << value;
+ return r.str();
+}
+
+// Expand input string by injecting "ABC" between the input
+// string characters. The acc/add/treshold parameters are used
+// to skip some of the injections to make the result less
+// uniform (and a lot shorter).
+string expandOpPhiCase5 (const string& s, int &acc, int add, int treshold)
+{
+ std::ostringstream res;
+ const char* p = s.c_str();
+
+ while (*p)
+ {
+ res << *p;
+ acc += add;
+ if (acc > treshold)
+ {
+ acc -= treshold;
+ res << "ABC";
+ }
+ p++;
+ }
+ return res.str();
+}
+
+// Calculate expected result based on the code string
+float calcOpPhiCase5 (float val, const string& s)
+{
+ const char* p = s.c_str();
+ float x[8];
+ bool b[8];
+ const float tv[8] = { 0.5f, 1.5f, 3.5f, 7.5f, 15.5f, 31.5f, 63.5f, 127.5f };
+ const float v = deFloatAbs(val);
+ float res = 0;
+ int depth = -1;
+ int skip = 0;
+
+ for (int i = 7; i >= 0; --i)
+ x[i] = std::fmod((float)v, (float)(2 << i));
+ for (int i = 7; i >= 0; --i)
+ b[i] = x[i] > tv[i];
+
+ while (*p)
+ {
+ if (*p == 'A')
+ {
+ depth++;
+ if (skip == 0 && b[depth])
+ {
+ res++;
+ }
+ else
+ skip++;
+ }
+ if (*p == 'B')
+ {
+ if (skip)
+ skip--;
+ if (b[depth] || skip)
+ skip++;
+ }
+ if (*p == 'C')
+ {
+ depth--;
+ if (skip)
+ skip--;
+ }
+ p++;
+ }
+ return res;
+}
- return group.release();
+// In the code string, the letters represent the following:
+//
+// A:
+// if (certain bit is set)
+// {
+// result++;
+//
+// B:
+// } else {
+//
+// C:
+// }
+//
+// examples:
+// AABCBC leads to if(){r++;if(){r++;}else{}}else{}
+// ABABCC leads to if(){r++;}else{if(){r++;}else{}}
+// ABCABC leads to if(){r++;}else{}if(){r++;}else{}
+//
+// Code generation gets a bit complicated due to the else-branches,
+// which do not generate new values. Thus, the generator needs to
+// keep track of the previous variable change seen by the else
+// branch.
+string generateOpPhiCase5 (const string& s)
+{
+ std::stack<int> idStack;
+ std::stack<std::string> value;
+ std::stack<std::string> valueLabel;
+ std::stack<std::string> mergeLeft;
+ std::stack<std::string> mergeRight;
+ std::ostringstream res;
+ const char* p = s.c_str();
+ int depth = -1;
+ int currId = 0;
+ int iter = 0;
+
+ idStack.push(-1);
+ value.push("%f32_0");
+ valueLabel.push("%f32_0 %entry");
+
+ while (*p)
+ {
+ if (*p == 'A')
+ {
+ depth++;
+ currId = iter;
+ idStack.push(currId);
+ res << "\tOpSelectionMerge %m" << currId << " None\n";
+ res << "\tOpBranchConditional %b" << depth << " %t" << currId << " %f" << currId << "\n";
+ res << "%t" << currId << " = OpLabel\n";
+ res << "%rt" << currId << " = OpFAdd %f32 " << value.top() << " %f32_1\n";
+ std::ostringstream tag;
+ tag << "%rt" << currId;
+ value.push(tag.str());
+ tag << " %t" << currId;
+ valueLabel.push(tag.str());
+ }
+
+ if (*p == 'B')
+ {
+ mergeLeft.push(valueLabel.top());
+ value.pop();
+ valueLabel.pop();
+ res << "\tOpBranch %m" << currId << "\n";
+ res << "%f" << currId << " = OpLabel\n";
+ std::ostringstream tag;
+ tag << value.top() << " %f" << currId;
+ valueLabel.pop();
+ valueLabel.push(tag.str());
+ }
+
+ if (*p == 'C')
+ {
+ mergeRight.push(valueLabel.top());
+ res << "\tOpBranch %m" << currId << "\n";
+ res << "%m" << currId << " = OpLabel\n";
+ if (*(p + 1) == 0)
+ res << "%res"; // last result goes to %res
+ else
+ res << "%rm" << currId;
+ res << " = OpPhi %f32 " << mergeLeft.top() << " " << mergeRight.top() << "\n";
+ std::ostringstream tag;
+ tag << "%rm" << currId;
+ value.pop();
+ value.push(tag.str());
+ tag << " %m" << currId;
+ valueLabel.pop();
+ valueLabel.push(tag.str());
+ mergeLeft.pop();
+ mergeRight.pop();
+ depth--;
+ idStack.pop();
+ currId = idStack.top();
+ }
+ p++;
+ iter++;
+ }
+ return res.str();
}
tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
ComputeShaderSpec spec1;
ComputeShaderSpec spec2;
ComputeShaderSpec spec3;
+ ComputeShaderSpec spec4;
+ ComputeShaderSpec spec5;
de::Random rnd (deStringHash(group->getName()));
const int numElements = 100;
vector<float> inputFloats (numElements, 0);
vector<float> outputFloats1 (numElements, 0);
vector<float> outputFloats2 (numElements, 0);
vector<float> outputFloats3 (numElements, 0);
+ vector<float> outputFloats4 (numElements, 0);
+ vector<float> outputFloats5 (numElements, 0);
+ std::string codestring = "ABC";
+ const int test4Width = 1024;
+
+ // Build case 5 code string. Each iteration makes the hierarchy more complicated.
+ // 9 iterations with (7, 24) parameters makes the hierarchy 8 deep with about 1500 lines of
+ // shader code.
+ for (int i = 0, acc = 0; i < 9; i++)
+ codestring = expandOpPhiCase5(codestring, acc, 7, 24);
fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
}
outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
outputFloats3[ndx] = 8.5f - inputFloats[ndx];
+
+ int index4 = (int)deFloor(deAbs((float)ndx * inputFloats[ndx]));
+ outputFloats4[ndx] = (float)(index4 % test4Width) * 10.0f + 5.0f;
+
+ outputFloats5[ndx] = calcOpPhiCase5(inputFloats[ndx], codestring);
}
spec1.assembly =
group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", "Swap the values of two variables using OpPhi", spec3));
+ spec4.assembly =
+ "OpCapability Shader\n"
+ "%ext = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint GLCompute %main \"main\" %id\n"
+ "OpExecutionMode %main LocalSize 1 1 1\n"
+
+ "OpSource GLSL 430\n"
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+ "%cimod = OpConstant %u32 " + generateIntWidth(test4Width) + "\n"
+
+ + generateConstantDefinitions(test4Width) +
+
+ "%main = OpFunction %void None %voidf\n"
+ "%entry = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
+ "%inval = OpLoad %f32 %inloc\n"
+ "%xf = OpConvertUToF %f32 %x\n"
+ "%xm = OpFMul %f32 %xf %inval\n"
+ "%xa = OpExtInst %f32 %ext FAbs %xm\n"
+ "%xi = OpConvertFToU %u32 %xa\n"
+ "%selector = OpUMod %u32 %xi %cimod\n"
+ " OpSelectionMerge %phi None\n"
+ " OpSwitch %selector %default "
+
+ + generateSwitchCases(test4Width) +
+
+ "%default = OpLabel\n"
+ " OpUnreachable\n"
+
+ + generateSwitchTargets(test4Width) +
+
+ "%phi = OpLabel\n"
+ "%result = OpPhi %f32"
+
+ + generateOpPhiParams(test4Width) +
+
+ "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+ " OpStore %outloc %result\n"
+ " OpReturn\n"
+
+ " OpFunctionEnd\n";
+ spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+ spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
+ spec4.numWorkGroups = IVec3(numElements, 1, 1);
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "wide", "OpPhi with a lot of parameters", spec4));
+
+ spec5.assembly =
+ "OpCapability Shader\n"
+ "%ext = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint GLCompute %main \"main\" %id\n"
+ "OpExecutionMode %main LocalSize 1 1 1\n"
+ "%code = OpString \"" + codestring + "\"\n"
+
+ "OpSource GLSL 430\n"
+ "OpName %main \"main\"\n"
+ "OpName %id \"gl_GlobalInvocationID\"\n"
+
+ "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+ + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+ "%id = OpVariable %uvec3ptr Input\n"
+ "%zero = OpConstant %i32 0\n"
+ "%f32_0 = OpConstant %f32 0.0\n"
+ "%f32_0_5 = OpConstant %f32 0.5\n"
+ "%f32_1 = OpConstant %f32 1.0\n"
+ "%f32_1_5 = OpConstant %f32 1.5\n"
+ "%f32_2 = OpConstant %f32 2.0\n"
+ "%f32_3_5 = OpConstant %f32 3.5\n"
+ "%f32_4 = OpConstant %f32 4.0\n"
+ "%f32_7_5 = OpConstant %f32 7.5\n"
+ "%f32_8 = OpConstant %f32 8.0\n"
+ "%f32_15_5 = OpConstant %f32 15.5\n"
+ "%f32_16 = OpConstant %f32 16.0\n"
+ "%f32_31_5 = OpConstant %f32 31.5\n"
+ "%f32_32 = OpConstant %f32 32.0\n"
+ "%f32_63_5 = OpConstant %f32 63.5\n"
+ "%f32_64 = OpConstant %f32 64.0\n"
+ "%f32_127_5 = OpConstant %f32 127.5\n"
+ "%f32_128 = OpConstant %f32 128.0\n"
+ "%f32_256 = OpConstant %f32 256.0\n"
+
+ "%main = OpFunction %void None %voidf\n"
+ "%entry = OpLabel\n"
+ "%idval = OpLoad %uvec3 %id\n"
+ "%x = OpCompositeExtract %u32 %idval 0\n"
+ "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
+ "%inval = OpLoad %f32 %inloc\n"
+
+ "%xabs = OpExtInst %f32 %ext FAbs %inval\n"
+ "%x8 = OpFMod %f32 %xabs %f32_256\n"
+ "%x7 = OpFMod %f32 %xabs %f32_128\n"
+ "%x6 = OpFMod %f32 %xabs %f32_64\n"
+ "%x5 = OpFMod %f32 %xabs %f32_32\n"
+ "%x4 = OpFMod %f32 %xabs %f32_16\n"
+ "%x3 = OpFMod %f32 %xabs %f32_8\n"
+ "%x2 = OpFMod %f32 %xabs %f32_4\n"
+ "%x1 = OpFMod %f32 %xabs %f32_2\n"
+
+ "%b7 = OpFOrdGreaterThanEqual %bool %x8 %f32_127_5\n"
+ "%b6 = OpFOrdGreaterThanEqual %bool %x7 %f32_63_5\n"
+ "%b5 = OpFOrdGreaterThanEqual %bool %x6 %f32_31_5\n"
+ "%b4 = OpFOrdGreaterThanEqual %bool %x5 %f32_15_5\n"
+ "%b3 = OpFOrdGreaterThanEqual %bool %x4 %f32_7_5\n"
+ "%b2 = OpFOrdGreaterThanEqual %bool %x3 %f32_3_5\n"
+ "%b1 = OpFOrdGreaterThanEqual %bool %x2 %f32_1_5\n"
+ "%b0 = OpFOrdGreaterThanEqual %bool %x1 %f32_0_5\n"
+
+ + generateOpPhiCase5(codestring) +
+
+ "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
+ " OpStore %outloc %res\n"
+ " OpReturn\n"
+
+ " OpFunctionEnd\n";
+ spec5.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+ spec5.outputs.push_back(BufferSp(new Float32Buffer(outputFloats5)));
+ spec5.numWorkGroups = IVec3(numElements, 1, 1);
+
+ group->addChild(new SpvAsmComputeShaderCase(testCtx, "nested", "Stress OpPhi with a lot of nesting", spec5));
+
+ createOpPhiVartypeTests(group, testCtx);
+
return group.release();
}
if (outputAllocs.size() != 1)
return false;
- // We really just need this for size because we cannot compare Nans.
- const BufferSp& expectedOutput = expectedOutputs[0];
- const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+ // Only size is needed because we cannot compare Nans.
+ size_t byteSize = expectedOutputs[0]->getByteSize();
+
+ const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
- if (expectedOutput->getNumBytes() != 4*sizeof(float)) {
+ if (byteSize != 4*sizeof(float)) {
return false;
}
if (outputAllocs.size() != 1)
return false;
- // We really just need this for size because we cannot compare Nans.
- const BufferSp& expectedOutput = expectedOutputs[0];
- const float* output_as_float = static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+ // Only size is needed because we cannot compare Nans.
+ size_t byteSize = expectedOutputs[0]->getByteSize();
- for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float); ++idx)
+ const float* const output_as_float = static_cast<const float* const>(outputAllocs[0]->getHostPtr());
+
+ for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
{
if (!deFloatIsNaN(output_as_float[idx]))
{
return group.release();
}
+
} // anonymous
tcu::TestCaseGroup* createOpSourceTests (tcu::TestContext& testCtx)
"OpDecorate %sc_1 SpecId 1\n";
const char typesAndConstants1[] =
+ "${OPTYPE_DEFINITIONS:opt}"
"%sc_0 = OpSpecConstant${SC_DEF0}\n"
"%sc_1 = OpSpecConstant${SC_DEF1}\n"
"%sc_op = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
"%test_code = OpFunction %v4f32 None %v4f32_function\n"
"%param = OpFunctionParameter %v4f32\n"
"%label = OpLabel\n"
+ "${TYPE_CONVERT:opt}"
"%result = OpVariable %fp_v4f32 Function\n"
" OpStore %result %param\n"
"%gen = ${GEN_RESULT}\n"
outputColors2[3] = RGBA(0, 0, 255, 255);
const char addZeroToSc[] = "OpIAdd %i32 %c_i32_0 %sc_op";
+ const char addZeroToSc32[] = "OpIAdd %i32 %c_i32_0 %sc_op32";
const char selectTrueUsingSc[] = "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
const char selectFalseUsingSc[] = "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
cases.push_back(SpecConstantTwoIntGraphicsCase("not", " %i32 0", " %i32 0", "%i32", "Not %sc_0", -2, 0, addZeroToSc, outputColors2));
cases.push_back(SpecConstantTwoIntGraphicsCase("logicalnot", "False %bool", "False %bool", "%bool", "LogicalNot %sc_0", 1, 0, selectFalseUsingSc, outputColors2));
cases.push_back(SpecConstantTwoIntGraphicsCase("select", "False %bool", " %i32 0", "%i32", "Select %sc_0 %sc_1 %c_i32_0", 1, 1, addZeroToSc, outputColors2));
- // OpSConvert, OpFConvert: these two instructions involve ints/floats of different bitwidths.
+ cases.push_back(SpecConstantTwoIntGraphicsCase("sconvert", " %i32 0", " %i32 0", "%i16", "SConvert %sc_0", -1, 0, addZeroToSc32, outputColors0));
+ // -1082130432 stored as 32-bit two's complement is the binary representation of -1 as IEEE-754 Float
+ cases.push_back(SpecConstantTwoIntGraphicsCase("fconvert", " %f32 0", " %f32 0", "%f64", "FConvert %sc_0", -1082130432, 0, addZeroToSc32, outputColors0));
// \todo[2015-12-1 antiagainst] OpQuantizeToF16
for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
{
- map<string, string> specializations;
- map<string, string> fragments;
- vector<deInt32> specConstants;
+ map<string, string> specializations;
+ map<string, string> fragments;
+ vector<deInt32> specConstants;
+ vector<string> features;
+ PushConstants noPushConstants;
+ GraphicsResources noResources;
+ GraphicsInterfaces noInterfaces;
+ std::vector<std::string> noExtensions;
+
+ // Special SPIR-V code for SConvert-case
+ if (strcmp(cases[caseNdx].caseName, "sconvert") == 0)
+ {
+ features.push_back("shaderInt16");
+ fragments["capability"] = "OpCapability Int16\n"; // Adds 16-bit integer capability
+ specializations["OPTYPE_DEFINITIONS"] = "%i16 = OpTypeInt 16 1\n"; // Adds 16-bit integer type
+ specializations["TYPE_CONVERT"] = "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 16-bit integer to 32-bit integer
+ }
+
+ // Special SPIR-V code for FConvert-case
+ if (strcmp(cases[caseNdx].caseName, "fconvert") == 0)
+ {
+ features.push_back("shaderFloat64");
+ fragments["capability"] = "OpCapability Float64\n"; // Adds 64-bit float capability
+ specializations["OPTYPE_DEFINITIONS"] = "%f64 = OpTypeFloat 64\n"; // Adds 64-bit float type
+ specializations["TYPE_CONVERT"] = "%sc_op32 = OpConvertFToS %i32 %sc_op\n"; // Converts 64-bit float to 32-bit integer
+ }
specializations["SC_DEF0"] = cases[caseNdx].scDefinition0;
specializations["SC_DEF1"] = cases[caseNdx].scDefinition1;
specConstants.push_back(cases[caseNdx].scActualValue0);
specConstants.push_back(cases[caseNdx].scActualValue1);
- createTestsForAllStages(cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants, group.get());
+ createTestsForAllStages(
+ cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants,
+ noPushConstants, noResources, noInterfaces, noExtensions, features, VulkanFeatures(), group.get());
}
const char decorations2[] =
"OpDecorate %sc_2 SpecId 2\n";
const char typesAndConstants2[] =
- "%v3i32 = OpTypeVector %i32 3\n"
-
- "%sc_0 = OpSpecConstant %i32 0\n"
- "%sc_1 = OpSpecConstant %i32 0\n"
- "%sc_2 = OpSpecConstant %i32 0\n"
-
+ "%v3i32 = OpTypeVector %i32 3\n"
"%vec3_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
- "%sc_vec3_0 = OpSpecConstantOp %v3i32 CompositeInsert %sc_0 %vec3_0 0\n" // (sc_0, 0, 0)
- "%sc_vec3_1 = OpSpecConstantOp %v3i32 CompositeInsert %sc_1 %vec3_0 1\n" // (0, sc_1, 0)
- "%sc_vec3_2 = OpSpecConstantOp %v3i32 CompositeInsert %sc_2 %vec3_0 2\n" // (0, 0, sc_2)
- "%sc_vec3_01 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_0 %sc_vec3_1 1 0 4\n" // (0, sc_0, sc_1)
- "%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_01 %sc_vec3_2 5 1 2\n" // (sc_2, sc_0, sc_1)
- "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
- "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
- "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
- "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
- "%sc_final = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n"; // (sc_2 - sc_0) * sc_1
+ "%vec3_undef = OpUndef %v3i32\n"
+
+ "%sc_0 = OpSpecConstant %i32 0\n"
+ "%sc_1 = OpSpecConstant %i32 0\n"
+ "%sc_2 = OpSpecConstant %i32 0\n"
+ "%sc_vec3_0 = OpSpecConstantOp %v3i32 CompositeInsert %sc_0 %vec3_0 0\n" // (sc_0, 0, 0)
+ "%sc_vec3_1 = OpSpecConstantOp %v3i32 CompositeInsert %sc_1 %vec3_0 1\n" // (0, sc_1, 0)
+ "%sc_vec3_2 = OpSpecConstantOp %v3i32 CompositeInsert %sc_2 %vec3_0 2\n" // (0, 0, sc_2)
+ "%sc_vec3_0_s = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_0 %vec3_undef 0 0xFFFFFFFF 2\n" // (sc_0, ???, 0)
+ "%sc_vec3_1_s = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_1 %vec3_undef 0xFFFFFFFF 1 0\n" // (???, sc_1, 0)
+ "%sc_vec3_2_s = OpSpecConstantOp %v3i32 VectorShuffle %vec3_undef %sc_vec3_2 5 0xFFFFFFFF 5\n" // (sc_2, ???, sc_2)
+ "%sc_vec3_01 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n" // (0, sc_0, sc_1)
+ "%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_01 %sc_vec3_2_s 5 1 2\n" // (sc_2, sc_0, sc_1)
+ "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
+ "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
+ "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
+ "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
+ "%sc_final = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n"; // (sc_2 - sc_0) * sc_1
const char function2[] =
"%test_code = OpFunction %v4f32 None %v4f32_function\n"
"%c_vec4_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
"%c_f32_1pl2_23 = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
"%c_f32_1mi2_23 = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
- "%c_f32_n1pn24 = OpConstant %f32 -0x1p-24\n"
- ;
+ "%c_f32_n1pn24 = OpConstant %f32 -0x1p-24\n";
const char function[] =
"%test_code = OpFunction %v4f32 None %v4f32_function\n"
const NameCodePair tests[] =
{
{"bool", "", "%bool"},
- {"vec2uint32", "%type = OpTypeVector %u32 2", "%type"},
+ {"vec2uint32", "", "%v2u32"},
{"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown", "%type"},
{"sampler", "%type = OpTypeSampler", "%type"},
{"sampledimage", "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n" "%type = OpTypeSampledImage %img", "%type"},
"%b = OpFAdd %f32 %a %actually_zero\n"
"%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
"OpReturnValue %ret\n"
- "OpFunctionEnd\n"
- ;
+ "OpFunctionEnd\n";
+
createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
fragments["testfun"] =
"%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
"%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
"OpReturnValue %ret\n"
- "OpFunctionEnd\n"
- ;
+ "OpFunctionEnd\n";
+
createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
fragments["testfun"] =
"%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
"%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
"OpReturnValue %ret\n"
- "OpFunctionEnd\n"
- ;
+ "OpFunctionEnd\n";
+
createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
fragments["testfun"] =
"%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
"%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
"OpReturnValue %ret\n"
- "OpFunctionEnd\n"
- ;
+ "OpFunctionEnd\n";
+
createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
fragments["pre_main"] =
"%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
"%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
"OpReturnValue %ret\n"
- "OpFunctionEnd\n"
- ;
+ "OpFunctionEnd\n";
+
createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
return opUndefTests.release();
"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
"OpReturnValue %result\n"
- "OpFunctionEnd\n"
- ;
+ "OpFunctionEnd\n";
+
createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());
// Body comprised of multiple basic blocks.
return testGroup.release();
}
+// Test for the OpSRem instruction.
+tcu::TestCaseGroup* createOpSRemGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+ de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "srem", "OpSRem"));
+ map<string, string> fragments;
+
+ fragments["pre_main"] =
+ "%c_f32_255 = OpConstant %f32 255.0\n"
+ "%c_i32_128 = OpConstant %i32 128\n"
+ "%c_i32_255 = OpConstant %i32 255\n"
+ "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
+ "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
+ "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
+
+ // The test does the following.
+ // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
+ // ivec4 result = ivec4(srem(ints.x, ints.y), srem(ints.y, ints.z), srem(ints.z, ints.x), 255);
+ // return float(result + 128) / 255.0;
+ fragments["testfun"] =
+ "%test_code = OpFunction %v4f32 None %v4f32_function\n"
+ "%param1 = OpFunctionParameter %v4f32\n"
+ "%label_testfun = OpLabel\n"
+ "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
+ "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
+ "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
+ "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
+ "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
+ "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
+ "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
+ "%x_out = OpSRem %i32 %x_in %y_in\n"
+ "%y_out = OpSRem %i32 %y_in %z_in\n"
+ "%z_out = OpSRem %i32 %z_in %x_in\n"
+ "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
+ "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
+ "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
+ "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
+ "OpReturnValue %float_out\n"
+ "OpFunctionEnd\n";
+
+ const struct CaseParams
+ {
+ const char* name;
+ const char* failMessageTemplate; // customized status message
+ qpTestResult failResult; // override status on failure
+ int operands[4][3]; // four (x, y, z) vectors of operands
+ int results[4][3]; // four (x, y, z) vectors of results
+ } cases[] =
+ {
+ {
+ "positive",
+ "${reason}",
+ QP_TEST_RESULT_FAIL,
+ { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } }, // operands
+ { { 5, 12, 2 }, { 0, 5, 2 }, { 3, 8, 6 }, { 25, 60, 0 } }, // results
+ },
+ {
+ "all",
+ "Inconsistent results, but within specification: ${reason}",
+ negFailResult, // negative operands, not required by the spec
+ { { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } }, // operands
+ { { 5, 12, -2 }, { 0, -5, 2 }, { 3, 8, -6 }, { 25, -60, 0 } }, // results
+ },
+ };
+ // If either operand is negative the result is undefined. Some implementations may still return correct values.
+
+ for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+ {
+ const CaseParams& params = cases[caseNdx];
+ RGBA inputColors[4];
+ RGBA outputColors[4];
+
+ for (int i = 0; i < 4; ++i)
+ {
+ inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
+ outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
+ }
+
+ createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
+ }
+
+ return testGroup.release();
+}
+
+// Test for the OpSMod instruction.
+tcu::TestCaseGroup* createOpSModGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+ de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "smod", "OpSMod"));
+ map<string, string> fragments;
+
+ fragments["pre_main"] =
+ "%c_f32_255 = OpConstant %f32 255.0\n"
+ "%c_i32_128 = OpConstant %i32 128\n"
+ "%c_i32_255 = OpConstant %i32 255\n"
+ "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
+ "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
+ "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
+
+ // The test does the following.
+ // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
+ // ivec4 result = ivec4(smod(ints.x, ints.y), smod(ints.y, ints.z), smod(ints.z, ints.x), 255);
+ // return float(result + 128) / 255.0;
+ fragments["testfun"] =
+ "%test_code = OpFunction %v4f32 None %v4f32_function\n"
+ "%param1 = OpFunctionParameter %v4f32\n"
+ "%label_testfun = OpLabel\n"
+ "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
+ "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
+ "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
+ "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
+ "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
+ "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
+ "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
+ "%x_out = OpSMod %i32 %x_in %y_in\n"
+ "%y_out = OpSMod %i32 %y_in %z_in\n"
+ "%z_out = OpSMod %i32 %z_in %x_in\n"
+ "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
+ "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
+ "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
+ "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
+ "OpReturnValue %float_out\n"
+ "OpFunctionEnd\n";
+
+ const struct CaseParams
+ {
+ const char* name;
+ const char* failMessageTemplate; // customized status message
+ qpTestResult failResult; // override status on failure
+ int operands[4][3]; // four (x, y, z) vectors of operands
+ int results[4][3]; // four (x, y, z) vectors of results
+ } cases[] =
+ {
+ {
+ "positive",
+ "${reason}",
+ QP_TEST_RESULT_FAIL,
+ { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } }, // operands
+ { { 5, 12, 2 }, { 0, 5, 2 }, { 3, 8, 6 }, { 25, 60, 0 } }, // results
+ },
+ {
+ "all",
+ "Inconsistent results, but within specification: ${reason}",
+ negFailResult, // negative operands, not required by the spec
+ { { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } }, // operands
+ { { 5, -5, 3 }, { 0, 2, -3 }, { 3, -73, 69 }, { -35, 40, 0 } }, // results
+ },
+ };
+ // If either operand is negative the result is undefined. Some implementations may still return correct values.
+
+ for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+ {
+ const CaseParams& params = cases[caseNdx];
+ RGBA inputColors[4];
+ RGBA outputColors[4];
+
+ for (int i = 0; i < 4; ++i)
+ {
+ inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
+ outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
+ }
+
+ createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
+ }
+
+ return testGroup.release();
+}
+
enum IntegerType
{
INTEGER_TYPE_SIGNED_16,
return "OpTypeInt " + getBitWidthStr(type) + sign;
}
+const string getAsmTypeName (IntegerType type)
+{
+ const string prefix = isSigned(type) ? "%i" : "%u";
+ return prefix + getBitWidthStr(type);
+}
+
template<typename T>
BufferSp getSpecializedBuffer (deInt64 number)
{
, m_inputBuffer (getBuffer(from, number))
, m_outputBuffer (getBuffer(to, number))
{
- m_asmTypes["inputType"] = getAsmTypeDeclaration(from);
- m_asmTypes["outputType"] = getAsmTypeDeclaration(to);
+ m_asmTypes["inputType"] = getAsmTypeName(from);
+ m_asmTypes["outputType"] = getAsmTypeName(to);
if (m_features == COMPUTE_TEST_USES_INT16)
{
- m_asmTypes["int_capabilities"] = "OpCapability Int16\n";
+ m_asmTypes["int_capabilities"] = "OpCapability Int16\n"
+ "OpCapability StorageUniformBufferBlock16\n";
+ m_asmTypes["int_additional_decl"] = "%i16 = OpTypeInt 16 1\n"
+ "%u16 = OpTypeInt 16 0\n";
+ m_asmTypes["int_extensions"] = "OpExtension \"SPV_KHR_16bit_storage\"\n";
}
else if (m_features == COMPUTE_TEST_USES_INT64)
{
- m_asmTypes["int_capabilities"] = "OpCapability Int64\n";
+ m_asmTypes["int_capabilities"] = "OpCapability Int64\n";
+ m_asmTypes["int_additional_decl"] = "%i64 = OpTypeInt 64 1\n"
+ "%u64 = OpTypeInt 64 0\n";
+ m_asmTypes["int_extensions"] = "";
}
else if (m_features == COMPUTE_TEST_USES_INT16_INT64)
{
- m_asmTypes["int_capabilities"] = string("OpCapability Int16\n") +
- "OpCapability Int64\n";
+ m_asmTypes["int_capabilities"] = "OpCapability Int16\n"
+ "OpCapability StorageUniformBufferBlock16\n"
+ "OpCapability Int64\n";
+ m_asmTypes["int_additional_decl"] = "%i16 = OpTypeInt 16 1\n"
+ "%u16 = OpTypeInt 16 0\n"
+ "%i64 = OpTypeInt 64 1\n"
+ "%u64 = OpTypeInt 64 0\n";
+ m_asmTypes["int_extensions"] = "OpExtension \"SPV_KHR_16bit_storage\"\n";
}
else
{
const StringTemplate shader (
"OpCapability Shader\n"
"${int_capabilities}"
+ "${int_extensions}"
"OpMemoryModel Logical GLSL450\n"
"OpEntryPoint GLCompute %main \"main\" %id\n"
"OpExecutionMode %main LocalSize 1 1 1\n"
"%voidf = OpTypeFunction %void\n"
"%u32 = OpTypeInt 32 0\n"
"%i32 = OpTypeInt 32 1\n"
+ "${int_additional_decl}"
"%uvec3 = OpTypeVector %u32 3\n"
"%uvec3ptr = OpTypePointer Input %uvec3\n"
- // Custom types
- "%in_type = ${inputType}\n"
- "%out_type = ${outputType}\n"
// Derived types
- "%in_ptr = OpTypePointer Uniform %in_type\n"
- "%out_ptr = OpTypePointer Uniform %out_type\n"
- "%in_arr = OpTypeRuntimeArray %in_type\n"
- "%out_arr = OpTypeRuntimeArray %out_type\n"
+ "%in_ptr = OpTypePointer Uniform ${inputType}\n"
+ "%out_ptr = OpTypePointer Uniform ${outputType}\n"
+ "%in_arr = OpTypeRuntimeArray ${inputType}\n"
+ "%out_arr = OpTypeRuntimeArray ${outputType}\n"
"%in_buf = OpTypeStruct %in_arr\n"
"%out_buf = OpTypeStruct %out_arr\n"
"%in_bufptr = OpTypePointer Uniform %in_buf\n"
"%out_bufptr = OpTypePointer Uniform %out_buf\n"
"%indata = OpVariable %in_bufptr Uniform\n"
"%outdata = OpVariable %out_bufptr Uniform\n"
- "%inputptr = OpTypePointer Input %in_type\n"
+ "%inputptr = OpTypePointer Input ${inputType}\n"
"%id = OpVariable %uvec3ptr Input\n"
// Constants
"%zero = OpConstant %i32 0\n"
"%x = OpCompositeExtract %u32 %idval 0\n"
"%inloc = OpAccessChain %in_ptr %indata %zero %x\n"
"%outloc = OpAccessChain %out_ptr %outdata %zero %x\n"
- "%inval = OpLoad %in_type %inloc\n"
- "%conv = ${instruction} %out_type %inval\n"
+ "%inval = OpLoad ${inputType} %inloc\n"
+ "%conv = ${instruction} ${outputType} %inval\n"
" OpStore %outloc %conv\n"
" OpReturn\n"
" OpFunctionEnd\n"
spec.outputs.push_back(test->m_outputBuffer);
spec.numWorkGroups = IVec3(1, 1, 1);
+ if (test->m_features == COMPUTE_TEST_USES_INT16 || test->m_features == COMPUTE_TEST_USES_INT16_INT64)
+ {
+ spec.extensions.push_back("VK_KHR_16bit_storage");
+ }
+
group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "Convert integers with OpSConvert.", spec, test->m_features));
}
spec.outputs.push_back(test->m_outputBuffer);
spec.numWorkGroups = IVec3(1, 1, 1);
+ if (test->m_features == COMPUTE_TEST_USES_INT16 || test->m_features == COMPUTE_TEST_USES_INT16_INT64)
+ {
+ spec.extensions.push_back("VK_KHR_16bit_storage");
+ }
+
group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "Convert integers with OpUConvert.", spec, test->m_features));
}
return group.release();
// Vec2 to Vec4
for (int width = 2; width <= 4; ++width)
{
- string randomConst = numberToString(getInt(rnd));
- string widthStr = numberToString(width);
- int index = rnd.getInt(0, width-1);
-
- params["type"] = "vec";
- params["name"] = params["type"] + "_" + widthStr;
- params["compositeType"] = "%composite = OpTypeVector %custom " + widthStr +"\n";
- params["filler"] = string("%filler = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n";
- params["compositeConstruct"] = "%instance = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
- params["indexes"] = numberToString(index);
+ const string randomConst = numberToString(getInt(rnd));
+ const string widthStr = numberToString(width);
+ const string composite_type = "${customType}vec" + widthStr;
+ const int index = rnd.getInt(0, width-1);
+
+ params["type"] = "vec";
+ params["name"] = params["type"] + "_" + widthStr;
+ params["compositeDecl"] = composite_type + " = OpTypeVector ${customType} " + widthStr +"\n";
+ params["compositeType"] = composite_type;
+ params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
+ params["compositeConstruct"] = "%instance = OpCompositeConstruct " + composite_type + repeatString(" %filler", width) + "\n";
+ params["indexes"] = numberToString(index);
testCases.push_back(params);
}
}
string widthStr = numberToString(width);
int index = rnd.getInt(0, width-1);
- params["type"] = "array";
- params["name"] = params["type"] + "_" + widthStr;
- params["compositeType"] = string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
- + "%composite = OpTypeArray %custom %arraywidth\n";
-
- params["filler"] = string("%filler = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n";
+ params["type"] = "array";
+ params["name"] = params["type"] + "_" + widthStr;
+ params["compositeDecl"] = string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
+ + "%composite = OpTypeArray ${customType} %arraywidth\n";
+ params["compositeType"] = "%composite";
+ params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
params["compositeConstruct"] = "%instance = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
- params["indexes"] = numberToString(index);
+ params["indexes"] = numberToString(index);
testCases.push_back(params);
}
}
string randomConst = numberToString(getInt(rnd));
int index = rnd.getInt(0, width-1);
- params["type"] = "struct";
- params["name"] = params["type"] + "_" + numberToString(width);
- params["compositeType"] = "%composite = OpTypeStruct" + repeatString(" %custom", width) + "\n";
- params["filler"] = string("%filler = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n";
+ params["type"] = "struct";
+ params["name"] = params["type"] + "_" + numberToString(width);
+ params["compositeDecl"] = "%composite = OpTypeStruct" + repeatString(" ${customType}", width) + "\n";
+ params["compositeType"] = "%composite";
+ params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
params["compositeConstruct"] = "%instance = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
- params["indexes"] = numberToString(index);
+ params["indexes"] = numberToString(index);
testCases.push_back(params);
}
}
int index_1 = rnd.getInt(0, width-1);
string columnStr = numberToString(column);
- params["type"] = "matrix";
- params["name"] = params["type"] + "_" + widthStr + "x" + columnStr;
- params["compositeType"] = string("%vectype = OpTypeVector %custom " + widthStr + "\n")
+ params["type"] = "matrix";
+ params["name"] = params["type"] + "_" + widthStr + "x" + columnStr;
+ params["compositeDecl"] = string("%vectype = OpTypeVector ${customType} " + widthStr + "\n")
+ "%composite = OpTypeMatrix %vectype " + columnStr + "\n";
+ params["compositeType"] = "%composite";
- params["filler"] = string("%filler = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n"
+ params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n"
+ "%fillerVec = OpConstantComposite %vectype" + repeatString(" %filler", width) + "\n";
params["compositeConstruct"] = "%instance = OpCompositeConstruct %composite" + repeatString(" %fillerVec", column) + "\n";
- params["indexes"] = numberToString(index_0) + " " + numberToString(index_1);
+ params["indexes"] = numberToString(index_0) + " " + numberToString(index_1);
testCases.push_back(params);
}
}
}
}
+const string getAssemblyTypeName (const NumberType type)
+{
+ switch (type)
+ {
+ case NUMBERTYPE_INT32: return "%i32";
+ case NUMBERTYPE_UINT32: return "%u32";
+ case NUMBERTYPE_FLOAT32: return "%f32";
+ default: DE_ASSERT(false); return "";
+ }
+}
+
const string specializeCompositeInsertShaderTemplate (const NumberType type, const map<string, string>& params)
{
map<string, string> parameters(params);
- parameters["typeDeclaration"] = getAssemblyTypeDeclaration(type);
-
+ const string customType = getAssemblyTypeName(type);
+ map<string, string> substCustomType;
+ substCustomType["customType"] = customType;
+ parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
+ parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
+ parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
+ parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
+ parameters["customType"] = customType;
parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
- return StringTemplate (
+ if (parameters.at("compositeType") != "%u32vec3")
+ {
+ parameters["u32vec3Decl"] = "%u32vec3 = OpTypeVector %u32 3\n";
+ }
+
+ return StringTemplate(
"OpCapability Shader\n"
"OpCapability Matrix\n"
"OpMemoryModel Logical GLSL450\n"
"%voidf = OpTypeFunction %void\n"
"%u32 = OpTypeInt 32 0\n"
"%i32 = OpTypeInt 32 1\n"
- "%uvec3 = OpTypeVector %u32 3\n"
- "%uvec3ptr = OpTypePointer Input %uvec3\n"
+ "%f32 = OpTypeFloat 32\n"
- // Custom type
- "%custom = ${typeDeclaration}\n"
- "${compositeType}"
+ // Composite declaration
+ "${compositeDecl}"
// Constants
"${filler}"
+ "${u32vec3Decl:opt}"
+ "%uvec3ptr = OpTypePointer Input %u32vec3\n"
+
// Inherited from custom
- "%customptr = OpTypePointer Uniform %custom\n"
- "%customarr = OpTypeRuntimeArray %custom\n"
+ "%customptr = OpTypePointer Uniform ${customType}\n"
+ "%customarr = OpTypeRuntimeArray ${customType}\n"
"%buf = OpTypeStruct %customarr\n"
"%bufptr = OpTypePointer Uniform %buf\n"
"%main = OpFunction %void None %voidf\n"
"%label = OpLabel\n"
- "%idval = OpLoad %uvec3 %id\n"
+ "%idval = OpLoad %u32vec3 %id\n"
"%x = OpCompositeExtract %u32 %idval 0\n"
"%inloc = OpAccessChain %customptr %indata %zero %x\n"
"%outloc = OpAccessChain %customptr %outdata %zero %x\n"
// Read the input value
- "%inval = OpLoad %custom %inloc\n"
+ "%inval = OpLoad ${customType} %inloc\n"
// Create the composite and fill it
"${compositeConstruct}"
// Insert the input value to a place
- "%instance2 = OpCompositeInsert %composite %inval %instance ${indexes}\n"
+ "%instance2 = OpCompositeInsert ${compositeType} %inval %instance ${indexes}\n"
// Read back the value from the position
- "%out_val = OpCompositeExtract %custom %instance2 ${indexes}\n"
+ "%out_val = OpCompositeExtract ${customType} %instance2 ${indexes}\n"
// Store it in the output position
" OpStore %outloc %out_val\n"
" OpReturn\n"
vector<string> indexes = de::splitString(fullIndex, ' ');
map<string, string> parameters (params);
- parameters["typeDeclaration"] = getAssemblyTypeDeclaration(type);
- parameters["structType"] = repeatString(" %composite", structInfo.components);
+ parameters["structType"] = repeatString(" ${compositeType}", structInfo.components);
parameters["structConstruct"] = repeatString(" %instance", structInfo.components);
- parameters["insertIndexes"] = fullIndex;
+ parameters["insertIndexes"] = fullIndex;
// In matrix cases the last two index is the CompositeExtract indexes
const deUint32 extractIndexes = (parameters["type"] == "matrix") ? 2 : 1;
parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
- return StringTemplate (
+ const string customType = getAssemblyTypeName(type);
+ map<string, string> substCustomType;
+ substCustomType["customType"] = customType;
+ parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
+ parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
+ parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
+ parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
+ parameters["customType"] = customType;
+
+ const string compositeType = parameters.at("compositeType");
+ map<string, string> substCompositeType;
+ substCompositeType["compositeType"] = compositeType;
+ parameters["structType"] = StringTemplate(parameters.at("structType")).specialize(substCompositeType);
+ if (compositeType != "%u32vec3")
+ {
+ parameters["u32vec3Decl"] = "%u32vec3 = OpTypeVector %u32 3\n";
+ }
+
+ return StringTemplate(
"OpCapability Shader\n"
"OpCapability Matrix\n"
"OpMemoryModel Logical GLSL450\n"
// General types
"%void = OpTypeVoid\n"
"%voidf = OpTypeFunction %void\n"
+ "%i32 = OpTypeInt 32 1\n"
"%u32 = OpTypeInt 32 0\n"
- "%uvec3 = OpTypeVector %u32 3\n"
- "%uvec3ptr = OpTypePointer Input %uvec3\n"
- // Custom type
- "%custom = ${typeDeclaration}\n"
+ "%f32 = OpTypeFloat 32\n"
// Custom types
- "${compositeType}"
+ "${compositeDecl}"
+ // %u32vec3 if not already declared in ${compositeDecl}
+ "${u32vec3Decl:opt}"
+ "%uvec3ptr = OpTypePointer Input %u32vec3\n"
// Inherited from composite
- "%composite_p = OpTypePointer Function %composite\n"
+ "%composite_p = OpTypePointer Function ${compositeType}\n"
"%struct_t = OpTypeStruct${structType}\n"
"%struct_p = OpTypePointer Function %struct_t\n"
// Constants
"${filler}"
"${accessChainConstDeclaration}"
// Inherited from custom
- "%customptr = OpTypePointer Uniform %custom\n"
- "%customarr = OpTypeRuntimeArray %custom\n"
+ "%customptr = OpTypePointer Uniform ${customType}\n"
+ "%customarr = OpTypeRuntimeArray ${customType}\n"
"%buf = OpTypeStruct %customarr\n"
"%bufptr = OpTypePointer Uniform %buf\n"
"%indata = OpVariable %bufptr Uniform\n"
"%main = OpFunction %void None %voidf\n"
"%label = OpLabel\n"
"%struct_v = OpVariable %struct_p Function\n"
- "%idval = OpLoad %uvec3 %id\n"
+ "%idval = OpLoad %u32vec3 %id\n"
"%x = OpCompositeExtract %u32 %idval 0\n"
// Create the input/output type
"%inloc = OpInBoundsAccessChain %customptr %indata %zero %x\n"
"%outloc = OpInBoundsAccessChain %customptr %outdata %zero %x\n"
// Read the input value
- "%inval = OpLoad %custom %inloc\n"
+ "%inval = OpLoad ${customType} %inloc\n"
// Create the composite and fill it
"${compositeConstruct}"
// Create the struct and fill it with the composite
" OpStore %struct_v %comp_obj\n"
// Get deepest possible composite pointer
"%inner_ptr = OpInBoundsAccessChain %composite_p %struct_v${accessChainIndexes}\n"
- "%read_obj = OpLoad %composite %inner_ptr\n"
+ "%read_obj = OpLoad ${compositeType} %inner_ptr\n"
// Read back the stored value
- "%read_val = OpCompositeExtract %custom %read_obj${extractIndexes}\n"
+ "%read_val = OpCompositeExtract ${customType} %read_obj${extractIndexes}\n"
" OpStore %outloc %read_val\n"
" OpReturn\n"
- " OpFunctionEnd\n").specialize(parameters);
+ " OpFunctionEnd\n"
+ ).specialize(parameters);
}
tcu::TestCaseGroup* createOpInBoundsAccessChainGroup (tcu::TestContext& testCtx)
{
map<string, string> parameters(params);
- parameters["typeDeclaration"] = getAssemblyTypeDeclaration(type);
+ parameters["customType"] = getAssemblyTypeName(type);
// Declare the const value, and use it in the initializer
if (params.find("constValue") != params.end())
{
- parameters["constDeclaration"] = "%const = OpConstant %in_type " + params.at("constValue") + "\n";
- parameters["variableInitializer"] = "%const";
+ parameters["variableInitializer"] = " %const";
}
// Uninitialized case
else
{
- parameters["constDeclaration"] = "";
- parameters["variableInitializer"] = "";
+ parameters["commentDecl"] = ";";
}
return StringTemplate(
"%voidf = OpTypeFunction %void\n"
"%u32 = OpTypeInt 32 0\n"
"%i32 = OpTypeInt 32 1\n"
+ "%f32 = OpTypeFloat 32\n"
"%uvec3 = OpTypeVector %u32 3\n"
"%uvec3ptr = OpTypePointer Input %uvec3\n"
- // Custom types
- "%in_type = ${typeDeclaration}\n"
- // "%const = OpConstant %in_type ${constValue}\n"
- "${constDeclaration}\n"
+ "${commentDecl:opt}%const = OpConstant ${customType} ${constValue:opt}\n"
// Derived types
- "%in_ptr = OpTypePointer Uniform %in_type\n"
- "%in_arr = OpTypeRuntimeArray %in_type\n"
+ "%in_ptr = OpTypePointer Uniform ${customType}\n"
+ "%in_arr = OpTypeRuntimeArray ${customType}\n"
"%in_buf = OpTypeStruct %in_arr\n"
"%in_bufptr = OpTypePointer Uniform %in_buf\n"
"%indata = OpVariable %in_bufptr Uniform\n"
"%outdata = OpVariable %in_bufptr Uniform\n"
"%id = OpVariable %uvec3ptr Input\n"
- "%var_ptr = OpTypePointer Function %in_type\n"
+ "%var_ptr = OpTypePointer Function ${customType}\n"
// Constants
"%zero = OpConstant %i32 0\n"
// Main function
"%main = OpFunction %void None %voidf\n"
"%label = OpLabel\n"
- "%out_var = OpVariable %var_ptr Function ${variableInitializer}\n"
+ "%out_var = OpVariable %var_ptr Function${variableInitializer:opt}\n"
"%idval = OpLoad %uvec3 %id\n"
"%x = OpCompositeExtract %u32 %idval 0\n"
"%inloc = OpAccessChain %in_ptr %indata %zero %x\n"
"%outloc = OpAccessChain %in_ptr %outdata %zero %x\n"
- "%outval = OpLoad %in_type %out_var\n"
+ "%outval = OpLoad ${customType} %out_var\n"
" OpStore %outloc %outval\n"
" OpReturn\n"
" OpFunctionEnd\n"
for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
{
- float expected;
- memcpy(&expected, expectedOutputs[outputNdx]->data(), expectedOutputs[outputNdx]->getNumBytes());
+ vector<deUint8> expectedBytes;
+ float expected;
+ float actual;
- float actual;
- memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedOutputs[outputNdx]->getNumBytes());
+ expectedOutputs[outputNdx]->getBytes(expectedBytes);
+ memcpy(&expected, &expectedBytes.front(), expectedBytes.size());
+ memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size());
// Test with epsilon
if (fabs(expected - actual) > epsilon)
// Copy and discard the result.
for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
{
- size_t width = expectedOutputs[outputNdx]->getNumBytes();
+ vector<deUint8> expectedBytes;
+ expectedOutputs[outputNdx]->getBytes(expectedBytes);
+
+ const size_t width = expectedBytes.size();
+ vector<char> data (width);
- vector<char> data(width);
memcpy(&data[0], outputAllocs[outputNdx]->getHostPtr(), width);
}
return true;
de::MovePtr<tcu::TestCaseGroup> computeTests (new tcu::TestCaseGroup(testCtx, "compute", "Compute Instructions with special opcodes/operands"));
de::MovePtr<tcu::TestCaseGroup> graphicsTests (new tcu::TestCaseGroup(testCtx, "graphics", "Graphics Instructions with special opcodes/operands"));
+ computeTests->addChild(createLocalSizeGroup(testCtx));
computeTests->addChild(createOpNopGroup(testCtx));
computeTests->addChild(createOpFUnordGroup(testCtx));
computeTests->addChild(createOpAtomicGroup(testCtx, false));
computeTests->addChild(createOpUnreachableGroup(testCtx));
computeTests ->addChild(createOpQuantizeToF16Group(testCtx));
computeTests ->addChild(createOpFRemGroup(testCtx));
+ computeTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_PASS));
+ computeTests->addChild(createOpSRemComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
+ computeTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_PASS));
+ computeTests->addChild(createOpSModComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
computeTests->addChild(createSConvertTests(testCtx));
computeTests->addChild(createUConvertTests(testCtx));
computeTests->addChild(createOpCompositeInsertGroup(testCtx));
computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
+ computeTests->addChild(createOpNMinGroup(testCtx));
+ computeTests->addChild(createOpNMaxGroup(testCtx));
+ computeTests->addChild(createOpNClampGroup(testCtx));
+ {
+ de::MovePtr<tcu::TestCaseGroup> computeAndroidTests (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
+
+ computeAndroidTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
+ computeAndroidTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
+
+ computeTests->addChild(computeAndroidTests.release());
+ }
+
computeTests->addChild(create16BitStorageComputeGroup(testCtx));
+ computeTests->addChild(createUboMatrixPaddingComputeGroup(testCtx));
+ computeTests->addChild(createConditionalBranchComputeGroup(testCtx));
+ computeTests->addChild(createIndexingComputeGroup(testCtx));
computeTests->addChild(createVariablePointersComputeGroup(testCtx));
+ computeTests->addChild(createImageSamplerComputeGroup(testCtx));
graphicsTests->addChild(createOpNopTests(testCtx));
graphicsTests->addChild(createOpSourceTests(testCtx));
graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
graphicsTests->addChild(createBarrierTests(testCtx));
graphicsTests->addChild(createDecorationGroupTests(testCtx));
graphicsTests->addChild(createFRemTests(testCtx));
+ graphicsTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
+ graphicsTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
+
+ {
+ de::MovePtr<tcu::TestCaseGroup> graphicsAndroidTests (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
+
+ graphicsAndroidTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
+ graphicsAndroidTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
+
+ graphicsTests->addChild(graphicsAndroidTests.release());
+ }
graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
+ graphicsTests->addChild(createUboMatrixPaddingGraphicsGroup(testCtx));
+ graphicsTests->addChild(createConditionalBranchGraphicsGroup(testCtx));
+ graphicsTests->addChild(createIndexingGraphicsGroup(testCtx));
graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));
+ graphicsTests->addChild(createImageSamplerGraphicsGroup(testCtx));
instructionTests->addChild(computeTests.release());
instructionTests->addChild(graphicsTests.release());