Modify spirv_assembly tests adding dependency on 16bit_storage extension
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / spirv_assembly / vktSpvAsmInstructionTests.cpp
index 06348d5..1f65718 100644 (file)
@@ -31,6 +31,7 @@
 #include "tcuStringTemplate.hpp"
 #include "tcuTestLog.hpp"
 #include "tcuVectorUtil.hpp"
+#include "tcuInterval.hpp"
 
 #include "vkDefs.hpp"
 #include "vkDeviceUtil.hpp"
 #include "vkStrUtil.hpp"
 #include "vkTypeUtil.hpp"
 
-#include "deRandom.hpp"
 #include "deStringUtil.hpp"
 #include "deUniquePtr.hpp"
+#include "deMath.h"
 #include "tcuStringTemplate.hpp"
 
 #include "vktSpvAsm16bitStorageTests.hpp"
+#include "vktSpvAsmUboMatrixPaddingTests.hpp"
+#include "vktSpvAsmConditionalBranchTests.hpp"
+#include "vktSpvAsmIndexingTests.hpp"
+#include "vktSpvAsmImageSamplerTests.hpp"
 #include "vktSpvAsmComputeShaderCase.hpp"
 #include "vktSpvAsmComputeShaderTestUtil.hpp"
 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
@@ -61,6 +66,7 @@
 #include <string>
 #include <sstream>
 #include <utility>
+#include <stack>
 
 namespace vkt
 {
@@ -108,6 +114,50 @@ static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* ds
        }
 }
 
+// Gets a 64-bit integer with a more logarithmic distribution
+deInt64 randomInt64LogDistributed (de::Random& rnd)
+{
+       deInt64 val = rnd.getUint64();
+       val &= (1ull << rnd.getInt(1, 63)) - 1;
+       if (rnd.getBool())
+               val = -val;
+       return val;
+}
+
+static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues)
+{
+       for (int ndx = 0; ndx < numValues; ndx++)
+               dst[ndx] = randomInt64LogDistributed(rnd);
+}
+
+template<typename FilterT>
+static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues, FilterT filter)
+{
+       for (int ndx = 0; ndx < numValues; ndx++)
+       {
+               deInt64 value;
+               do {
+                       value = randomInt64LogDistributed(rnd);
+               } while (!filter(value));
+               dst[ndx] = value;
+       }
+}
+
+inline bool filterNonNegative (const deInt64 value)
+{
+       return value >= 0;
+}
+
+inline bool filterPositive (const deInt64 value)
+{
+       return value > 0;
+}
+
+inline bool filterNotZero (const deInt64 value)
+{
+       return value != 0;
+}
+
 static void floorAll (vector<float>& values)
 {
        for (size_t i = 0; i < values.size(); i++)
@@ -128,7 +178,7 @@ struct CaseParameter
        CaseParameter   (const char* case_, const string& param_) : name(case_), param(param_) {}
 };
 
-// Assembly code used for testing OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
+// Assembly code used for testing LocalSize, OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
 //
 // #version 430
 //
@@ -146,6 +196,119 @@ struct CaseParameter
 //   output_data.elements[x] = -input_data.elements[x];
 // }
 
+static string getAsmForLocalSizeTest(bool useLiteralLocalSize, bool useSpecConstantWorkgroupSize, IVec3 workGroupSize, deUint32 ndx)
+{
+       std::ostringstream out;
+       out << getComputeAsmShaderPreambleWithoutLocalSize();
+
+       if (useLiteralLocalSize)
+       {
+               out << "OpExecutionMode %main LocalSize "
+                       << workGroupSize.x() << " " << workGroupSize.y() << " " << workGroupSize.z() << "\n";
+       }
+
+       out << "OpSource GLSL 430\n"
+               "OpName %main           \"main\"\n"
+               "OpName %id             \"gl_GlobalInvocationID\"\n"
+               "OpDecorate %id BuiltIn GlobalInvocationId\n";
+
+       if (useSpecConstantWorkgroupSize)
+       {
+               out << "OpDecorate %spec_0 SpecId 100\n"
+                       << "OpDecorate %spec_1 SpecId 101\n"
+                       << "OpDecorate %spec_2 SpecId 102\n"
+                       << "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n";
+       }
+
+       out << getComputeAsmInputOutputBufferTraits()
+               << getComputeAsmCommonTypes()
+               << getComputeAsmInputOutputBuffer()
+               << "%id        = OpVariable %uvec3ptr Input\n"
+               << "%zero      = OpConstant %i32 0 \n";
+
+       if (useSpecConstantWorkgroupSize)
+       {
+               out     << "%spec_0   = OpSpecConstant %u32 "<< workGroupSize.x() << "\n"
+                       << "%spec_1   = OpSpecConstant %u32 "<< workGroupSize.y() << "\n"
+                       << "%spec_2   = OpSpecConstant %u32 "<< workGroupSize.z() << "\n"
+                       << "%gl_WorkGroupSize = OpSpecConstantComposite %uvec3 %spec_0 %spec_1 %spec_2\n";
+       }
+
+       out << "%main      = OpFunction %void None %voidf\n"
+               << "%label     = OpLabel\n"
+               << "%idval     = OpLoad %uvec3 %id\n"
+               << "%ndx         = OpCompositeExtract %u32 %idval " << ndx << "\n"
+
+                       "%inloc     = OpAccessChain %f32ptr %indata %zero %ndx\n"
+                       "%inval     = OpLoad %f32 %inloc\n"
+                       "%neg       = OpFNegate %f32 %inval\n"
+                       "%outloc    = OpAccessChain %f32ptr %outdata %zero %ndx\n"
+                       "             OpStore %outloc %neg\n"
+                       "             OpReturn\n"
+                       "             OpFunctionEnd\n";
+       return out.str();
+}
+
+tcu::TestCaseGroup* createLocalSizeGroup (tcu::TestContext& testCtx)
+{
+       de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "localsize", ""));
+       ComputeShaderSpec                               spec;
+       de::Random                                              rnd                             (deStringHash(group->getName()));
+       const deUint32                                  numElements             = 64u;
+       vector<float>                                   positiveFloats  (numElements, 0);
+       vector<float>                                   negativeFloats  (numElements, 0);
+
+       fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
+
+       for (size_t ndx = 0; ndx < numElements; ++ndx)
+               negativeFloats[ndx] = -positiveFloats[ndx];
+
+       spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
+       spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
+
+       spec.numWorkGroups = IVec3(numElements, 1, 1);
+
+       spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, 1, 1), 0u);
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize", "", spec));
+
+       spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, 1, 1), 0u);
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize", "", spec));
+
+       spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, 1, 1), 0u);
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize", "", spec));
+
+       spec.numWorkGroups = IVec3(1, 1, 1);
+
+       spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(numElements, 1, 1), 0u);
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_x", "", spec));
+
+       spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(numElements, 1, 1), 0u);
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_x", "", spec));
+
+       spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(numElements, 1, 1), 0u);
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_x", "", spec));
+
+       spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, numElements, 1), 1u);
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_y", "", spec));
+
+       spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, numElements, 1), 1u);
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_y", "", spec));
+
+       spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, numElements, 1), 1u);
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_y", "", spec));
+
+       spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, 1, numElements), 2u);
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_z", "", spec));
+
+       spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, 1, numElements), 2u);
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_z", "", spec));
+
+       spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, 1, numElements), 2u);
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_z", "", spec));
+
+       return group.release();
+}
+
 tcu::TestCaseGroup* createOpNopGroup (tcu::TestContext& testCtx)
 {
        de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnop", "Test the OpNop instruction"));
@@ -204,14 +367,21 @@ bool compareFUnord (const std::vector<BufferSp>& inputs, const vector<Allocation
        if (outputAllocs.size() != 1)
                return false;
 
-       const BufferSp& expectedOutput                  = expectedOutputs[0];
-       const deInt32*  expectedOutputAsInt             = static_cast<const deInt32*>(expectedOutputs[0]->data());
-       const deInt32*  outputAsInt                             = static_cast<const deInt32*>(outputAllocs[0]->getHostPtr());
-       const float*    input1AsFloat                   = static_cast<const float*>(inputs[0]->data());
-       const float*    input2AsFloat                   = static_cast<const float*>(inputs[1]->data());
-       bool returnValue                                                = true;
+       vector<deUint8> input1Bytes;
+       vector<deUint8> input2Bytes;
+       vector<deUint8> expectedBytes;
+
+       inputs[0]->getBytes(input1Bytes);
+       inputs[1]->getBytes(input2Bytes);
+       expectedOutputs[0]->getBytes(expectedBytes);
+
+       const deInt32* const    expectedOutputAsInt             = reinterpret_cast<const deInt32* const>(&expectedBytes.front());
+       const deInt32* const    outputAsInt                             = static_cast<const deInt32* const>(outputAllocs[0]->getHostPtr());
+       const float* const              input1AsFloat                   = reinterpret_cast<const float* const>(&input1Bytes.front());
+       const float* const              input2AsFloat                   = reinterpret_cast<const float* const>(&input2Bytes.front());
+       bool returnValue                                                                = true;
 
-       for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(deInt32); ++idx)
+       for (size_t idx = 0; idx < expectedBytes.size() / sizeof(deInt32); ++idx)
        {
                if (outputAsInt[idx] != expectedOutputAsInt[idx])
                {
@@ -355,14 +525,14 @@ struct OpAtomicCase
 {
        const char*             name;
        const char*             assembly;
-       void                    (*calculateExpected)(deInt32&, deInt32);
+       OpAtomicType    opAtomic;
        deInt32                 numOutputElements;
 
-                                       OpAtomicCase                    (const char* _name, const char* _assembly, void (*_calculateExpected)(deInt32&, deInt32), deInt32 _numOutputElements)
+                                       OpAtomicCase                    (const char* _name, const char* _assembly, OpAtomicType _opAtomic, deInt32 _numOutputElements)
                                                : name                          (_name)
                                                , assembly                      (_assembly)
-                                               , calculateExpected     (_calculateExpected)
-                                               , numOutputElements (_numOutputElements) {}
+                                               , opAtomic                      (_opAtomic)
+                                               , numOutputElements     (_numOutputElements) {}
 };
 
 tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStorageBuffer)
@@ -370,8 +540,7 @@ tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStor
        de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx,
                                                                                                                                                                useStorageBuffer ? "opatomic_storage_buffer" : "opatomic",
                                                                                                                                                                "Test the OpAtomic* opcodes"));
-       de::Random                                              rnd                                     (deStringHash(group->getName()));
-       const int                                               numElements                     = 1000000;
+       const int                                               numElements                     = 65535;
        vector<OpAtomicCase>                    cases;
 
        const StringTemplate                    shaderTemplate  (
@@ -400,14 +569,7 @@ tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStor
                "OpMemberDecorate %sumbuf 0 Coherent\n"
                "OpMemberDecorate %sumbuf 0 Offset 0\n"
 
-               "%void      = OpTypeVoid\n"
-               "%voidf     = OpTypeFunction %void\n"
-               "%u32       = OpTypeInt 32 0\n"
-               "%i32       = OpTypeInt 32 1\n"
-               "%uvec3     = OpTypeVector %u32 3\n"
-               "%uvec3ptr  = OpTypePointer Input %uvec3\n"
-               "%i32ptr    = OpTypePointer ${BLOCK_POINTER_TYPE} %i32\n"
-               "%i32arr    = OpTypeRuntimeArray %i32\n"
+               + getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
 
                "%buf       = OpTypeStruct %i32arr\n"
                "%bufptr    = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
@@ -437,25 +599,24 @@ tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStor
                "             OpReturn\n"
                "             OpFunctionEnd\n");
 
-       #define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, CALCULATE_EXPECTED, NUM_OUTPUT_ELEMENTS) \
+       #define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS) \
        do { \
                DE_STATIC_ASSERT((NUM_OUTPUT_ELEMENTS) == 1 || (NUM_OUTPUT_ELEMENTS) == numElements); \
-               struct calculateExpected_##NAME { static void calculateExpected(deInt32& expected, deInt32 input) CALCULATE_EXPECTED }; /* NOLINT(CALCULATE_EXPECTED) */ \
-               cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, calculateExpected_##NAME::calculateExpected, NUM_OUTPUT_ELEMENTS)); \
+               cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)); \
        } while (deGetFalse())
-       #define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, CALCULATE_EXPECTED) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, CALCULATE_EXPECTED, 1)
-       #define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, CALCULATE_EXPECTED) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, CALCULATE_EXPECTED, numElements)
+       #define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, OPATOMIC, 1)
+       #define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, OPATOMIC, numElements)
 
-       ADD_OPATOMIC_CASE_1(iadd,       "%unused    = OpAtomicIAdd %i32 %outloc %one %zero %inval\n", { expected += input; } );
-       ADD_OPATOMIC_CASE_1(isub,       "%unused    = OpAtomicISub %i32 %outloc %one %zero %inval\n", { expected -= input; } );
-       ADD_OPATOMIC_CASE_1(iinc,       "%unused    = OpAtomicIIncrement %i32 %outloc %one %zero\n",  { ++expected; (void)input;} );
-       ADD_OPATOMIC_CASE_1(idec,       "%unused    = OpAtomicIDecrement %i32 %outloc %one %zero\n",  { --expected; (void)input;} );
+       ADD_OPATOMIC_CASE_1(iadd,       "%unused    = OpAtomicIAdd %i32 %outloc %one %zero %inval\n", OPATOMIC_IADD );
+       ADD_OPATOMIC_CASE_1(isub,       "%unused    = OpAtomicISub %i32 %outloc %one %zero %inval\n", OPATOMIC_ISUB );
+       ADD_OPATOMIC_CASE_1(iinc,       "%unused    = OpAtomicIIncrement %i32 %outloc %one %zero\n",  OPATOMIC_IINC );
+       ADD_OPATOMIC_CASE_1(idec,       "%unused    = OpAtomicIDecrement %i32 %outloc %one %zero\n",  OPATOMIC_IDEC );
        ADD_OPATOMIC_CASE_N(load,       "%inval2    = OpAtomicLoad %i32 %inloc %zero %zero\n"
-                                                               "             OpStore %outloc %inval2\n",  { expected = input;} );
-       ADD_OPATOMIC_CASE_N(store,      "             OpAtomicStore %outloc %zero %zero %inval\n",  { expected = input;} );
+                                                               "             OpStore %outloc %inval2\n",  OPATOMIC_LOAD );
+       ADD_OPATOMIC_CASE_N(store,      "             OpAtomicStore %outloc %zero %zero %inval\n",  OPATOMIC_STORE );
        ADD_OPATOMIC_CASE_N(compex, "%even      = OpSMod %i32 %inval %two\n"
                                                                "             OpStore %outloc %even\n"
-                                                               "%unused    = OpAtomicCompareExchange %i32 %outloc %one %zero %zero %minusone %zero\n",  { expected = (input % 2) == 0 ? -1 : 1;} );
+                                                               "%unused    = OpAtomicCompareExchange %i32 %outloc %one %zero %zero %minusone %zero\n",  OPATOMIC_COMPEX );
 
        #undef ADD_OPATOMIC_CASE
        #undef ADD_OPATOMIC_CASE_1
@@ -477,14 +638,8 @@ tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStor
                if (useStorageBuffer)
                        spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
 
-               fillRandomScalars(rnd, 1, 100, &inputInts[0], numElements);
-               for (size_t ndx = 0; ndx < numElements; ++ndx)
-               {
-                       cases[caseNdx].calculateExpected((cases[caseNdx].numOutputElements == 1) ? expected[0] : expected[ndx], inputInts[ndx]);
-               }
-
-               spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
-               spec.outputs.push_back(BufferSp(new Int32Buffer(expected)));
+               spec.inputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_INPUT)));
+               spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_EXPECTED)));
                spec.numWorkGroups = IVec3(numElements, 1, 1);
                group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
        }
@@ -629,11 +784,12 @@ bool compareNoContractCase(const std::vector<BufferSp>&, const vector<Allocation
        if (outputAllocs.size() != 1)
                return false;
 
-       // We really just need this for size because we are not comparing the exact values.
-       const BufferSp& expectedOutput  = expectedOutputs[0];
-       const float*    outputAsFloat   = static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+       // Only size is needed because we are not comparing the exact values.
+       size_t byteSize = expectedOutputs[0]->getByteSize();
 
-       for(size_t i = 0; i < expectedOutput->getNumBytes() / sizeof(float); ++i) {
+       const float*    outputAsFloat   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
+
+       for(size_t i = 0; i < byteSize / sizeof(float); ++i) {
                if (outputAsFloat[i] != 0.f &&
                        outputAsFloat[i] != -ldexp(1, -24)) {
                        return false;
@@ -738,11 +894,13 @@ bool compareFRem(const std::vector<BufferSp>&, const vector<AllocationSp>& outpu
        if (outputAllocs.size() != 1)
                return false;
 
-       const BufferSp& expectedOutput = expectedOutputs[0];
-       const float *expectedOutputAsFloat = static_cast<const float*>(expectedOutput->data());
-       const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+       vector<deUint8> expectedBytes;
+       expectedOutputs[0]->getBytes(expectedBytes);
+
+       const float*    expectedOutputAsFloat   = reinterpret_cast<const float*>(&expectedBytes.front());
+       const float*    outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
 
-       for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float); ++idx)
+       for (size_t idx = 0; idx < expectedBytes.size() / sizeof(float); ++idx)
        {
                const float f0 = expectedOutputAsFloat[idx];
                const float f1 = outputAsFloat[idx];
@@ -796,38 +954,874 @@ tcu::TestCaseGroup* createOpFRemGroup (tcu::TestContext& testCtx)
                "OpDecorate %f32arr ArrayStride 4\n"
                "OpMemberDecorate %buf 0 Offset 0\n"
 
-               + string(getComputeAsmCommonTypes()) +
+               + string(getComputeAsmCommonTypes()) +
+
+               "%buf        = OpTypeStruct %f32arr\n"
+               "%bufptr     = OpTypePointer Uniform %buf\n"
+               "%indata1    = OpVariable %bufptr Uniform\n"
+               "%indata2    = OpVariable %bufptr Uniform\n"
+               "%outdata    = OpVariable %bufptr Uniform\n"
+
+               "%id        = OpVariable %uvec3ptr Input\n"
+               "%zero      = OpConstant %i32 0\n"
+
+               "%main      = OpFunction %void None %voidf\n"
+               "%label     = OpLabel\n"
+               "%idval     = OpLoad %uvec3 %id\n"
+               "%x         = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
+               "%inval1    = OpLoad %f32 %inloc1\n"
+               "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
+               "%inval2    = OpLoad %f32 %inloc2\n"
+               "%rem       = OpFRem %f32 %inval1 %inval2\n"
+               "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "             OpStore %outloc %rem\n"
+               "             OpReturn\n"
+               "             OpFunctionEnd\n";
+
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
+       spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+       spec.numWorkGroups = IVec3(numElements, 1, 1);
+       spec.verifyIO = &compareFRem;
+
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+
+       return group.release();
+}
+
+bool compareNMin (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
+{
+       if (outputAllocs.size() != 1)
+               return false;
+
+       const BufferSp&                 expectedOutput                  (expectedOutputs[0]);
+       std::vector<deUint8>    data;
+       expectedOutput->getBytes(data);
+
+       const float* const              expectedOutputAsFloat   = reinterpret_cast<const float*>(&data.front());
+       const float* const              outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
+
+       for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
+       {
+               const float f0 = expectedOutputAsFloat[idx];
+               const float f1 = outputAsFloat[idx];
+
+               // For NMin, we accept NaN as output if both inputs were NaN.
+               // Otherwise the NaN is the wrong choise, as on architectures that
+               // do not handle NaN, those are huge values.
+               if (!(tcu::Float32(f1).isNaN() && tcu::Float32(f0).isNaN()) && deFloatAbs(f1 - f0) > 0.00001f)
+                       return false;
+       }
+
+       return true;
+}
+
+tcu::TestCaseGroup* createOpNMinGroup (tcu::TestContext& testCtx)
+{
+       de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnmin", "Test the OpNMin instruction"));
+       ComputeShaderSpec                               spec;
+       de::Random                                              rnd                             (deStringHash(group->getName()));
+       const int                                               numElements             = 200;
+       vector<float>                                   inputFloats1    (numElements, 0);
+       vector<float>                                   inputFloats2    (numElements, 0);
+       vector<float>                                   outputFloats    (numElements, 0);
+
+       fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
+       fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
+
+       // Make the first case a full-NAN case.
+       inputFloats1[0] = TCU_NAN;
+       inputFloats2[0] = TCU_NAN;
+
+       for (size_t ndx = 0; ndx < numElements; ++ndx)
+       {
+               // By default, pick the smallest
+               outputFloats[ndx] = std::min(inputFloats1[ndx], inputFloats2[ndx]);
+
+               // Make half of the cases NaN cases
+               if ((ndx & 1) == 0)
+               {
+                       // Alternate between the NaN operand
+                       if ((ndx & 2) == 0)
+                       {
+                               outputFloats[ndx] = inputFloats2[ndx];
+                               inputFloats1[ndx] = TCU_NAN;
+                       }
+                       else
+                       {
+                               outputFloats[ndx] = inputFloats1[ndx];
+                               inputFloats2[ndx] = TCU_NAN;
+                       }
+               }
+       }
+
+       spec.assembly =
+               "OpCapability Shader\n"
+               "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
+               "OpMemoryModel Logical GLSL450\n"
+               "OpEntryPoint GLCompute %main \"main\" %id\n"
+               "OpExecutionMode %main LocalSize 1 1 1\n"
+
+               "OpName %main           \"main\"\n"
+               "OpName %id             \"gl_GlobalInvocationID\"\n"
+
+               "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+               "OpDecorate %buf BufferBlock\n"
+               "OpDecorate %indata1 DescriptorSet 0\n"
+               "OpDecorate %indata1 Binding 0\n"
+               "OpDecorate %indata2 DescriptorSet 0\n"
+               "OpDecorate %indata2 Binding 1\n"
+               "OpDecorate %outdata DescriptorSet 0\n"
+               "OpDecorate %outdata Binding 2\n"
+               "OpDecorate %f32arr ArrayStride 4\n"
+               "OpMemberDecorate %buf 0 Offset 0\n"
+
+               + string(getComputeAsmCommonTypes()) +
+
+               "%buf        = OpTypeStruct %f32arr\n"
+               "%bufptr     = OpTypePointer Uniform %buf\n"
+               "%indata1    = OpVariable %bufptr Uniform\n"
+               "%indata2    = OpVariable %bufptr Uniform\n"
+               "%outdata    = OpVariable %bufptr Uniform\n"
+
+               "%id        = OpVariable %uvec3ptr Input\n"
+               "%zero      = OpConstant %i32 0\n"
+
+               "%main      = OpFunction %void None %voidf\n"
+               "%label     = OpLabel\n"
+               "%idval     = OpLoad %uvec3 %id\n"
+               "%x         = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
+               "%inval1    = OpLoad %f32 %inloc1\n"
+               "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
+               "%inval2    = OpLoad %f32 %inloc2\n"
+               "%rem       = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
+               "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "             OpStore %outloc %rem\n"
+               "             OpReturn\n"
+               "             OpFunctionEnd\n";
+
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
+       spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+       spec.numWorkGroups = IVec3(numElements, 1, 1);
+       spec.verifyIO = &compareNMin;
+
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+
+       return group.release();
+}
+
+bool compareNMax (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
+{
+       if (outputAllocs.size() != 1)
+               return false;
+
+       const BufferSp&                 expectedOutput                  = expectedOutputs[0];
+       std::vector<deUint8>    data;
+       expectedOutput->getBytes(data);
+
+       const float* const              expectedOutputAsFloat   = reinterpret_cast<const float*>(&data.front());
+       const float* const              outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
+
+       for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
+       {
+               const float f0 = expectedOutputAsFloat[idx];
+               const float f1 = outputAsFloat[idx];
+
+               // For NMax, NaN is considered acceptable result, since in
+               // architectures that do not handle NaNs, those are huge values.
+               if (!tcu::Float32(f1).isNaN() && deFloatAbs(f1 - f0) > 0.00001f)
+                       return false;
+       }
+
+       return true;
+}
+
+tcu::TestCaseGroup* createOpNMaxGroup (tcu::TestContext& testCtx)
+{
+       de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnmax", "Test the OpNMax instruction"));
+       ComputeShaderSpec                               spec;
+       de::Random                                              rnd                             (deStringHash(group->getName()));
+       const int                                               numElements             = 200;
+       vector<float>                                   inputFloats1    (numElements, 0);
+       vector<float>                                   inputFloats2    (numElements, 0);
+       vector<float>                                   outputFloats    (numElements, 0);
+
+       fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
+       fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
+
+       // Make the first case a full-NAN case.
+       inputFloats1[0] = TCU_NAN;
+       inputFloats2[0] = TCU_NAN;
+
+       for (size_t ndx = 0; ndx < numElements; ++ndx)
+       {
+               // By default, pick the biggest
+               outputFloats[ndx] = std::max(inputFloats1[ndx], inputFloats2[ndx]);
+
+               // Make half of the cases NaN cases
+               if ((ndx & 1) == 0)
+               {
+                       // Alternate between the NaN operand
+                       if ((ndx & 2) == 0)
+                       {
+                               outputFloats[ndx] = inputFloats2[ndx];
+                               inputFloats1[ndx] = TCU_NAN;
+                       }
+                       else
+                       {
+                               outputFloats[ndx] = inputFloats1[ndx];
+                               inputFloats2[ndx] = TCU_NAN;
+                       }
+               }
+       }
+
+       spec.assembly =
+               "OpCapability Shader\n"
+               "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
+               "OpMemoryModel Logical GLSL450\n"
+               "OpEntryPoint GLCompute %main \"main\" %id\n"
+               "OpExecutionMode %main LocalSize 1 1 1\n"
+
+               "OpName %main           \"main\"\n"
+               "OpName %id             \"gl_GlobalInvocationID\"\n"
+
+               "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+               "OpDecorate %buf BufferBlock\n"
+               "OpDecorate %indata1 DescriptorSet 0\n"
+               "OpDecorate %indata1 Binding 0\n"
+               "OpDecorate %indata2 DescriptorSet 0\n"
+               "OpDecorate %indata2 Binding 1\n"
+               "OpDecorate %outdata DescriptorSet 0\n"
+               "OpDecorate %outdata Binding 2\n"
+               "OpDecorate %f32arr ArrayStride 4\n"
+               "OpMemberDecorate %buf 0 Offset 0\n"
+
+               + string(getComputeAsmCommonTypes()) +
+
+               "%buf        = OpTypeStruct %f32arr\n"
+               "%bufptr     = OpTypePointer Uniform %buf\n"
+               "%indata1    = OpVariable %bufptr Uniform\n"
+               "%indata2    = OpVariable %bufptr Uniform\n"
+               "%outdata    = OpVariable %bufptr Uniform\n"
+
+               "%id        = OpVariable %uvec3ptr Input\n"
+               "%zero      = OpConstant %i32 0\n"
+
+               "%main      = OpFunction %void None %voidf\n"
+               "%label     = OpLabel\n"
+               "%idval     = OpLoad %uvec3 %id\n"
+               "%x         = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
+               "%inval1    = OpLoad %f32 %inloc1\n"
+               "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
+               "%inval2    = OpLoad %f32 %inloc2\n"
+               "%rem       = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
+               "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "             OpStore %outloc %rem\n"
+               "             OpReturn\n"
+               "             OpFunctionEnd\n";
+
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
+       spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+       spec.numWorkGroups = IVec3(numElements, 1, 1);
+       spec.verifyIO = &compareNMax;
+
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+
+       return group.release();
+}
+
+bool compareNClamp (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs, TestLog&)
+{
+       if (outputAllocs.size() != 1)
+               return false;
+
+       const BufferSp&                 expectedOutput                  = expectedOutputs[0];
+       std::vector<deUint8>    data;
+       expectedOutput->getBytes(data);
+
+       const float* const              expectedOutputAsFloat   = reinterpret_cast<const float*>(&data.front());
+       const float* const              outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
+
+       for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float) / 2; ++idx)
+       {
+               const float e0 = expectedOutputAsFloat[idx * 2];
+               const float e1 = expectedOutputAsFloat[idx * 2 + 1];
+               const float res = outputAsFloat[idx];
+
+               // For NClamp, we have two possible outcomes based on
+               // whether NaNs are handled or not.
+               // If either min or max value is NaN, the result is undefined,
+               // so this test doesn't stress those. If the clamped value is
+               // NaN, and NaNs are handled, the result is min; if NaNs are not
+               // handled, they are big values that result in max.
+               // If all three parameters are NaN, the result should be NaN.
+               if (!((tcu::Float32(e0).isNaN() && tcu::Float32(res).isNaN()) ||
+                        (deFloatAbs(e0 - res) < 0.00001f) ||
+                        (deFloatAbs(e1 - res) < 0.00001f)))
+                       return false;
+       }
+
+       return true;
+}
+
+tcu::TestCaseGroup* createOpNClampGroup (tcu::TestContext& testCtx)
+{
+       de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnclamp", "Test the OpNClamp instruction"));
+       ComputeShaderSpec                               spec;
+       de::Random                                              rnd                             (deStringHash(group->getName()));
+       const int                                               numElements             = 200;
+       vector<float>                                   inputFloats1    (numElements, 0);
+       vector<float>                                   inputFloats2    (numElements, 0);
+       vector<float>                                   inputFloats3    (numElements, 0);
+       vector<float>                                   outputFloats    (numElements * 2, 0);
+
+       fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
+       fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
+       fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats3[0], numElements);
+
+       for (size_t ndx = 0; ndx < numElements; ++ndx)
+       {
+               // Results are only defined if max value is bigger than min value.
+               if (inputFloats2[ndx] > inputFloats3[ndx])
+               {
+                       float t = inputFloats2[ndx];
+                       inputFloats2[ndx] = inputFloats3[ndx];
+                       inputFloats3[ndx] = t;
+               }
+
+               // By default, do the clamp, setting both possible answers
+               float defaultRes = std::min(std::max(inputFloats1[ndx], inputFloats2[ndx]), inputFloats3[ndx]);
+
+               float maxResA = std::max(inputFloats1[ndx], inputFloats2[ndx]);
+               float maxResB = maxResA;
+
+               // Alternate between the NaN cases
+               if (ndx & 1)
+               {
+                       inputFloats1[ndx] = TCU_NAN;
+                       // If NaN is handled, the result should be same as the clamp minimum.
+                       // If NaN is not handled, the result should clamp to the clamp maximum.
+                       maxResA = inputFloats2[ndx];
+                       maxResB = inputFloats3[ndx];
+               }
+               else
+               {
+                       // Not a NaN case - only one legal result.
+                       maxResA = defaultRes;
+                       maxResB = defaultRes;
+               }
+
+               outputFloats[ndx * 2] = maxResA;
+               outputFloats[ndx * 2 + 1] = maxResB;
+       }
+
+       // Make the first case a full-NAN case.
+       inputFloats1[0] = TCU_NAN;
+       inputFloats2[0] = TCU_NAN;
+       inputFloats3[0] = TCU_NAN;
+       outputFloats[0] = TCU_NAN;
+       outputFloats[1] = TCU_NAN;
+
+       spec.assembly =
+               "OpCapability Shader\n"
+               "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
+               "OpMemoryModel Logical GLSL450\n"
+               "OpEntryPoint GLCompute %main \"main\" %id\n"
+               "OpExecutionMode %main LocalSize 1 1 1\n"
+
+               "OpName %main           \"main\"\n"
+               "OpName %id             \"gl_GlobalInvocationID\"\n"
+
+               "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+               "OpDecorate %buf BufferBlock\n"
+               "OpDecorate %indata1 DescriptorSet 0\n"
+               "OpDecorate %indata1 Binding 0\n"
+               "OpDecorate %indata2 DescriptorSet 0\n"
+               "OpDecorate %indata2 Binding 1\n"
+               "OpDecorate %indata3 DescriptorSet 0\n"
+               "OpDecorate %indata3 Binding 2\n"
+               "OpDecorate %outdata DescriptorSet 0\n"
+               "OpDecorate %outdata Binding 3\n"
+               "OpDecorate %f32arr ArrayStride 4\n"
+               "OpMemberDecorate %buf 0 Offset 0\n"
+
+               + string(getComputeAsmCommonTypes()) +
+
+               "%buf        = OpTypeStruct %f32arr\n"
+               "%bufptr     = OpTypePointer Uniform %buf\n"
+               "%indata1    = OpVariable %bufptr Uniform\n"
+               "%indata2    = OpVariable %bufptr Uniform\n"
+               "%indata3    = OpVariable %bufptr Uniform\n"
+               "%outdata    = OpVariable %bufptr Uniform\n"
+
+               "%id        = OpVariable %uvec3ptr Input\n"
+               "%zero      = OpConstant %i32 0\n"
+
+               "%main      = OpFunction %void None %voidf\n"
+               "%label     = OpLabel\n"
+               "%idval     = OpLoad %uvec3 %id\n"
+               "%x         = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
+               "%inval1    = OpLoad %f32 %inloc1\n"
+               "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
+               "%inval2    = OpLoad %f32 %inloc2\n"
+               "%inloc3    = OpAccessChain %f32ptr %indata3 %zero %x\n"
+               "%inval3    = OpLoad %f32 %inloc3\n"
+               "%rem       = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
+               "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "             OpStore %outloc %rem\n"
+               "             OpReturn\n"
+               "             OpFunctionEnd\n";
+
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
+       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
+       spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+       spec.numWorkGroups = IVec3(numElements, 1, 1);
+       spec.verifyIO = &compareNClamp;
+
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+
+       return group.release();
+}
+
+tcu::TestCaseGroup* createOpSRemComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+       de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsrem", "Test the OpSRem instruction"));
+       de::Random                                              rnd                             (deStringHash(group->getName()));
+       const int                                               numElements             = 200;
+
+       const struct CaseParams
+       {
+               const char*             name;
+               const char*             failMessage;            // customized status message
+               qpTestResult    failResult;                     // override status on failure
+               int                             op1Min, op1Max;         // operand ranges
+               int                             op2Min, op2Max;
+       } cases[] =
+       {
+               { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    0,              65536,  0,              100 },
+               { "all",                "Inconsistent results, but within specification",       negFailResult,                  -65536, 65536,  -100,   100 },  // see below
+       };
+       // If either operand is negative the result is undefined. Some implementations may still return correct values.
+
+       for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+       {
+               const CaseParams&       params          = cases[caseNdx];
+               ComputeShaderSpec       spec;
+               vector<deInt32>         inputInts1      (numElements, 0);
+               vector<deInt32>         inputInts2      (numElements, 0);
+               vector<deInt32>         outputInts      (numElements, 0);
+
+               fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
+               fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
+
+               for (int ndx = 0; ndx < numElements; ++ndx)
+               {
+                       // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
+                       outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
+               }
+
+               spec.assembly =
+                       string(getComputeAsmShaderPreamble()) +
+
+                       "OpName %main           \"main\"\n"
+                       "OpName %id             \"gl_GlobalInvocationID\"\n"
+
+                       "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+                       "OpDecorate %buf BufferBlock\n"
+                       "OpDecorate %indata1 DescriptorSet 0\n"
+                       "OpDecorate %indata1 Binding 0\n"
+                       "OpDecorate %indata2 DescriptorSet 0\n"
+                       "OpDecorate %indata2 Binding 1\n"
+                       "OpDecorate %outdata DescriptorSet 0\n"
+                       "OpDecorate %outdata Binding 2\n"
+                       "OpDecorate %i32arr ArrayStride 4\n"
+                       "OpMemberDecorate %buf 0 Offset 0\n"
+
+                       + string(getComputeAsmCommonTypes()) +
+
+                       "%buf        = OpTypeStruct %i32arr\n"
+                       "%bufptr     = OpTypePointer Uniform %buf\n"
+                       "%indata1    = OpVariable %bufptr Uniform\n"
+                       "%indata2    = OpVariable %bufptr Uniform\n"
+                       "%outdata    = OpVariable %bufptr Uniform\n"
+
+                       "%id        = OpVariable %uvec3ptr Input\n"
+                       "%zero      = OpConstant %i32 0\n"
+
+                       "%main      = OpFunction %void None %voidf\n"
+                       "%label     = OpLabel\n"
+                       "%idval     = OpLoad %uvec3 %id\n"
+                       "%x         = OpCompositeExtract %u32 %idval 0\n"
+                       "%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
+                       "%inval1    = OpLoad %i32 %inloc1\n"
+                       "%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
+                       "%inval2    = OpLoad %i32 %inloc2\n"
+                       "%rem       = OpSRem %i32 %inval1 %inval2\n"
+                       "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
+                       "             OpStore %outloc %rem\n"
+                       "             OpReturn\n"
+                       "             OpFunctionEnd\n";
+
+               spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts1)));
+               spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts2)));
+               spec.outputs.push_back  (BufferSp(new Int32Buffer(outputInts)));
+               spec.numWorkGroups              = IVec3(numElements, 1, 1);
+               spec.failResult                 = params.failResult;
+               spec.failMessage                = params.failMessage;
+
+               group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
+       }
+
+       return group.release();
+}
+
+tcu::TestCaseGroup* createOpSRemComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+       de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsrem64", "Test the 64-bit OpSRem instruction"));
+       de::Random                                              rnd                             (deStringHash(group->getName()));
+       const int                                               numElements             = 200;
+
+       const struct CaseParams
+       {
+               const char*             name;
+               const char*             failMessage;            // customized status message
+               qpTestResult    failResult;                     // override status on failure
+               bool                    positive;
+       } cases[] =
+       {
+               { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    true },
+               { "all",                "Inconsistent results, but within specification",       negFailResult,                  false },        // see below
+       };
+       // If either operand is negative the result is undefined. Some implementations may still return correct values.
+
+       for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+       {
+               const CaseParams&       params          = cases[caseNdx];
+               ComputeShaderSpec       spec;
+               vector<deInt64>         inputInts1      (numElements, 0);
+               vector<deInt64>         inputInts2      (numElements, 0);
+               vector<deInt64>         outputInts      (numElements, 0);
+
+               if (params.positive)
+               {
+                       fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
+                       fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
+               }
+               else
+               {
+                       fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
+                       fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
+               }
+
+               for (int ndx = 0; ndx < numElements; ++ndx)
+               {
+                       // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
+                       outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
+               }
+
+               spec.assembly =
+                       "OpCapability Int64\n"
+
+                       + string(getComputeAsmShaderPreamble()) +
+
+                       "OpName %main           \"main\"\n"
+                       "OpName %id             \"gl_GlobalInvocationID\"\n"
+
+                       "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+                       "OpDecorate %buf BufferBlock\n"
+                       "OpDecorate %indata1 DescriptorSet 0\n"
+                       "OpDecorate %indata1 Binding 0\n"
+                       "OpDecorate %indata2 DescriptorSet 0\n"
+                       "OpDecorate %indata2 Binding 1\n"
+                       "OpDecorate %outdata DescriptorSet 0\n"
+                       "OpDecorate %outdata Binding 2\n"
+                       "OpDecorate %i64arr ArrayStride 8\n"
+                       "OpMemberDecorate %buf 0 Offset 0\n"
+
+                       + string(getComputeAsmCommonTypes())
+                       + string(getComputeAsmCommonInt64Types()) +
+
+                       "%buf        = OpTypeStruct %i64arr\n"
+                       "%bufptr     = OpTypePointer Uniform %buf\n"
+                       "%indata1    = OpVariable %bufptr Uniform\n"
+                       "%indata2    = OpVariable %bufptr Uniform\n"
+                       "%outdata    = OpVariable %bufptr Uniform\n"
+
+                       "%id        = OpVariable %uvec3ptr Input\n"
+                       "%zero      = OpConstant %i64 0\n"
+
+                       "%main      = OpFunction %void None %voidf\n"
+                       "%label     = OpLabel\n"
+                       "%idval     = OpLoad %uvec3 %id\n"
+                       "%x         = OpCompositeExtract %u32 %idval 0\n"
+                       "%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
+                       "%inval1    = OpLoad %i64 %inloc1\n"
+                       "%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
+                       "%inval2    = OpLoad %i64 %inloc2\n"
+                       "%rem       = OpSRem %i64 %inval1 %inval2\n"
+                       "%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
+                       "             OpStore %outloc %rem\n"
+                       "             OpReturn\n"
+                       "             OpFunctionEnd\n";
+
+               spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts1)));
+               spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts2)));
+               spec.outputs.push_back  (BufferSp(new Int64Buffer(outputInts)));
+               spec.numWorkGroups              = IVec3(numElements, 1, 1);
+               spec.failResult                 = params.failResult;
+               spec.failMessage                = params.failMessage;
+
+               group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec, COMPUTE_TEST_USES_INT64));
+       }
+
+       return group.release();
+}
+
+tcu::TestCaseGroup* createOpSModComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+       de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsmod", "Test the OpSMod instruction"));
+       de::Random                                              rnd                             (deStringHash(group->getName()));
+       const int                                               numElements             = 200;
+
+       const struct CaseParams
+       {
+               const char*             name;
+               const char*             failMessage;            // customized status message
+               qpTestResult    failResult;                     // override status on failure
+               int                             op1Min, op1Max;         // operand ranges
+               int                             op2Min, op2Max;
+       } cases[] =
+       {
+               { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    0,              65536,  0,              100 },
+               { "all",                "Inconsistent results, but within specification",       negFailResult,                  -65536, 65536,  -100,   100 },  // see below
+       };
+       // If either operand is negative the result is undefined. Some implementations may still return correct values.
+
+       for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+       {
+               const CaseParams&       params          = cases[caseNdx];
+
+               ComputeShaderSpec       spec;
+               vector<deInt32>         inputInts1      (numElements, 0);
+               vector<deInt32>         inputInts2      (numElements, 0);
+               vector<deInt32>         outputInts      (numElements, 0);
+
+               fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
+               fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
+
+               for (int ndx = 0; ndx < numElements; ++ndx)
+               {
+                       deInt32 rem = inputInts1[ndx] % inputInts2[ndx];
+                       if (rem == 0)
+                       {
+                               outputInts[ndx] = 0;
+                       }
+                       else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
+                       {
+                               // They have the same sign
+                               outputInts[ndx] = rem;
+                       }
+                       else
+                       {
+                               // They have opposite sign.  The remainder operation takes the
+                               // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
+                               // of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
+                               // the result has the correct sign and that it is still
+                               // congruent to inputInts1[ndx] modulo inputInts2[ndx]
+                               //
+                               // See also http://mathforum.org/library/drmath/view/52343.html
+                               outputInts[ndx] = rem + inputInts2[ndx];
+                       }
+               }
+
+               spec.assembly =
+                       string(getComputeAsmShaderPreamble()) +
+
+                       "OpName %main           \"main\"\n"
+                       "OpName %id             \"gl_GlobalInvocationID\"\n"
+
+                       "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+                       "OpDecorate %buf BufferBlock\n"
+                       "OpDecorate %indata1 DescriptorSet 0\n"
+                       "OpDecorate %indata1 Binding 0\n"
+                       "OpDecorate %indata2 DescriptorSet 0\n"
+                       "OpDecorate %indata2 Binding 1\n"
+                       "OpDecorate %outdata DescriptorSet 0\n"
+                       "OpDecorate %outdata Binding 2\n"
+                       "OpDecorate %i32arr ArrayStride 4\n"
+                       "OpMemberDecorate %buf 0 Offset 0\n"
+
+                       + string(getComputeAsmCommonTypes()) +
+
+                       "%buf        = OpTypeStruct %i32arr\n"
+                       "%bufptr     = OpTypePointer Uniform %buf\n"
+                       "%indata1    = OpVariable %bufptr Uniform\n"
+                       "%indata2    = OpVariable %bufptr Uniform\n"
+                       "%outdata    = OpVariable %bufptr Uniform\n"
+
+                       "%id        = OpVariable %uvec3ptr Input\n"
+                       "%zero      = OpConstant %i32 0\n"
+
+                       "%main      = OpFunction %void None %voidf\n"
+                       "%label     = OpLabel\n"
+                       "%idval     = OpLoad %uvec3 %id\n"
+                       "%x         = OpCompositeExtract %u32 %idval 0\n"
+                       "%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
+                       "%inval1    = OpLoad %i32 %inloc1\n"
+                       "%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
+                       "%inval2    = OpLoad %i32 %inloc2\n"
+                       "%rem       = OpSMod %i32 %inval1 %inval2\n"
+                       "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
+                       "             OpStore %outloc %rem\n"
+                       "             OpReturn\n"
+                       "             OpFunctionEnd\n";
+
+               spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts1)));
+               spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts2)));
+               spec.outputs.push_back  (BufferSp(new Int32Buffer(outputInts)));
+               spec.numWorkGroups              = IVec3(numElements, 1, 1);
+               spec.failResult                 = params.failResult;
+               spec.failMessage                = params.failMessage;
+
+               group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
+       }
+
+       return group.release();
+}
+
+tcu::TestCaseGroup* createOpSModComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+       de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsmod64", "Test the OpSMod instruction"));
+       de::Random                                              rnd                             (deStringHash(group->getName()));
+       const int                                               numElements             = 200;
+
+       const struct CaseParams
+       {
+               const char*             name;
+               const char*             failMessage;            // customized status message
+               qpTestResult    failResult;                     // override status on failure
+               bool                    positive;
+       } cases[] =
+       {
+               { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    true },
+               { "all",                "Inconsistent results, but within specification",       negFailResult,                  false },        // see below
+       };
+       // If either operand is negative the result is undefined. Some implementations may still return correct values.
+
+       for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+       {
+               const CaseParams&       params          = cases[caseNdx];
 
-               "%buf        = OpTypeStruct %f32arr\n"
-               "%bufptr     = OpTypePointer Uniform %buf\n"
-               "%indata1    = OpVariable %bufptr Uniform\n"
-               "%indata2    = OpVariable %bufptr Uniform\n"
-               "%outdata    = OpVariable %bufptr Uniform\n"
+               ComputeShaderSpec       spec;
+               vector<deInt64>         inputInts1      (numElements, 0);
+               vector<deInt64>         inputInts2      (numElements, 0);
+               vector<deInt64>         outputInts      (numElements, 0);
 
-               "%id        = OpVariable %uvec3ptr Input\n"
-               "%zero      = OpConstant %i32 0\n"
 
-               "%main      = OpFunction %void None %voidf\n"
-               "%label     = OpLabel\n"
-               "%idval     = OpLoad %uvec3 %id\n"
-               "%x         = OpCompositeExtract %u32 %idval 0\n"
-               "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
-               "%inval1    = OpLoad %f32 %inloc1\n"
-               "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
-               "%inval2    = OpLoad %f32 %inloc2\n"
-               "%rem       = OpFRem %f32 %inval1 %inval2\n"
-               "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
-               "             OpStore %outloc %rem\n"
-               "             OpReturn\n"
-               "             OpFunctionEnd\n";
+               if (params.positive)
+               {
+                       fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
+                       fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
+               }
+               else
+               {
+                       fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
+                       fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
+               }
 
-       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
-       spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
-       spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
-       spec.numWorkGroups = IVec3(numElements, 1, 1);
-       spec.verifyIO = &compareFRem;
+               for (int ndx = 0; ndx < numElements; ++ndx)
+               {
+                       deInt64 rem = inputInts1[ndx] % inputInts2[ndx];
+                       if (rem == 0)
+                       {
+                               outputInts[ndx] = 0;
+                       }
+                       else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
+                       {
+                               // They have the same sign
+                               outputInts[ndx] = rem;
+                       }
+                       else
+                       {
+                               // They have opposite sign.  The remainder operation takes the
+                               // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
+                               // of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
+                               // the result has the correct sign and that it is still
+                               // congruent to inputInts1[ndx] modulo inputInts2[ndx]
+                               //
+                               // See also http://mathforum.org/library/drmath/view/52343.html
+                               outputInts[ndx] = rem + inputInts2[ndx];
+                       }
+               }
 
-       group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
+               spec.assembly =
+                       "OpCapability Int64\n"
+
+                       + string(getComputeAsmShaderPreamble()) +
+
+                       "OpName %main           \"main\"\n"
+                       "OpName %id             \"gl_GlobalInvocationID\"\n"
+
+                       "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+                       "OpDecorate %buf BufferBlock\n"
+                       "OpDecorate %indata1 DescriptorSet 0\n"
+                       "OpDecorate %indata1 Binding 0\n"
+                       "OpDecorate %indata2 DescriptorSet 0\n"
+                       "OpDecorate %indata2 Binding 1\n"
+                       "OpDecorate %outdata DescriptorSet 0\n"
+                       "OpDecorate %outdata Binding 2\n"
+                       "OpDecorate %i64arr ArrayStride 8\n"
+                       "OpMemberDecorate %buf 0 Offset 0\n"
+
+                       + string(getComputeAsmCommonTypes())
+                       + string(getComputeAsmCommonInt64Types()) +
+
+                       "%buf        = OpTypeStruct %i64arr\n"
+                       "%bufptr     = OpTypePointer Uniform %buf\n"
+                       "%indata1    = OpVariable %bufptr Uniform\n"
+                       "%indata2    = OpVariable %bufptr Uniform\n"
+                       "%outdata    = OpVariable %bufptr Uniform\n"
+
+                       "%id        = OpVariable %uvec3ptr Input\n"
+                       "%zero      = OpConstant %i64 0\n"
+
+                       "%main      = OpFunction %void None %voidf\n"
+                       "%label     = OpLabel\n"
+                       "%idval     = OpLoad %uvec3 %id\n"
+                       "%x         = OpCompositeExtract %u32 %idval 0\n"
+                       "%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
+                       "%inval1    = OpLoad %i64 %inloc1\n"
+                       "%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
+                       "%inval2    = OpLoad %i64 %inloc2\n"
+                       "%rem       = OpSMod %i64 %inval1 %inval2\n"
+                       "%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
+                       "             OpStore %outloc %rem\n"
+                       "             OpReturn\n"
+                       "             OpFunctionEnd\n";
+
+               spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts1)));
+               spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts2)));
+               spec.outputs.push_back  (BufferSp(new Int64Buffer(outputInts)));
+               spec.numWorkGroups              = IVec3(numElements, 1, 1);
+               spec.failResult                 = params.failResult;
+               spec.failMessage                = params.failMessage;
+
+               group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec, COMPUTE_TEST_USES_INT64));
+       }
 
        return group.release();
 }
@@ -1484,7 +2478,8 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
        vector<deInt32>                                 outputInts3             (numElements, 0);
        vector<deInt32>                                 outputInts4             (numElements, 0);
        const StringTemplate                    shaderTemplate  (
-               string(getComputeAsmShaderPreamble()) +
+               "${CAPABILITIES:opt}"
+               + string(getComputeAsmShaderPreamble()) +
 
                "OpName %main           \"main\"\n"
                "OpName %id             \"gl_GlobalInvocationID\"\n"
@@ -1496,6 +2491,7 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
 
                + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
 
+               "${OPTYPE_DEFINITIONS:opt}"
                "%buf     = OpTypeStruct %i32arr\n"
                "%bufptr  = OpTypePointer Uniform %buf\n"
                "%indata    = OpVariable %bufptr Uniform\n"
@@ -1510,6 +2506,7 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
 
                "%main      = OpFunction %void None %voidf\n"
                "%label     = OpLabel\n"
+               "${TYPE_CONVERT:opt}"
                "%idval     = OpLoad %uvec3 %id\n"
                "%x         = OpCompositeExtract %u32 %idval 0\n"
                "%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
@@ -1531,6 +2528,7 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
        }
 
        const char addScToInput[]               = "OpIAdd %i32 %inval %sc_final";
+       const char addSc32ToInput[]             = "OpIAdd %i32 %inval %sc_final32";
        const char selectTrueUsingSc[]  = "OpSelect %i32 %sc_final %inval %zero";
        const char selectFalseUsingSc[] = "OpSelect %i32 %sc_final %zero %inval";
 
@@ -1565,12 +2563,15 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
        cases.push_back(SpecConstantTwoIntCase("not",                                   " %i32 0",              " %i32 0",              "%i32",         "Not                  %sc_0",                           -43,    0,              addScToInput,           outputInts1));
        cases.push_back(SpecConstantTwoIntCase("logicalnot",                    "False %bool",  "False %bool",  "%bool",        "LogicalNot           %sc_0",                           1,              0,              selectFalseUsingSc,     outputInts2));
        cases.push_back(SpecConstantTwoIntCase("select",                                "False %bool",  " %i32 0",              "%i32",         "Select               %sc_0 %sc_1 %zero",       1,              42,             addScToInput,           outputInts1));
-       // OpSConvert, OpFConvert: these two instructions involve ints/floats of different bitwidths.
+       cases.push_back(SpecConstantTwoIntCase("sconvert",                              " %i32 0",              " %i32 0",              "%i16",         "SConvert             %sc_0",                           -11200, 0,              addSc32ToInput,         outputInts3));
+       // -969998336 stored as 32-bit two's complement is the binary representation of -11200 as IEEE-754 Float
+       cases.push_back(SpecConstantTwoIntCase("fconvert",                              " %f32 0",              " %f32 0",              "%f64",         "FConvert             %sc_0",                           -969998336, 0,  addSc32ToInput,         outputInts3));
 
        for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
        {
                map<string, string>             specializations;
                ComputeShaderSpec               spec;
+               ComputeTestFeatures             features = COMPUTE_TEST_USES_NONE;
 
                specializations["SC_DEF0"]                      = cases[caseNdx].scDefinition0;
                specializations["SC_DEF1"]                      = cases[caseNdx].scDefinition1;
@@ -1578,6 +2579,24 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
                specializations["SC_OP"]                        = cases[caseNdx].scOperation;
                specializations["GEN_RESULT"]           = cases[caseNdx].resultOperation;
 
+               // Special SPIR-V code for SConvert-case
+               if (strcmp(cases[caseNdx].caseName, "sconvert") == 0)
+               {
+                       features                                                                = COMPUTE_TEST_USES_INT16;
+                       specializations["CAPABILITIES"]                 = "OpCapability Int16\n";                                                       // Adds 16-bit integer capability
+                       specializations["OPTYPE_DEFINITIONS"]   = "%i16 = OpTypeInt 16 1\n";                                            // Adds 16-bit integer type
+                       specializations["TYPE_CONVERT"]                 = "%sc_final32 = OpSConvert %i32 %sc_final\n";          // Converts 16-bit integer to 32-bit integer
+               }
+
+               // Special SPIR-V code for FConvert-case
+               if (strcmp(cases[caseNdx].caseName, "fconvert") == 0)
+               {
+                       features                                                                = COMPUTE_TEST_USES_FLOAT64;
+                       specializations["CAPABILITIES"]                 = "OpCapability Float64\n";                                                     // Adds 64-bit float capability
+                       specializations["OPTYPE_DEFINITIONS"]   = "%f64 = OpTypeFloat 64\n";                                            // Adds 64-bit float type
+                       specializations["TYPE_CONVERT"]                 = "%sc_final32 = OpConvertFToS %i32 %sc_final\n";       // Converts 64-bit float to 32-bit integer
+               }
+
                spec.assembly = shaderTemplate.specialize(specializations);
                spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
                spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
@@ -1585,7 +2604,7 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
                spec.specConstants.push_back(cases[caseNdx].scActualValue0);
                spec.specConstants.push_back(cases[caseNdx].scActualValue1);
 
-               group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName, cases[caseNdx].caseName, spec));
+               group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName, cases[caseNdx].caseName, spec, features));
        }
 
        ComputeShaderSpec                               spec;
@@ -1604,29 +2623,33 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
 
                + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
 
-               "%ivec3     = OpTypeVector %i32 3\n"
-               "%buf     = OpTypeStruct %i32arr\n"
-               "%bufptr  = OpTypePointer Uniform %buf\n"
-               "%indata    = OpVariable %bufptr Uniform\n"
-               "%outdata   = OpVariable %bufptr Uniform\n"
+               "%ivec3       = OpTypeVector %i32 3\n"
+               "%buf         = OpTypeStruct %i32arr\n"
+               "%bufptr      = OpTypePointer Uniform %buf\n"
+               "%indata      = OpVariable %bufptr Uniform\n"
+               "%outdata     = OpVariable %bufptr Uniform\n"
 
-               "%id        = OpVariable %uvec3ptr Input\n"
-               "%zero      = OpConstant %i32 0\n"
-               "%ivec3_0   = OpConstantComposite %ivec3 %zero %zero %zero\n"
+               "%id          = OpVariable %uvec3ptr Input\n"
+               "%zero        = OpConstant %i32 0\n"
+               "%ivec3_0     = OpConstantComposite %ivec3 %zero %zero %zero\n"
+               "%vec3_undef  = OpUndef %ivec3\n"
 
                "%sc_0        = OpSpecConstant %i32 0\n"
                "%sc_1        = OpSpecConstant %i32 0\n"
                "%sc_2        = OpSpecConstant %i32 0\n"
-               "%sc_vec3_0   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_0        %ivec3_0   0\n"     // (sc_0, 0, 0)
-               "%sc_vec3_1   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_1        %ivec3_0   1\n"     // (0, sc_1, 0)
-               "%sc_vec3_2   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_2        %ivec3_0   2\n"     // (0, 0, sc_2)
-               "%sc_vec3_01  = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0   %sc_vec3_1 1 0 4\n" // (0,    sc_0, sc_1)
-               "%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_01  %sc_vec3_2 5 1 2\n" // (sc_2, sc_0, sc_1)
-               "%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            0\n"     // sc_2
-               "%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            1\n"     // sc_0
-               "%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            2\n"     // sc_1
-               "%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"        // (sc_2 - sc_0)
-               "%sc_final    = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n"        // (sc_2 - sc_0) * sc_1
+               "%sc_vec3_0   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_0        %ivec3_0     0\n"                                                 // (sc_0, 0, 0)
+               "%sc_vec3_1   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_1        %ivec3_0     1\n"                                                 // (0, sc_1, 0)
+               "%sc_vec3_2   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_2        %ivec3_0     2\n"                                                 // (0, 0, sc_2)
+               "%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0   %vec3_undef  0          0xFFFFFFFF 2\n"   // (sc_0, ???,  0)
+               "%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1          0\n"   // (???,  sc_1, 0)
+               "%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle    %vec3_undef  %sc_vec3_2   5          0xFFFFFFFF 5\n"   // (sc_2, ???,  sc_2)
+               "%sc_vec3_01  = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n"                                             // (0,    sc_0, sc_1)
+               "%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n"                                             // (sc_2, sc_0, sc_1)
+               "%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"                                                 // sc_2
+               "%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"                                                 // sc_0
+               "%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"                                                 // sc_1
+               "%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"                                                              // (sc_2 - sc_0)
+               "%sc_final    = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n"                                                              // (sc_2 - sc_0) * sc_1
 
                "%main      = OpFunction %void None %voidf\n"
                "%label     = OpLabel\n"
@@ -1646,9 +2669,555 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
        spec.specConstants.push_back(56);
        spec.specConstants.push_back(-77);
 
-       group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", "VectorShuffle, CompositeExtract, & CompositeInsert", spec));
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", "VectorShuffle, CompositeExtract, & CompositeInsert", spec));
+
+       return group.release();
+}
+
+void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestContext& testCtx)
+{
+       ComputeShaderSpec       specInt;
+       ComputeShaderSpec       specFloat;
+       ComputeShaderSpec       specVec3;
+       ComputeShaderSpec       specMat4;
+       ComputeShaderSpec       specArray;
+       ComputeShaderSpec       specStruct;
+       de::Random                      rnd                             (deStringHash(group->getName()));
+       const int                       numElements             = 100;
+       vector<float>           inputFloats             (numElements, 0);
+       vector<float>           outputFloats    (numElements, 0);
+
+       fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
+
+       // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
+       floorAll(inputFloats);
+
+       for (size_t ndx = 0; ndx < numElements; ++ndx)
+       {
+               // Just check if the value is positive or not
+               outputFloats[ndx] = (inputFloats[ndx] > 0) ? 1.0f : -1.0f;
+       }
+
+       // All of the tests are of the form:
+       //
+       // testtype r
+       //
+       // if (inputdata > 0)
+       //   r = 1
+       // else
+       //   r = -1
+       //
+       // return (float)r
+
+       specFloat.assembly =
+               string(getComputeAsmShaderPreamble()) +
+
+               "OpSource GLSL 430\n"
+               "OpName %main \"main\"\n"
+               "OpName %id \"gl_GlobalInvocationID\"\n"
+
+               "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+               + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+               "%id = OpVariable %uvec3ptr Input\n"
+               "%zero       = OpConstant %i32 0\n"
+               "%float_0    = OpConstant %f32 0.0\n"
+               "%float_1    = OpConstant %f32 1.0\n"
+               "%float_n1   = OpConstant %f32 -1.0\n"
+
+               "%main     = OpFunction %void None %voidf\n"
+               "%entry    = OpLabel\n"
+               "%idval    = OpLoad %uvec3 %id\n"
+               "%x        = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+               "%inval    = OpLoad %f32 %inloc\n"
+
+               "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
+               "            OpSelectionMerge %cm None\n"
+               "            OpBranchConditional %comp %tb %fb\n"
+               "%tb       = OpLabel\n"
+               "            OpBranch %cm\n"
+               "%fb       = OpLabel\n"
+               "            OpBranch %cm\n"
+               "%cm       = OpLabel\n"
+               "%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
+
+               "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "            OpStore %outloc %res\n"
+               "            OpReturn\n"
+
+               "            OpFunctionEnd\n";
+       specFloat.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+       specFloat.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+       specFloat.numWorkGroups = IVec3(numElements, 1, 1);
+
+       specMat4.assembly =
+               string(getComputeAsmShaderPreamble()) +
+
+               "OpSource GLSL 430\n"
+               "OpName %main \"main\"\n"
+               "OpName %id \"gl_GlobalInvocationID\"\n"
+
+               "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+               + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+               "%id = OpVariable %uvec3ptr Input\n"
+               "%v4f32      = OpTypeVector %f32 4\n"
+               "%mat4v4f32  = OpTypeMatrix %v4f32 4\n"
+               "%zero       = OpConstant %i32 0\n"
+               "%float_0    = OpConstant %f32 0.0\n"
+               "%float_1    = OpConstant %f32 1.0\n"
+               "%float_n1   = OpConstant %f32 -1.0\n"
+               "%m11        = OpConstantComposite %v4f32 %float_1 %float_0 %float_0 %float_0\n"
+               "%m12        = OpConstantComposite %v4f32 %float_0 %float_1 %float_0 %float_0\n"
+               "%m13        = OpConstantComposite %v4f32 %float_0 %float_0 %float_1 %float_0\n"
+               "%m14        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_1\n"
+               "%m1         = OpConstantComposite %mat4v4f32 %m11 %m12 %m13 %m14\n"
+               "%m21        = OpConstantComposite %v4f32 %float_n1 %float_0 %float_0 %float_0\n"
+               "%m22        = OpConstantComposite %v4f32 %float_0 %float_n1 %float_0 %float_0\n"
+               "%m23        = OpConstantComposite %v4f32 %float_0 %float_0 %float_n1 %float_0\n"
+               "%m24        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_n1\n"
+               "%m2         = OpConstantComposite %mat4v4f32 %m21 %m22 %m23 %m24\n"
+
+               "%main     = OpFunction %void None %voidf\n"
+               "%entry    = OpLabel\n"
+               "%idval    = OpLoad %uvec3 %id\n"
+               "%x        = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+               "%inval    = OpLoad %f32 %inloc\n"
+
+               "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
+               "            OpSelectionMerge %cm None\n"
+               "            OpBranchConditional %comp %tb %fb\n"
+               "%tb       = OpLabel\n"
+               "            OpBranch %cm\n"
+               "%fb       = OpLabel\n"
+               "            OpBranch %cm\n"
+               "%cm       = OpLabel\n"
+               "%mres     = OpPhi %mat4v4f32 %m1 %tb %m2 %fb\n"
+               "%res      = OpCompositeExtract %f32 %mres 2 2\n"
+
+               "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "            OpStore %outloc %res\n"
+               "            OpReturn\n"
+
+               "            OpFunctionEnd\n";
+       specMat4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+       specMat4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+       specMat4.numWorkGroups = IVec3(numElements, 1, 1);
+
+       specVec3.assembly =
+               string(getComputeAsmShaderPreamble()) +
+
+               "OpSource GLSL 430\n"
+               "OpName %main \"main\"\n"
+               "OpName %id \"gl_GlobalInvocationID\"\n"
+
+               "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+               + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+               "%id = OpVariable %uvec3ptr Input\n"
+               "%zero       = OpConstant %i32 0\n"
+               "%float_0    = OpConstant %f32 0.0\n"
+               "%float_1    = OpConstant %f32 1.0\n"
+               "%float_n1   = OpConstant %f32 -1.0\n"
+               "%v1         = OpConstantComposite %fvec3 %float_1 %float_1 %float_1\n"
+               "%v2         = OpConstantComposite %fvec3 %float_n1 %float_n1 %float_n1\n"
+
+               "%main     = OpFunction %void None %voidf\n"
+               "%entry    = OpLabel\n"
+               "%idval    = OpLoad %uvec3 %id\n"
+               "%x        = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+               "%inval    = OpLoad %f32 %inloc\n"
+
+               "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
+               "            OpSelectionMerge %cm None\n"
+               "            OpBranchConditional %comp %tb %fb\n"
+               "%tb       = OpLabel\n"
+               "            OpBranch %cm\n"
+               "%fb       = OpLabel\n"
+               "            OpBranch %cm\n"
+               "%cm       = OpLabel\n"
+               "%vres     = OpPhi %fvec3 %v1 %tb %v2 %fb\n"
+               "%res      = OpCompositeExtract %f32 %vres 2\n"
+
+               "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "            OpStore %outloc %res\n"
+               "            OpReturn\n"
+
+               "            OpFunctionEnd\n";
+       specVec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+       specVec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+       specVec3.numWorkGroups = IVec3(numElements, 1, 1);
+
+       specInt.assembly =
+               string(getComputeAsmShaderPreamble()) +
+
+               "OpSource GLSL 430\n"
+               "OpName %main \"main\"\n"
+               "OpName %id \"gl_GlobalInvocationID\"\n"
+
+               "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+               + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+               "%id = OpVariable %uvec3ptr Input\n"
+               "%zero       = OpConstant %i32 0\n"
+               "%float_0    = OpConstant %f32 0.0\n"
+               "%i1         = OpConstant %i32 1\n"
+               "%i2         = OpConstant %i32 -1\n"
+
+               "%main     = OpFunction %void None %voidf\n"
+               "%entry    = OpLabel\n"
+               "%idval    = OpLoad %uvec3 %id\n"
+               "%x        = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+               "%inval    = OpLoad %f32 %inloc\n"
+
+               "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
+               "            OpSelectionMerge %cm None\n"
+               "            OpBranchConditional %comp %tb %fb\n"
+               "%tb       = OpLabel\n"
+               "            OpBranch %cm\n"
+               "%fb       = OpLabel\n"
+               "            OpBranch %cm\n"
+               "%cm       = OpLabel\n"
+               "%ires     = OpPhi %i32 %i1 %tb %i2 %fb\n"
+               "%res      = OpConvertSToF %f32 %ires\n"
+
+               "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "            OpStore %outloc %res\n"
+               "            OpReturn\n"
+
+               "            OpFunctionEnd\n";
+       specInt.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+       specInt.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+       specInt.numWorkGroups = IVec3(numElements, 1, 1);
+
+       specArray.assembly =
+               string(getComputeAsmShaderPreamble()) +
+
+               "OpSource GLSL 430\n"
+               "OpName %main \"main\"\n"
+               "OpName %id \"gl_GlobalInvocationID\"\n"
+
+               "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+               + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+               "%id = OpVariable %uvec3ptr Input\n"
+               "%zero       = OpConstant %i32 0\n"
+               "%u7         = OpConstant %u32 7\n"
+               "%float_0    = OpConstant %f32 0.0\n"
+               "%float_1    = OpConstant %f32 1.0\n"
+               "%float_n1   = OpConstant %f32 -1.0\n"
+               "%f32a7      = OpTypeArray %f32 %u7\n"
+               "%a1         = OpConstantComposite %f32a7 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1\n"
+               "%a2         = OpConstantComposite %f32a7 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1\n"
+               "%main     = OpFunction %void None %voidf\n"
+               "%entry    = OpLabel\n"
+               "%idval    = OpLoad %uvec3 %id\n"
+               "%x        = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+               "%inval    = OpLoad %f32 %inloc\n"
+
+               "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
+               "            OpSelectionMerge %cm None\n"
+               "            OpBranchConditional %comp %tb %fb\n"
+               "%tb       = OpLabel\n"
+               "            OpBranch %cm\n"
+               "%fb       = OpLabel\n"
+               "            OpBranch %cm\n"
+               "%cm       = OpLabel\n"
+               "%ares     = OpPhi %f32a7 %a1 %tb %a2 %fb\n"
+               "%res      = OpCompositeExtract %f32 %ares 5\n"
+
+               "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "            OpStore %outloc %res\n"
+               "            OpReturn\n"
+
+               "            OpFunctionEnd\n";
+       specArray.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+       specArray.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+       specArray.numWorkGroups = IVec3(numElements, 1, 1);
+
+       specStruct.assembly =
+               string(getComputeAsmShaderPreamble()) +
+
+               "OpSource GLSL 430\n"
+               "OpName %main \"main\"\n"
+               "OpName %id \"gl_GlobalInvocationID\"\n"
+
+               "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+               + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+               "%id = OpVariable %uvec3ptr Input\n"
+               "%zero       = OpConstant %i32 0\n"
+               "%float_0    = OpConstant %f32 0.0\n"
+               "%float_1    = OpConstant %f32 1.0\n"
+               "%float_n1   = OpConstant %f32 -1.0\n"
+
+               "%v2f32      = OpTypeVector %f32 2\n"
+               "%Data2      = OpTypeStruct %f32 %v2f32\n"
+               "%Data       = OpTypeStruct %Data2 %f32\n"
+
+               "%in1a       = OpConstantComposite %v2f32 %float_1 %float_1\n"
+               "%in1b       = OpConstantComposite %Data2 %float_1 %in1a\n"
+               "%s1         = OpConstantComposite %Data %in1b %float_1\n"
+               "%in2a       = OpConstantComposite %v2f32 %float_n1 %float_n1\n"
+               "%in2b       = OpConstantComposite %Data2 %float_n1 %in2a\n"
+               "%s2         = OpConstantComposite %Data %in2b %float_n1\n"
+
+               "%main     = OpFunction %void None %voidf\n"
+               "%entry    = OpLabel\n"
+               "%idval    = OpLoad %uvec3 %id\n"
+               "%x        = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+               "%inval    = OpLoad %f32 %inloc\n"
+
+               "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
+               "            OpSelectionMerge %cm None\n"
+               "            OpBranchConditional %comp %tb %fb\n"
+               "%tb       = OpLabel\n"
+               "            OpBranch %cm\n"
+               "%fb       = OpLabel\n"
+               "            OpBranch %cm\n"
+               "%cm       = OpLabel\n"
+               "%sres     = OpPhi %Data %s1 %tb %s2 %fb\n"
+               "%res      = OpCompositeExtract %f32 %sres 0 0\n"
+
+               "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "            OpStore %outloc %res\n"
+               "            OpReturn\n"
+
+               "            OpFunctionEnd\n";
+       specStruct.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+       specStruct.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+       specStruct.numWorkGroups = IVec3(numElements, 1, 1);
+
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_int", "OpPhi with int variables", specInt));
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float", "OpPhi with float variables", specFloat));
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_vec3", "OpPhi with vec3 variables", specVec3));
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_mat4", "OpPhi with mat4 variables", specMat4));
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_array", "OpPhi with array variables", specArray));
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_struct", "OpPhi with struct variables", specStruct));
+}
+
+string generateConstantDefinitions (int count)
+{
+       std::ostringstream      r;
+       for (int i = 0; i < count; i++)
+               r << "%cf" << (i * 10 + 5) << " = OpConstant %f32 " <<(i * 10 + 5) << ".0\n";
+       r << "\n";
+       return r.str();
+}
+
+string generateSwitchCases (int count)
+{
+       std::ostringstream      r;
+       for (int i = 0; i < count; i++)
+               r << " " << i << " %case" << i;
+       r << "\n";
+       return r.str();
+}
+
+string generateSwitchTargets (int count)
+{
+       std::ostringstream      r;
+       for (int i = 0; i < count; i++)
+               r << "%case" << i << " = OpLabel\n            OpBranch %phi\n";
+       r << "\n";
+       return r.str();
+}
+
+string generateOpPhiParams (int count)
+{
+       std::ostringstream      r;
+       for (int i = 0; i < count; i++)
+               r << " %cf" << (i * 10 + 5) << " %case" << i;
+       r << "\n";
+       return r.str();
+}
+
+string generateIntWidth (int value)
+{
+       std::ostringstream      r;
+       r << value;
+       return r.str();
+}
+
+// Expand input string by injecting "ABC" between the input
+// string characters. The acc/add/treshold parameters are used
+// to skip some of the injections to make the result less
+// uniform (and a lot shorter).
+string expandOpPhiCase5 (const string& s, int &acc, int add, int treshold)
+{
+       std::ostringstream      res;
+       const char*                     p = s.c_str();
+
+       while (*p)
+       {
+               res << *p;
+               acc += add;
+               if (acc > treshold)
+               {
+                       acc -= treshold;
+                       res << "ABC";
+               }
+               p++;
+       }
+       return res.str();
+}
+
+// Calculate expected result based on the code string
+float calcOpPhiCase5 (float val, const string& s)
+{
+       const char*             p               = s.c_str();
+       float                   x[8];
+       bool                    b[8];
+       const float             tv[8]   = { 0.5f, 1.5f, 3.5f, 7.5f, 15.5f, 31.5f, 63.5f, 127.5f };
+       const float             v               = deFloatAbs(val);
+       float                   res             = 0;
+       int                             depth   = -1;
+       int                             skip    = 0;
+
+       for (int i = 7; i >= 0; --i)
+               x[i] = std::fmod((float)v, (float)(2 << i));
+       for (int i = 7; i >= 0; --i)
+               b[i] = x[i] > tv[i];
+
+       while (*p)
+       {
+               if (*p == 'A')
+               {
+                       depth++;
+                       if (skip == 0 && b[depth])
+                       {
+                               res++;
+                       }
+                       else
+                               skip++;
+               }
+               if (*p == 'B')
+               {
+                       if (skip)
+                               skip--;
+                       if (b[depth] || skip)
+                               skip++;
+               }
+               if (*p == 'C')
+               {
+                       depth--;
+                       if (skip)
+                               skip--;
+               }
+               p++;
+       }
+       return res;
+}
 
-       return group.release();
+// In the code string, the letters represent the following:
+//
+// A:
+//     if (certain bit is set)
+//     {
+//       result++;
+//
+// B:
+//     } else {
+//
+// C:
+//     }
+//
+// examples:
+// AABCBC leads to if(){r++;if(){r++;}else{}}else{}
+// ABABCC leads to if(){r++;}else{if(){r++;}else{}}
+// ABCABC leads to if(){r++;}else{}if(){r++;}else{}
+//
+// Code generation gets a bit complicated due to the else-branches,
+// which do not generate new values. Thus, the generator needs to
+// keep track of the previous variable change seen by the else
+// branch.
+string generateOpPhiCase5 (const string& s)
+{
+       std::stack<int>                         idStack;
+       std::stack<std::string>         value;
+       std::stack<std::string>         valueLabel;
+       std::stack<std::string>         mergeLeft;
+       std::stack<std::string>         mergeRight;
+       std::ostringstream                      res;
+       const char*                                     p                       = s.c_str();
+       int                                                     depth           = -1;
+       int                                                     currId          = 0;
+       int                                                     iter            = 0;
+
+       idStack.push(-1);
+       value.push("%f32_0");
+       valueLabel.push("%f32_0 %entry");
+
+       while (*p)
+       {
+               if (*p == 'A')
+               {
+                       depth++;
+                       currId = iter;
+                       idStack.push(currId);
+                       res << "\tOpSelectionMerge %m" << currId << " None\n";
+                       res << "\tOpBranchConditional %b" << depth << " %t" << currId << " %f" << currId << "\n";
+                       res << "%t" << currId << " = OpLabel\n";
+                       res << "%rt" << currId << " = OpFAdd %f32 " << value.top() << " %f32_1\n";
+                       std::ostringstream tag;
+                       tag << "%rt" << currId;
+                       value.push(tag.str());
+                       tag << " %t" << currId;
+                       valueLabel.push(tag.str());
+               }
+
+               if (*p == 'B')
+               {
+                       mergeLeft.push(valueLabel.top());
+                       value.pop();
+                       valueLabel.pop();
+                       res << "\tOpBranch %m" << currId << "\n";
+                       res << "%f" << currId << " = OpLabel\n";
+                       std::ostringstream tag;
+                       tag << value.top() << " %f" << currId;
+                       valueLabel.pop();
+                       valueLabel.push(tag.str());
+               }
+
+               if (*p == 'C')
+               {
+                       mergeRight.push(valueLabel.top());
+                       res << "\tOpBranch %m" << currId << "\n";
+                       res << "%m" << currId << " = OpLabel\n";
+                       if (*(p + 1) == 0)
+                               res << "%res"; // last result goes to %res
+                       else
+                               res << "%rm" << currId;
+                       res << " = OpPhi %f32  " << mergeLeft.top() << "  " << mergeRight.top() << "\n";
+                       std::ostringstream tag;
+                       tag << "%rm" << currId;
+                       value.pop();
+                       value.push(tag.str());
+                       tag << " %m" << currId;
+                       valueLabel.pop();
+                       valueLabel.push(tag.str());
+                       mergeLeft.pop();
+                       mergeRight.pop();
+                       depth--;
+                       idStack.pop();
+                       currId = idStack.top();
+               }
+               p++;
+               iter++;
+       }
+       return res.str();
 }
 
 tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
@@ -1657,12 +3226,24 @@ tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
        ComputeShaderSpec                               spec1;
        ComputeShaderSpec                               spec2;
        ComputeShaderSpec                               spec3;
+       ComputeShaderSpec                               spec4;
+       ComputeShaderSpec                               spec5;
        de::Random                                              rnd                             (deStringHash(group->getName()));
        const int                                               numElements             = 100;
        vector<float>                                   inputFloats             (numElements, 0);
        vector<float>                                   outputFloats1   (numElements, 0);
        vector<float>                                   outputFloats2   (numElements, 0);
        vector<float>                                   outputFloats3   (numElements, 0);
+       vector<float>                                   outputFloats4   (numElements, 0);
+       vector<float>                                   outputFloats5   (numElements, 0);
+       std::string                                             codestring              = "ABC";
+       const int                                               test4Width              = 1024;
+
+       // Build case 5 code string. Each iteration makes the hierarchy more complicated.
+       // 9 iterations with (7, 24) parameters makes the hierarchy 8 deep with about 1500 lines of
+       // shader code.
+       for (int i = 0, acc = 0; i < 9; i++)
+               codestring = expandOpPhiCase5(codestring, acc, 7, 24);
 
        fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
 
@@ -1680,6 +3261,11 @@ tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
                }
                outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
                outputFloats3[ndx] = 8.5f - inputFloats[ndx];
+
+               int index4 = (int)deFloor(deAbs((float)ndx * inputFloats[ndx]));
+               outputFloats4[ndx] = (float)(index4 % test4Width) * 10.0f + 5.0f;
+
+               outputFloats5[ndx] = calcOpPhiCase5(inputFloats[ndx], codestring);
        }
 
        spec1.assembly =
@@ -1832,6 +3418,142 @@ tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
 
        group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", "Swap the values of two variables using OpPhi", spec3));
 
+       spec4.assembly =
+               "OpCapability Shader\n"
+               "%ext = OpExtInstImport \"GLSL.std.450\"\n"
+               "OpMemoryModel Logical GLSL450\n"
+               "OpEntryPoint GLCompute %main \"main\" %id\n"
+               "OpExecutionMode %main LocalSize 1 1 1\n"
+
+               "OpSource GLSL 430\n"
+               "OpName %main \"main\"\n"
+               "OpName %id \"gl_GlobalInvocationID\"\n"
+
+               "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+               + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+               "%id       = OpVariable %uvec3ptr Input\n"
+               "%zero     = OpConstant %i32 0\n"
+               "%cimod    = OpConstant %u32 " + generateIntWidth(test4Width) + "\n"
+
+               + generateConstantDefinitions(test4Width) +
+
+               "%main     = OpFunction %void None %voidf\n"
+               "%entry    = OpLabel\n"
+               "%idval    = OpLoad %uvec3 %id\n"
+               "%x        = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+               "%inval    = OpLoad %f32 %inloc\n"
+               "%xf       = OpConvertUToF %f32 %x\n"
+               "%xm       = OpFMul %f32 %xf %inval\n"
+               "%xa       = OpExtInst %f32 %ext FAbs %xm\n"
+               "%xi       = OpConvertFToU %u32 %xa\n"
+               "%selector = OpUMod %u32 %xi %cimod\n"
+               "            OpSelectionMerge %phi None\n"
+               "            OpSwitch %selector %default "
+
+               + generateSwitchCases(test4Width) +
+
+               "%default  = OpLabel\n"
+               "            OpUnreachable\n"
+
+               + generateSwitchTargets(test4Width) +
+
+               "%phi      = OpLabel\n"
+               "%result   = OpPhi %f32"
+
+               + generateOpPhiParams(test4Width) +
+
+               "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "            OpStore %outloc %result\n"
+               "            OpReturn\n"
+
+               "            OpFunctionEnd\n";
+       spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+       spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
+       spec4.numWorkGroups = IVec3(numElements, 1, 1);
+
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "wide", "OpPhi with a lot of parameters", spec4));
+
+       spec5.assembly =
+               "OpCapability Shader\n"
+               "%ext      = OpExtInstImport \"GLSL.std.450\"\n"
+               "OpMemoryModel Logical GLSL450\n"
+               "OpEntryPoint GLCompute %main \"main\" %id\n"
+               "OpExecutionMode %main LocalSize 1 1 1\n"
+               "%code     = OpString \"" + codestring + "\"\n"
+
+               "OpSource GLSL 430\n"
+               "OpName %main \"main\"\n"
+               "OpName %id \"gl_GlobalInvocationID\"\n"
+
+               "OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+               + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+               "%id       = OpVariable %uvec3ptr Input\n"
+               "%zero     = OpConstant %i32 0\n"
+               "%f32_0    = OpConstant %f32 0.0\n"
+               "%f32_0_5  = OpConstant %f32 0.5\n"
+               "%f32_1    = OpConstant %f32 1.0\n"
+               "%f32_1_5  = OpConstant %f32 1.5\n"
+               "%f32_2    = OpConstant %f32 2.0\n"
+               "%f32_3_5  = OpConstant %f32 3.5\n"
+               "%f32_4    = OpConstant %f32 4.0\n"
+               "%f32_7_5  = OpConstant %f32 7.5\n"
+               "%f32_8    = OpConstant %f32 8.0\n"
+               "%f32_15_5 = OpConstant %f32 15.5\n"
+               "%f32_16   = OpConstant %f32 16.0\n"
+               "%f32_31_5 = OpConstant %f32 31.5\n"
+               "%f32_32   = OpConstant %f32 32.0\n"
+               "%f32_63_5 = OpConstant %f32 63.5\n"
+               "%f32_64   = OpConstant %f32 64.0\n"
+               "%f32_127_5 = OpConstant %f32 127.5\n"
+               "%f32_128  = OpConstant %f32 128.0\n"
+               "%f32_256  = OpConstant %f32 256.0\n"
+
+               "%main     = OpFunction %void None %voidf\n"
+               "%entry    = OpLabel\n"
+               "%idval    = OpLoad %uvec3 %id\n"
+               "%x        = OpCompositeExtract %u32 %idval 0\n"
+               "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+               "%inval    = OpLoad %f32 %inloc\n"
+
+               "%xabs     = OpExtInst %f32 %ext FAbs %inval\n"
+               "%x8       = OpFMod %f32 %xabs %f32_256\n"
+               "%x7       = OpFMod %f32 %xabs %f32_128\n"
+               "%x6       = OpFMod %f32 %xabs %f32_64\n"
+               "%x5       = OpFMod %f32 %xabs %f32_32\n"
+               "%x4       = OpFMod %f32 %xabs %f32_16\n"
+               "%x3       = OpFMod %f32 %xabs %f32_8\n"
+               "%x2       = OpFMod %f32 %xabs %f32_4\n"
+               "%x1       = OpFMod %f32 %xabs %f32_2\n"
+
+               "%b7       = OpFOrdGreaterThanEqual %bool %x8 %f32_127_5\n"
+               "%b6       = OpFOrdGreaterThanEqual %bool %x7 %f32_63_5\n"
+               "%b5       = OpFOrdGreaterThanEqual %bool %x6 %f32_31_5\n"
+               "%b4       = OpFOrdGreaterThanEqual %bool %x5 %f32_15_5\n"
+               "%b3       = OpFOrdGreaterThanEqual %bool %x4 %f32_7_5\n"
+               "%b2       = OpFOrdGreaterThanEqual %bool %x3 %f32_3_5\n"
+               "%b1       = OpFOrdGreaterThanEqual %bool %x2 %f32_1_5\n"
+               "%b0       = OpFOrdGreaterThanEqual %bool %x1 %f32_0_5\n"
+
+               + generateOpPhiCase5(codestring) +
+
+               "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+               "            OpStore %outloc %res\n"
+               "            OpReturn\n"
+
+               "            OpFunctionEnd\n";
+       spec5.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+       spec5.outputs.push_back(BufferSp(new Float32Buffer(outputFloats5)));
+       spec5.numWorkGroups = IVec3(numElements, 1, 1);
+
+       group->addChild(new SpvAsmComputeShaderCase(testCtx, "nested", "Stress OpPhi with a lot of nesting", spec5));
+
+       createOpPhiVartypeTests(group, testCtx);
+
        return group.release();
 }
 
@@ -2484,11 +4206,12 @@ bool compareOpQuantizeF16ComputeExactCase (const std::vector<BufferSp>&, const v
        if (outputAllocs.size() != 1)
                return false;
 
-       // We really just need this for size because we cannot compare Nans.
-       const BufferSp& expectedOutput  = expectedOutputs[0];
-       const float*    outputAsFloat   = static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+       // Only size is needed because we cannot compare Nans.
+       size_t byteSize = expectedOutputs[0]->getByteSize();
+
+       const float*    outputAsFloat   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
 
-       if (expectedOutput->getNumBytes() != 4*sizeof(float)) {
+       if (byteSize != 4*sizeof(float)) {
                return false;
        }
 
@@ -2524,11 +4247,12 @@ bool compareNan (const std::vector<BufferSp>&, const vector<AllocationSp>& outpu
        if (outputAllocs.size() != 1)
                return false;
 
-       // We really just need this for size because we cannot compare Nans.
-       const BufferSp& expectedOutput          = expectedOutputs[0];
-       const float* output_as_float            = static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+       // Only size is needed because we cannot compare Nans.
+       size_t byteSize = expectedOutputs[0]->getByteSize();
 
-       for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float); ++idx)
+       const float* const      output_as_float = static_cast<const float* const>(outputAllocs[0]->getHostPtr());
+
+       for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
        {
                if (!deFloatIsNaN(output_as_float[idx]))
                {
@@ -3474,6 +5198,7 @@ tcu::TestCaseGroup* createOpUndefGroup (tcu::TestContext& testCtx)
 
                return group.release();
 }
+
 } // anonymous
 
 tcu::TestCaseGroup* createOpSourceTests (tcu::TestContext& testCtx)
@@ -4265,6 +5990,7 @@ tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
                "OpDecorate %sc_1  SpecId 1\n";
 
        const char      typesAndConstants1[]    =
+               "${OPTYPE_DEFINITIONS:opt}"
                "%sc_0      = OpSpecConstant${SC_DEF0}\n"
                "%sc_1      = OpSpecConstant${SC_DEF1}\n"
                "%sc_op     = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
@@ -4273,6 +5999,7 @@ tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
                "%test_code = OpFunction %v4f32 None %v4f32_function\n"
                "%param     = OpFunctionParameter %v4f32\n"
                "%label     = OpLabel\n"
+               "${TYPE_CONVERT:opt}"
                "%result    = OpVariable %fp_v4f32 Function\n"
                "             OpStore %result %param\n"
                "%gen       = ${GEN_RESULT}\n"
@@ -4309,6 +6036,7 @@ tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
        outputColors2[3] = RGBA(0,   0,   255, 255);
 
        const char addZeroToSc[]                = "OpIAdd %i32 %c_i32_0 %sc_op";
+       const char addZeroToSc32[]              = "OpIAdd %i32 %c_i32_0 %sc_op32";
        const char selectTrueUsingSc[]  = "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
        const char selectFalseUsingSc[] = "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
 
@@ -4343,14 +6071,39 @@ tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
        cases.push_back(SpecConstantTwoIntGraphicsCase("not",                                   " %i32 0",              " %i32 0",              "%i32",         "Not                  %sc_0",                                   -2,             0,              addZeroToSc,            outputColors2));
        cases.push_back(SpecConstantTwoIntGraphicsCase("logicalnot",                    "False %bool",  "False %bool",  "%bool",        "LogicalNot           %sc_0",                                   1,              0,              selectFalseUsingSc,     outputColors2));
        cases.push_back(SpecConstantTwoIntGraphicsCase("select",                                "False %bool",  " %i32 0",              "%i32",         "Select               %sc_0 %sc_1 %c_i32_0",    1,              1,              addZeroToSc,            outputColors2));
-       // OpSConvert, OpFConvert: these two instructions involve ints/floats of different bitwidths.
+       cases.push_back(SpecConstantTwoIntGraphicsCase("sconvert",                              " %i32 0",              " %i32 0",              "%i16",         "SConvert             %sc_0",                                   -1,             0,              addZeroToSc32,          outputColors0));
+       // -1082130432 stored as 32-bit two's complement is the binary representation of -1 as IEEE-754 Float
+       cases.push_back(SpecConstantTwoIntGraphicsCase("fconvert",                              " %f32 0",              " %f32 0",              "%f64",         "FConvert             %sc_0",                                   -1082130432, 0, addZeroToSc32,          outputColors0));
        // \todo[2015-12-1 antiagainst] OpQuantizeToF16
 
        for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
        {
-               map<string, string>     specializations;
-               map<string, string>     fragments;
-               vector<deInt32>         specConstants;
+               map<string, string>                     specializations;
+               map<string, string>                     fragments;
+               vector<deInt32>                         specConstants;
+               vector<string>                          features;
+               PushConstants                           noPushConstants;
+               GraphicsResources                       noResources;
+               GraphicsInterfaces                      noInterfaces;
+               std::vector<std::string>        noExtensions;
+
+               // Special SPIR-V code for SConvert-case
+               if (strcmp(cases[caseNdx].caseName, "sconvert") == 0)
+               {
+                       features.push_back("shaderInt16");
+                       fragments["capability"]                                 = "OpCapability Int16\n";                                       // Adds 16-bit integer capability
+                       specializations["OPTYPE_DEFINITIONS"]   = "%i16 = OpTypeInt 16 1\n";                            // Adds 16-bit integer type
+                       specializations["TYPE_CONVERT"]                 = "%sc_op32 = OpSConvert %i32 %sc_op\n";        // Converts 16-bit integer to 32-bit integer
+               }
+
+               // Special SPIR-V code for FConvert-case
+               if (strcmp(cases[caseNdx].caseName, "fconvert") == 0)
+               {
+                       features.push_back("shaderFloat64");
+                       fragments["capability"]                                 = "OpCapability Float64\n";                                     // Adds 64-bit float capability
+                       specializations["OPTYPE_DEFINITIONS"]   = "%f64 = OpTypeFloat 64\n";                            // Adds 64-bit float type
+                       specializations["TYPE_CONVERT"]                 = "%sc_op32 = OpConvertFToS %i32 %sc_op\n";     // Converts 64-bit float to 32-bit integer
+               }
 
                specializations["SC_DEF0"]                      = cases[caseNdx].scDefinition0;
                specializations["SC_DEF1"]                      = cases[caseNdx].scDefinition1;
@@ -4365,7 +6118,9 @@ tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
                specConstants.push_back(cases[caseNdx].scActualValue0);
                specConstants.push_back(cases[caseNdx].scActualValue1);
 
-               createTestsForAllStages(cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants, group.get());
+               createTestsForAllStages(
+                       cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants,
+                       noPushConstants, noResources, noInterfaces, noExtensions, features, VulkanFeatures(), group.get());
        }
 
        const char      decorations2[]                  =
@@ -4374,23 +6129,26 @@ tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
                "OpDecorate %sc_2  SpecId 2\n";
 
        const char      typesAndConstants2[]    =
-               "%v3i32     = OpTypeVector %i32 3\n"
-
-               "%sc_0      = OpSpecConstant %i32 0\n"
-               "%sc_1      = OpSpecConstant %i32 0\n"
-               "%sc_2      = OpSpecConstant %i32 0\n"
-
+               "%v3i32       = OpTypeVector %i32 3\n"
                "%vec3_0      = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
-               "%sc_vec3_0   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_0        %vec3_0    0\n"     // (sc_0, 0, 0)
-               "%sc_vec3_1   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_1        %vec3_0    1\n"     // (0, sc_1, 0)
-               "%sc_vec3_2   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_2        %vec3_0    2\n"     // (0, 0, sc_2)
-               "%sc_vec3_01  = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0   %sc_vec3_1 1 0 4\n" // (0,    sc_0, sc_1)
-               "%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_01  %sc_vec3_2 5 1 2\n" // (sc_2, sc_0, sc_1)
-               "%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            0\n"     // sc_2
-               "%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            1\n"     // sc_0
-               "%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            2\n"     // sc_1
-               "%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"        // (sc_2 - sc_0)
-               "%sc_final    = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n";       // (sc_2 - sc_0) * sc_1
+               "%vec3_undef  = OpUndef %v3i32\n"
+
+               "%sc_0        = OpSpecConstant %i32 0\n"
+               "%sc_1        = OpSpecConstant %i32 0\n"
+               "%sc_2        = OpSpecConstant %i32 0\n"
+               "%sc_vec3_0   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_0        %vec3_0      0\n"                                                 // (sc_0, 0,    0)
+               "%sc_vec3_1   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_1        %vec3_0      1\n"                                                 // (0,    sc_1, 0)
+               "%sc_vec3_2   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_2        %vec3_0      2\n"                                                 // (0,    0,    sc_2)
+               "%sc_vec3_0_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0   %vec3_undef  0          0xFFFFFFFF 2\n"   // (sc_0, ???,  0)
+               "%sc_vec3_1_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1          0\n"   // (???,  sc_1, 0)
+               "%sc_vec3_2_s = OpSpecConstantOp %v3i32 VectorShuffle    %vec3_undef  %sc_vec3_2   5          0xFFFFFFFF 5\n"   // (sc_2, ???,  sc_2)
+               "%sc_vec3_01  = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n"                                             // (0,    sc_0, sc_1)
+               "%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n"                                             // (sc_2, sc_0, sc_1)
+               "%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"                                                 // sc_2
+               "%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"                                                 // sc_0
+               "%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"                                                 // sc_1
+               "%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"                                                              // (sc_2 - sc_0)
+               "%sc_final    = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n";                                                             // (sc_2 - sc_0) * sc_1
 
        const char      function2[]                             =
                "%test_code = OpFunction %v4f32 None %v4f32_function\n"
@@ -4615,8 +6373,7 @@ tcu::TestCaseGroup* createNoContractionTests(tcu::TestContext& testCtx)
                "%c_vec4_1       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
                "%c_f32_1pl2_23  = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
                "%c_f32_1mi2_23  = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
-               "%c_f32_n1pn24   = OpConstant %f32 -0x1p-24\n"
-               ;
+               "%c_f32_n1pn24   = OpConstant %f32 -0x1p-24\n";
 
        const char                                              function[]       =
                "%test_code      = OpFunction %v4f32 None %v4f32_function\n"
@@ -4767,7 +6524,7 @@ tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
        const NameCodePair tests[] =
        {
                {"bool", "", "%bool"},
-               {"vec2uint32", "%type = OpTypeVector %u32 2", "%type"},
+               {"vec2uint32", "", "%v2u32"},
                {"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown", "%type"},
                {"sampler", "%type = OpTypeSampler", "%type"},
                {"sampledimage", "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n" "%type = OpTypeSampledImage %img", "%type"},
@@ -4802,8 +6559,8 @@ tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
                "%b = OpFAdd %f32 %a %actually_zero\n"
                "%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
                "OpReturnValue %ret\n"
-               "OpFunctionEnd\n"
-               ;
+               "OpFunctionEnd\n";
+
        createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
 
        fragments["testfun"] =
@@ -4815,8 +6572,8 @@ tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
                "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
                "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
                "OpReturnValue %ret\n"
-               "OpFunctionEnd\n"
-               ;
+               "OpFunctionEnd\n";
+
        createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
 
        fragments["testfun"] =
@@ -4828,8 +6585,8 @@ tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
                "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
                "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
                "OpReturnValue %ret\n"
-               "OpFunctionEnd\n"
-               ;
+               "OpFunctionEnd\n";
+
        createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
 
        fragments["testfun"] =
@@ -4863,8 +6620,8 @@ tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
                "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
                "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
                "OpReturnValue %ret\n"
-               "OpFunctionEnd\n"
-               ;
+               "OpFunctionEnd\n";
+
        createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
 
        fragments["pre_main"] =
@@ -4900,8 +6657,8 @@ tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
                "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
                "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
                "OpReturnValue %ret\n"
-               "OpFunctionEnd\n"
-               ;
+               "OpFunctionEnd\n";
+
        createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
 
        return opUndefTests.release();
@@ -5327,8 +7084,8 @@ tcu::TestCaseGroup* createLoopTests(tcu::TestContext& testCtx)
                "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
                "OpReturnValue %result\n"
 
-               "OpFunctionEnd\n"
-               ;
+               "OpFunctionEnd\n";
+
        createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());
 
        // Body comprised of multiple basic blocks.
@@ -5713,6 +7470,172 @@ tcu::TestCaseGroup* createFRemTests(tcu::TestContext& testCtx)
        return testGroup.release();
 }
 
+// Test for the OpSRem instruction.
+tcu::TestCaseGroup* createOpSRemGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+       de::MovePtr<tcu::TestCaseGroup>         testGroup(new tcu::TestCaseGroup(testCtx, "srem", "OpSRem"));
+       map<string, string>                                     fragments;
+
+       fragments["pre_main"]                            =
+               "%c_f32_255 = OpConstant %f32 255.0\n"
+               "%c_i32_128 = OpConstant %i32 128\n"
+               "%c_i32_255 = OpConstant %i32 255\n"
+               "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
+               "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
+               "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
+
+       // The test does the following.
+       // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
+       // ivec4 result = ivec4(srem(ints.x, ints.y), srem(ints.y, ints.z), srem(ints.z, ints.x), 255);
+       // return float(result + 128) / 255.0;
+       fragments["testfun"]                             =
+               "%test_code = OpFunction %v4f32 None %v4f32_function\n"
+               "%param1 = OpFunctionParameter %v4f32\n"
+               "%label_testfun = OpLabel\n"
+               "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
+               "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
+               "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
+               "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
+               "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
+               "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
+               "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
+               "%x_out = OpSRem %i32 %x_in %y_in\n"
+               "%y_out = OpSRem %i32 %y_in %z_in\n"
+               "%z_out = OpSRem %i32 %z_in %x_in\n"
+               "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
+               "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
+               "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
+               "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
+               "OpReturnValue %float_out\n"
+               "OpFunctionEnd\n";
+
+       const struct CaseParams
+       {
+               const char*             name;
+               const char*             failMessageTemplate;    // customized status message
+               qpTestResult    failResult;                             // override status on failure
+               int                             operands[4][3];                 // four (x, y, z) vectors of operands
+               int                             results[4][3];                  // four (x, y, z) vectors of results
+       } cases[] =
+       {
+               {
+                       "positive",
+                       "${reason}",
+                       QP_TEST_RESULT_FAIL,
+                       { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } },                 // operands
+                       { { 5, 12,  2 }, { 0, 5, 2 }, {  3, 8,  6 }, { 25, 60,   0 } },                 // results
+               },
+               {
+                       "all",
+                       "Inconsistent results, but within specification: ${reason}",
+                       negFailResult,                                                                                                                  // negative operands, not required by the spec
+                       { { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } },    // operands
+                       { { 5, 12,  -2 }, {  0, -5, 2 }, {  3, 8,  -6 }, { 25, -60,   0 } },    // results
+               },
+       };
+       // If either operand is negative the result is undefined. Some implementations may still return correct values.
+
+       for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+       {
+               const CaseParams&       params                  = cases[caseNdx];
+               RGBA                            inputColors[4];
+               RGBA                            outputColors[4];
+
+               for (int i = 0; i < 4; ++i)
+               {
+                       inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
+                       outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
+               }
+
+               createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
+       }
+
+       return testGroup.release();
+}
+
+// Test for the OpSMod instruction.
+tcu::TestCaseGroup* createOpSModGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
+{
+       de::MovePtr<tcu::TestCaseGroup>         testGroup(new tcu::TestCaseGroup(testCtx, "smod", "OpSMod"));
+       map<string, string>                                     fragments;
+
+       fragments["pre_main"]                            =
+               "%c_f32_255 = OpConstant %f32 255.0\n"
+               "%c_i32_128 = OpConstant %i32 128\n"
+               "%c_i32_255 = OpConstant %i32 255\n"
+               "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
+               "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
+               "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
+
+       // The test does the following.
+       // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
+       // ivec4 result = ivec4(smod(ints.x, ints.y), smod(ints.y, ints.z), smod(ints.z, ints.x), 255);
+       // return float(result + 128) / 255.0;
+       fragments["testfun"]                             =
+               "%test_code = OpFunction %v4f32 None %v4f32_function\n"
+               "%param1 = OpFunctionParameter %v4f32\n"
+               "%label_testfun = OpLabel\n"
+               "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
+               "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
+               "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
+               "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
+               "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
+               "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
+               "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
+               "%x_out = OpSMod %i32 %x_in %y_in\n"
+               "%y_out = OpSMod %i32 %y_in %z_in\n"
+               "%z_out = OpSMod %i32 %z_in %x_in\n"
+               "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
+               "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
+               "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
+               "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
+               "OpReturnValue %float_out\n"
+               "OpFunctionEnd\n";
+
+       const struct CaseParams
+       {
+               const char*             name;
+               const char*             failMessageTemplate;    // customized status message
+               qpTestResult    failResult;                             // override status on failure
+               int                             operands[4][3];                 // four (x, y, z) vectors of operands
+               int                             results[4][3];                  // four (x, y, z) vectors of results
+       } cases[] =
+       {
+               {
+                       "positive",
+                       "${reason}",
+                       QP_TEST_RESULT_FAIL,
+                       { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } },                         // operands
+                       { { 5, 12,  2 }, { 0, 5, 2 }, {  3, 8,  6 }, { 25, 60,   0 } },                         // results
+               },
+               {
+                       "all",
+                       "Inconsistent results, but within specification: ${reason}",
+                       negFailResult,                                                                                                                          // negative operands, not required by the spec
+                       { { 5, 12, -17 }, { -5, -5,  7 }, { 75,   8, -81 }, {  25, -60, 100 } },        // operands
+                       { { 5, -5,   3 }, {  0,  2, -3 }, {  3, -73,  69 }, { -35,  40,   0 } },        // results
+               },
+       };
+       // If either operand is negative the result is undefined. Some implementations may still return correct values.
+
+       for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
+       {
+               const CaseParams&       params                  = cases[caseNdx];
+               RGBA                            inputColors[4];
+               RGBA                            outputColors[4];
+
+               for (int i = 0; i < 4; ++i)
+               {
+                       inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
+                       outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
+               }
+
+               createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
+       }
+
+       return testGroup.release();
+}
+
 enum IntegerType
 {
        INTEGER_TYPE_SIGNED_16,
@@ -5782,6 +7705,12 @@ const string getAsmTypeDeclaration (IntegerType type)
        return "OpTypeInt " + getBitWidthStr(type) + sign;
 }
 
+const string getAsmTypeName (IntegerType type)
+{
+       const string prefix = isSigned(type) ? "%i" : "%u";
+       return prefix + getBitWidthStr(type);
+}
+
 template<typename T>
 BufferSp getSpecializedBuffer (deInt64 number)
 {
@@ -5846,21 +7775,34 @@ struct ConvertCase
        , m_inputBuffer         (getBuffer(from, number))
        , m_outputBuffer        (getBuffer(to, number))
        {
-               m_asmTypes["inputType"]         = getAsmTypeDeclaration(from);
-               m_asmTypes["outputType"]        = getAsmTypeDeclaration(to);
+               m_asmTypes["inputType"]         = getAsmTypeName(from);
+               m_asmTypes["outputType"]        = getAsmTypeName(to);
 
                if (m_features == COMPUTE_TEST_USES_INT16)
                {
-                       m_asmTypes["int_capabilities"] = "OpCapability Int16\n";
+                       m_asmTypes["int_capabilities"]    = "OpCapability Int16\n"
+                                                                                               "OpCapability StorageUniformBufferBlock16\n";
+                       m_asmTypes["int_additional_decl"] = "%i16        = OpTypeInt 16 1\n"
+                                                                                               "%u16        = OpTypeInt 16 0\n";
+                       m_asmTypes["int_extensions"]      = "OpExtension \"SPV_KHR_16bit_storage\"\n";
                }
                else if (m_features == COMPUTE_TEST_USES_INT64)
                {
-                       m_asmTypes["int_capabilities"] = "OpCapability Int64\n";
+                       m_asmTypes["int_capabilities"]    = "OpCapability Int64\n";
+                       m_asmTypes["int_additional_decl"] = "%i64        = OpTypeInt 64 1\n"
+                                                                                               "%u64        = OpTypeInt 64 0\n";
+                       m_asmTypes["int_extensions"]      = "";
                }
                else if (m_features == COMPUTE_TEST_USES_INT16_INT64)
                {
-                       m_asmTypes["int_capabilities"] = string("OpCapability Int16\n") +
-                                                                                                       "OpCapability Int64\n";
+                       m_asmTypes["int_capabilities"]    = "OpCapability Int16\n"
+                                                                                               "OpCapability StorageUniformBufferBlock16\n"
+                                                                                               "OpCapability Int64\n";
+                       m_asmTypes["int_additional_decl"] = "%i16        = OpTypeInt 16 1\n"
+                                                                                               "%u16        = OpTypeInt 16 0\n"
+                                                                                               "%i64        = OpTypeInt 64 1\n"
+                                                                                               "%u64        = OpTypeInt 64 0\n";
+                       m_asmTypes["int_extensions"]      = "OpExtension \"SPV_KHR_16bit_storage\"\n";
                }
                else
                {
@@ -5889,6 +7831,7 @@ const string getConvertCaseShaderStr (const string& instruction, const ConvertCa
        const StringTemplate shader (
                "OpCapability Shader\n"
                "${int_capabilities}"
+               "${int_extensions}"
                "OpMemoryModel Logical GLSL450\n"
                "OpEntryPoint GLCompute %main \"main\" %id\n"
                "OpExecutionMode %main LocalSize 1 1 1\n"
@@ -5912,23 +7855,21 @@ const string getConvertCaseShaderStr (const string& instruction, const ConvertCa
                "%voidf      = OpTypeFunction %void\n"
                "%u32        = OpTypeInt 32 0\n"
                "%i32        = OpTypeInt 32 1\n"
+               "${int_additional_decl}"
                "%uvec3      = OpTypeVector %u32 3\n"
                "%uvec3ptr   = OpTypePointer Input %uvec3\n"
-               // Custom types
-               "%in_type    = ${inputType}\n"
-               "%out_type   = ${outputType}\n"
                // Derived types
-               "%in_ptr     = OpTypePointer Uniform %in_type\n"
-               "%out_ptr    = OpTypePointer Uniform %out_type\n"
-               "%in_arr     = OpTypeRuntimeArray %in_type\n"
-               "%out_arr    = OpTypeRuntimeArray %out_type\n"
+               "%in_ptr     = OpTypePointer Uniform ${inputType}\n"
+               "%out_ptr    = OpTypePointer Uniform ${outputType}\n"
+               "%in_arr     = OpTypeRuntimeArray ${inputType}\n"
+               "%out_arr    = OpTypeRuntimeArray ${outputType}\n"
                "%in_buf     = OpTypeStruct %in_arr\n"
                "%out_buf    = OpTypeStruct %out_arr\n"
                "%in_bufptr  = OpTypePointer Uniform %in_buf\n"
                "%out_bufptr = OpTypePointer Uniform %out_buf\n"
                "%indata     = OpVariable %in_bufptr Uniform\n"
                "%outdata    = OpVariable %out_bufptr Uniform\n"
-               "%inputptr   = OpTypePointer Input %in_type\n"
+               "%inputptr   = OpTypePointer Input ${inputType}\n"
                "%id         = OpVariable %uvec3ptr Input\n"
                // Constants
                "%zero       = OpConstant %i32 0\n"
@@ -5939,8 +7880,8 @@ const string getConvertCaseShaderStr (const string& instruction, const ConvertCa
                "%x          = OpCompositeExtract %u32 %idval 0\n"
                "%inloc      = OpAccessChain %in_ptr %indata %zero %x\n"
                "%outloc     = OpAccessChain %out_ptr %outdata %zero %x\n"
-               "%inval      = OpLoad %in_type %inloc\n"
-               "%conv       = ${instruction} %out_type %inval\n"
+               "%inval      = OpLoad ${inputType} %inloc\n"
+               "%conv       = ${instruction} ${outputType} %inval\n"
                "              OpStore %outloc %conv\n"
                "              OpReturn\n"
                "              OpFunctionEnd\n"
@@ -5981,6 +7922,11 @@ tcu::TestCaseGroup* createSConvertTests (tcu::TestContext& testCtx)
                spec.outputs.push_back(test->m_outputBuffer);
                spec.numWorkGroups = IVec3(1, 1, 1);
 
+               if (test->m_features == COMPUTE_TEST_USES_INT16 || test->m_features == COMPUTE_TEST_USES_INT16_INT64)
+               {
+                       spec.extensions.push_back("VK_KHR_16bit_storage");
+               }
+
                group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "Convert integers with OpSConvert.", spec, test->m_features));
        }
 
@@ -6019,6 +7965,11 @@ tcu::TestCaseGroup* createUConvertTests (tcu::TestContext& testCtx)
                spec.outputs.push_back(test->m_outputBuffer);
                spec.numWorkGroups = IVec3(1, 1, 1);
 
+               if (test->m_features == COMPUTE_TEST_USES_INT16 || test->m_features == COMPUTE_TEST_USES_INT16_INT64)
+               {
+                       spec.extensions.push_back("VK_KHR_16bit_storage");
+               }
+
                group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "Convert integers with OpUConvert.", spec, test->m_features));
        }
        return group.release();
@@ -6088,16 +8039,18 @@ void createVectorCompositeCases (vector<map<string, string> >& testCases, de::Ra
        // Vec2 to Vec4
        for (int width = 2; width <= 4; ++width)
        {
-               string randomConst = numberToString(getInt(rnd));
-               string widthStr = numberToString(width);
-               int index = rnd.getInt(0, width-1);
-
-               params["type"]                                  = "vec";
-               params["name"]                                  = params["type"] + "_" + widthStr;
-               params["compositeType"]                 = "%composite = OpTypeVector %custom " + widthStr +"\n";
-               params["filler"]                                = string("%filler    = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n";
-               params["compositeConstruct"]    = "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
-               params["indexes"]                               = numberToString(index);
+               const string randomConst = numberToString(getInt(rnd));
+               const string widthStr = numberToString(width);
+               const string composite_type = "${customType}vec" + widthStr;
+               const int index = rnd.getInt(0, width-1);
+
+               params["type"]                  = "vec";
+               params["name"]                  = params["type"] + "_" + widthStr;
+               params["compositeDecl"]         = composite_type + " = OpTypeVector ${customType} " + widthStr +"\n";
+               params["compositeType"]         = composite_type;
+               params["filler"]                = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
+               params["compositeConstruct"]    = "%instance  = OpCompositeConstruct " + composite_type + repeatString(" %filler", width) + "\n";
+               params["indexes"]               = numberToString(index);
                testCases.push_back(params);
        }
 }
@@ -6113,14 +8066,14 @@ void createArrayCompositeCases (vector<map<string, string> >& testCases, de::Ran
                string widthStr = numberToString(width);
                int index = rnd.getInt(0, width-1);
 
-               params["type"]                                  = "array";
-               params["name"]                                  = params["type"] + "_" + widthStr;
-               params["compositeType"]                 = string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
-                                                                                       +        "%composite = OpTypeArray %custom %arraywidth\n";
-
-               params["filler"]                                = string("%filler    = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n";
+               params["type"]                  = "array";
+               params["name"]                  = params["type"] + "_" + widthStr;
+               params["compositeDecl"]         = string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
+                                                                                       +        "%composite = OpTypeArray ${customType} %arraywidth\n";
+               params["compositeType"]         = "%composite";
+               params["filler"]                = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
                params["compositeConstruct"]    = "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
-               params["indexes"]                               = numberToString(index);
+               params["indexes"]               = numberToString(index);
                testCases.push_back(params);
        }
 }
@@ -6135,12 +8088,13 @@ void createStructCompositeCases (vector<map<string, string> >& testCases, de::Ra
                string randomConst = numberToString(getInt(rnd));
                int index = rnd.getInt(0, width-1);
 
-               params["type"]                                  = "struct";
-               params["name"]                                  = params["type"] + "_" + numberToString(width);
-               params["compositeType"]                 = "%composite = OpTypeStruct" + repeatString(" %custom", width) + "\n";
-               params["filler"]                                = string("%filler    = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n";
+               params["type"]                  = "struct";
+               params["name"]                  = params["type"] + "_" + numberToString(width);
+               params["compositeDecl"]         = "%composite = OpTypeStruct" + repeatString(" ${customType}", width) + "\n";
+               params["compositeType"]         = "%composite";
+               params["filler"]                = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
                params["compositeConstruct"]    = "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
-               params["indexes"]                               = numberToString(index);
+               params["indexes"]               = numberToString(index);
                testCases.push_back(params);
        }
 }
@@ -6160,16 +8114,17 @@ void createMatrixCompositeCases (vector<map<string, string> >& testCases, de::Ra
                        int index_1 = rnd.getInt(0, width-1);
                        string columnStr = numberToString(column);
 
-                       params["type"]                                  = "matrix";
-                       params["name"]                                  = params["type"] + "_" + widthStr + "x" + columnStr;
-                       params["compositeType"]                 = string("%vectype   = OpTypeVector %custom " + widthStr + "\n")
+                       params["type"]          = "matrix";
+                       params["name"]          = params["type"] + "_" + widthStr + "x" + columnStr;
+                       params["compositeDecl"] = string("%vectype   = OpTypeVector ${customType} " + widthStr + "\n")
                                                                                                +        "%composite = OpTypeMatrix %vectype " + columnStr + "\n";
+                       params["compositeType"] = "%composite";
 
-                       params["filler"]                                = string("%filler    = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n"
+                       params["filler"]        = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n"
                                                                                                +        "%fillerVec = OpConstantComposite %vectype" + repeatString(" %filler", width) + "\n";
 
                        params["compositeConstruct"]    = "%instance  = OpCompositeConstruct %composite" + repeatString(" %fillerVec", column) + "\n";
-                       params["indexes"]                               = numberToString(index_0) + " " + numberToString(index_1);
+                       params["indexes"]       = numberToString(index_0) + " " + numberToString(index_1);
                        testCases.push_back(params);
                }
        }
@@ -6198,15 +8153,37 @@ const string getAssemblyTypeDeclaration (const NumberType type)
        }
 }
 
+const string getAssemblyTypeName (const NumberType type)
+{
+       switch (type)
+       {
+               case NUMBERTYPE_INT32:          return "%i32";
+               case NUMBERTYPE_UINT32:         return "%u32";
+               case NUMBERTYPE_FLOAT32:        return "%f32";
+               default:                        DE_ASSERT(false); return "";
+       }
+}
+
 const string specializeCompositeInsertShaderTemplate (const NumberType type, const map<string, string>& params)
 {
        map<string, string>     parameters(params);
 
-       parameters["typeDeclaration"] = getAssemblyTypeDeclaration(type);
-
+       const string customType = getAssemblyTypeName(type);
+       map<string, string> substCustomType;
+       substCustomType["customType"] = customType;
+       parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
+       parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
+       parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
+       parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
+       parameters["customType"] = customType;
        parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
 
-       return StringTemplate (
+       if (parameters.at("compositeType") != "%u32vec3")
+       {
+               parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
+       }
+
+       return StringTemplate(
                "OpCapability Shader\n"
                "OpCapability Matrix\n"
                "OpMemoryModel Logical GLSL450\n"
@@ -6233,19 +8210,20 @@ const string specializeCompositeInsertShaderTemplate (const NumberType type, con
                "%voidf     = OpTypeFunction %void\n"
                "%u32       = OpTypeInt 32 0\n"
                "%i32       = OpTypeInt 32 1\n"
-               "%uvec3     = OpTypeVector %u32 3\n"
-               "%uvec3ptr  = OpTypePointer Input %uvec3\n"
+               "%f32       = OpTypeFloat 32\n"
 
-               // Custom type
-               "%custom    = ${typeDeclaration}\n"
-               "${compositeType}"
+               // Composite declaration
+               "${compositeDecl}"
 
                // Constants
                "${filler}"
 
+               "${u32vec3Decl:opt}"
+               "%uvec3ptr  = OpTypePointer Input %u32vec3\n"
+
                // Inherited from custom
-               "%customptr = OpTypePointer Uniform %custom\n"
-               "%customarr = OpTypeRuntimeArray %custom\n"
+               "%customptr = OpTypePointer Uniform ${customType}\n"
+               "%customarr = OpTypeRuntimeArray ${customType}\n"
                "%buf       = OpTypeStruct %customarr\n"
                "%bufptr    = OpTypePointer Uniform %buf\n"
 
@@ -6257,19 +8235,19 @@ const string specializeCompositeInsertShaderTemplate (const NumberType type, con
 
                "%main      = OpFunction %void None %voidf\n"
                "%label     = OpLabel\n"
-               "%idval     = OpLoad %uvec3 %id\n"
+               "%idval     = OpLoad %u32vec3 %id\n"
                "%x         = OpCompositeExtract %u32 %idval 0\n"
 
                "%inloc     = OpAccessChain %customptr %indata %zero %x\n"
                "%outloc    = OpAccessChain %customptr %outdata %zero %x\n"
                // Read the input value
-               "%inval     = OpLoad %custom %inloc\n"
+               "%inval     = OpLoad ${customType} %inloc\n"
                // Create the composite and fill it
                "${compositeConstruct}"
                // Insert the input value to a place
-               "%instance2 = OpCompositeInsert %composite %inval %instance ${indexes}\n"
+               "%instance2 = OpCompositeInsert ${compositeType} %inval %instance ${indexes}\n"
                // Read back the value from the position
-               "%out_val   = OpCompositeExtract %custom %instance2 ${indexes}\n"
+               "%out_val   = OpCompositeExtract ${customType} %instance2 ${indexes}\n"
                // Store it in the output position
                "             OpStore %outloc %out_val\n"
                "             OpReturn\n"
@@ -6358,10 +8336,9 @@ const string specializeInBoundsShaderTemplate (const NumberType type, const Asse
        vector<string>          indexes         = de::splitString(fullIndex, ' ');
 
        map<string, string>     parameters      (params);
-       parameters["typeDeclaration"]   = getAssemblyTypeDeclaration(type);
-       parameters["structType"]                = repeatString(" %composite", structInfo.components);
+       parameters["structType"]        = repeatString(" ${compositeType}", structInfo.components);
        parameters["structConstruct"]   = repeatString(" %instance", structInfo.components);
-       parameters["insertIndexes"]             = fullIndex;
+       parameters["insertIndexes"]     = fullIndex;
 
        // In matrix cases the last two index is the CompositeExtract indexes
        const deUint32 extractIndexes = (parameters["type"] == "matrix") ? 2 : 1;
@@ -6386,7 +8363,25 @@ const string specializeInBoundsShaderTemplate (const NumberType type, const Asse
 
        parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
 
-       return StringTemplate (
+       const string customType = getAssemblyTypeName(type);
+       map<string, string> substCustomType;
+       substCustomType["customType"] = customType;
+       parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
+       parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
+       parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
+       parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
+       parameters["customType"] = customType;
+
+       const string compositeType = parameters.at("compositeType");
+       map<string, string> substCompositeType;
+       substCompositeType["compositeType"] = compositeType;
+       parameters["structType"] = StringTemplate(parameters.at("structType")).specialize(substCompositeType);
+       if (compositeType != "%u32vec3")
+       {
+               parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
+       }
+
+       return StringTemplate(
                "OpCapability Shader\n"
                "OpCapability Matrix\n"
                "OpMemoryModel Logical GLSL450\n"
@@ -6409,23 +8404,24 @@ const string specializeInBoundsShaderTemplate (const NumberType type, const Asse
                // General types
                "%void      = OpTypeVoid\n"
                "%voidf     = OpTypeFunction %void\n"
+               "%i32       = OpTypeInt 32 1\n"
                "%u32       = OpTypeInt 32 0\n"
-               "%uvec3     = OpTypeVector %u32 3\n"
-               "%uvec3ptr  = OpTypePointer Input %uvec3\n"
-               // Custom type
-               "%custom    = ${typeDeclaration}\n"
+               "%f32       = OpTypeFloat 32\n"
                // Custom types
-               "${compositeType}"
+               "${compositeDecl}"
+               // %u32vec3 if not already declared in ${compositeDecl}
+               "${u32vec3Decl:opt}"
+               "%uvec3ptr  = OpTypePointer Input %u32vec3\n"
                // Inherited from composite
-               "%composite_p = OpTypePointer Function %composite\n"
+               "%composite_p = OpTypePointer Function ${compositeType}\n"
                "%struct_t  = OpTypeStruct${structType}\n"
                "%struct_p  = OpTypePointer Function %struct_t\n"
                // Constants
                "${filler}"
                "${accessChainConstDeclaration}"
                // Inherited from custom
-               "%customptr = OpTypePointer Uniform %custom\n"
-               "%customarr = OpTypeRuntimeArray %custom\n"
+               "%customptr = OpTypePointer Uniform ${customType}\n"
+               "%customarr = OpTypeRuntimeArray ${customType}\n"
                "%buf       = OpTypeStruct %customarr\n"
                "%bufptr    = OpTypePointer Uniform %buf\n"
                "%indata    = OpVariable %bufptr Uniform\n"
@@ -6436,13 +8432,13 @@ const string specializeInBoundsShaderTemplate (const NumberType type, const Asse
                "%main      = OpFunction %void None %voidf\n"
                "%label     = OpLabel\n"
                "%struct_v  = OpVariable %struct_p Function\n"
-               "%idval     = OpLoad %uvec3 %id\n"
+               "%idval     = OpLoad %u32vec3 %id\n"
                "%x         = OpCompositeExtract %u32 %idval 0\n"
                // Create the input/output type
                "%inloc     = OpInBoundsAccessChain %customptr %indata %zero %x\n"
                "%outloc    = OpInBoundsAccessChain %customptr %outdata %zero %x\n"
                // Read the input value
-               "%inval     = OpLoad %custom %inloc\n"
+               "%inval     = OpLoad ${customType} %inloc\n"
                // Create the composite and fill it
                "${compositeConstruct}"
                // Create the struct and fill it with the composite
@@ -6453,12 +8449,13 @@ const string specializeInBoundsShaderTemplate (const NumberType type, const Asse
                "             OpStore %struct_v %comp_obj\n"
                // Get deepest possible composite pointer
                "%inner_ptr = OpInBoundsAccessChain %composite_p %struct_v${accessChainIndexes}\n"
-               "%read_obj  = OpLoad %composite %inner_ptr\n"
+               "%read_obj  = OpLoad ${compositeType} %inner_ptr\n"
                // Read back the stored value
-               "%read_val  = OpCompositeExtract %custom %read_obj${extractIndexes}\n"
+               "%read_val  = OpCompositeExtract ${customType} %read_obj${extractIndexes}\n"
                "             OpStore %outloc %read_val\n"
                "             OpReturn\n"
-               "             OpFunctionEnd\n").specialize(parameters);
+               "             OpFunctionEnd\n"
+       ).specialize(parameters);
 }
 
 tcu::TestCaseGroup* createOpInBoundsAccessChainGroup (tcu::TestContext& testCtx)
@@ -6527,19 +8524,17 @@ const string specializeDefaultOutputShaderTemplate (const NumberType type, const
 {
        map<string, string> parameters(params);
 
-       parameters["typeDeclaration"] = getAssemblyTypeDeclaration(type);
+       parameters["customType"]        = getAssemblyTypeName(type);
 
        // Declare the const value, and use it in the initializer
        if (params.find("constValue") != params.end())
        {
-               parameters["constDeclaration"]          = "%const      = OpConstant %in_type " + params.at("constValue") + "\n";
-               parameters["variableInitializer"]       = "%const";
+               parameters["variableInitializer"]       = " %const";
        }
        // Uninitialized case
        else
        {
-               parameters["constDeclaration"]          = "";
-               parameters["variableInitializer"]       = "";
+               parameters["commentDecl"]       = ";";
        }
 
        return StringTemplate(
@@ -6564,33 +8559,31 @@ const string specializeDefaultOutputShaderTemplate (const NumberType type, const
                "%voidf      = OpTypeFunction %void\n"
                "%u32        = OpTypeInt 32 0\n"
                "%i32        = OpTypeInt 32 1\n"
+               "%f32        = OpTypeFloat 32\n"
                "%uvec3      = OpTypeVector %u32 3\n"
                "%uvec3ptr   = OpTypePointer Input %uvec3\n"
-               // Custom types
-               "%in_type    = ${typeDeclaration}\n"
-               // "%const      = OpConstant %in_type ${constValue}\n"
-               "${constDeclaration}\n"
+               "${commentDecl:opt}%const      = OpConstant ${customType} ${constValue:opt}\n"
                // Derived types
-               "%in_ptr     = OpTypePointer Uniform %in_type\n"
-               "%in_arr     = OpTypeRuntimeArray %in_type\n"
+               "%in_ptr     = OpTypePointer Uniform ${customType}\n"
+               "%in_arr     = OpTypeRuntimeArray ${customType}\n"
                "%in_buf     = OpTypeStruct %in_arr\n"
                "%in_bufptr  = OpTypePointer Uniform %in_buf\n"
                "%indata     = OpVariable %in_bufptr Uniform\n"
                "%outdata    = OpVariable %in_bufptr Uniform\n"
                "%id         = OpVariable %uvec3ptr Input\n"
-               "%var_ptr    = OpTypePointer Function %in_type\n"
+               "%var_ptr    = OpTypePointer Function ${customType}\n"
                // Constants
                "%zero       = OpConstant %i32 0\n"
                // Main function
                "%main       = OpFunction %void None %voidf\n"
                "%label      = OpLabel\n"
-               "%out_var    = OpVariable %var_ptr Function ${variableInitializer}\n"
+               "%out_var    = OpVariable %var_ptr Function${variableInitializer:opt}\n"
                "%idval      = OpLoad %uvec3 %id\n"
                "%x          = OpCompositeExtract %u32 %idval 0\n"
                "%inloc      = OpAccessChain %in_ptr %indata %zero %x\n"
                "%outloc     = OpAccessChain %in_ptr %outdata %zero %x\n"
 
-               "%outval     = OpLoad %in_type %out_var\n"
+               "%outval     = OpLoad ${customType} %out_var\n"
                "              OpStore %outloc %outval\n"
                "              OpReturn\n"
                "              OpFunctionEnd\n"
@@ -6607,11 +8600,13 @@ bool compareFloats (const std::vector<BufferSp>&, const vector<AllocationSp>& ou
 
        for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
        {
-               float expected;
-               memcpy(&expected, expectedOutputs[outputNdx]->data(), expectedOutputs[outputNdx]->getNumBytes());
+               vector<deUint8> expectedBytes;
+               float                   expected;
+               float                   actual;
 
-               float actual;
-               memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedOutputs[outputNdx]->getNumBytes());
+               expectedOutputs[outputNdx]->getBytes(expectedBytes);
+               memcpy(&expected, &expectedBytes.front(), expectedBytes.size());
+               memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size());
 
                // Test with epsilon
                if (fabs(expected - actual) > epsilon)
@@ -6633,9 +8628,12 @@ bool passthruVerify (const std::vector<BufferSp>&, const vector<AllocationSp>& o
        // Copy and discard the result.
        for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
        {
-               size_t width = expectedOutputs[outputNdx]->getNumBytes();
+               vector<deUint8> expectedBytes;
+               expectedOutputs[outputNdx]->getBytes(expectedBytes);
+
+               const size_t    width                   = expectedBytes.size();
+               vector<char>    data                    (width);
 
-               vector<char> data(width);
                memcpy(&data[0], outputAllocs[outputNdx]->getHostPtr(), width);
        }
        return true;
@@ -6752,6 +8750,7 @@ tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
        de::MovePtr<tcu::TestCaseGroup> computeTests            (new tcu::TestCaseGroup(testCtx, "compute", "Compute Instructions with special opcodes/operands"));
        de::MovePtr<tcu::TestCaseGroup> graphicsTests           (new tcu::TestCaseGroup(testCtx, "graphics", "Graphics Instructions with special opcodes/operands"));
 
+       computeTests->addChild(createLocalSizeGroup(testCtx));
        computeTests->addChild(createOpNopGroup(testCtx));
        computeTests->addChild(createOpFUnordGroup(testCtx));
        computeTests->addChild(createOpAtomicGroup(testCtx, false));
@@ -6779,13 +8778,33 @@ tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
        computeTests->addChild(createOpUnreachableGroup(testCtx));
        computeTests ->addChild(createOpQuantizeToF16Group(testCtx));
        computeTests ->addChild(createOpFRemGroup(testCtx));
+       computeTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_PASS));
+       computeTests->addChild(createOpSRemComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
+       computeTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_PASS));
+       computeTests->addChild(createOpSModComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
        computeTests->addChild(createSConvertTests(testCtx));
        computeTests->addChild(createUConvertTests(testCtx));
        computeTests->addChild(createOpCompositeInsertGroup(testCtx));
        computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
        computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
+       computeTests->addChild(createOpNMinGroup(testCtx));
+       computeTests->addChild(createOpNMaxGroup(testCtx));
+       computeTests->addChild(createOpNClampGroup(testCtx));
+       {
+               de::MovePtr<tcu::TestCaseGroup> computeAndroidTests     (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
+
+               computeAndroidTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
+               computeAndroidTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
+
+               computeTests->addChild(computeAndroidTests.release());
+       }
+
        computeTests->addChild(create16BitStorageComputeGroup(testCtx));
+       computeTests->addChild(createUboMatrixPaddingComputeGroup(testCtx));
+       computeTests->addChild(createConditionalBranchComputeGroup(testCtx));
+       computeTests->addChild(createIndexingComputeGroup(testCtx));
        computeTests->addChild(createVariablePointersComputeGroup(testCtx));
+       computeTests->addChild(createImageSamplerComputeGroup(testCtx));
        graphicsTests->addChild(createOpNopTests(testCtx));
        graphicsTests->addChild(createOpSourceTests(testCtx));
        graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
@@ -6807,9 +8826,24 @@ tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
        graphicsTests->addChild(createBarrierTests(testCtx));
        graphicsTests->addChild(createDecorationGroupTests(testCtx));
        graphicsTests->addChild(createFRemTests(testCtx));
+       graphicsTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
+       graphicsTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
+
+       {
+               de::MovePtr<tcu::TestCaseGroup> graphicsAndroidTests    (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
+
+               graphicsAndroidTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
+               graphicsAndroidTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
+
+               graphicsTests->addChild(graphicsAndroidTests.release());
+       }
 
        graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
+       graphicsTests->addChild(createUboMatrixPaddingGraphicsGroup(testCtx));
+       graphicsTests->addChild(createConditionalBranchGraphicsGroup(testCtx));
+       graphicsTests->addChild(createIndexingGraphicsGroup(testCtx));
        graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));
+       graphicsTests->addChild(createImageSamplerGraphicsGroup(testCtx));
 
        instructionTests->addChild(computeTests.release());
        instructionTests->addChild(graphicsTests.release());