X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=external%2Fvulkancts%2Fmodules%2Fvulkan%2Fspirv_assembly%2FvktSpvAsmInstructionTests.cpp;h=1f657189c24bb8ff8efc40ad00b9d4079d2d1f77;hb=dae5394b0fadda4abdbdc964bf5dc6ee29151fc8;hp=57c9bfde808fe59985bb9ac5b7ccdf2c7f5f4e64;hpb=1f5107179a3b93d68b0f3f00b20f97b9ec6143c7;p=platform%2Fupstream%2FVK-GL-CTS.git

diff --git a/external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsmInstructionTests.cpp b/external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsmInstructionTests.cpp
index 57c9bfd..1f65718 100644
--- a/external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsmInstructionTests.cpp
+++ b/external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsmInstructionTests.cpp
@@ -44,13 +44,16 @@
 #include "vkStrUtil.hpp"
 #include "vkTypeUtil.hpp"
 
-#include "deRandom.hpp"
 #include "deStringUtil.hpp"
 #include "deUniquePtr.hpp"
 #include "deMath.h"
 #include "tcuStringTemplate.hpp"
 
 #include "vktSpvAsm16bitStorageTests.hpp"
+#include "vktSpvAsmUboMatrixPaddingTests.hpp"
+#include "vktSpvAsmConditionalBranchTests.hpp"
+#include "vktSpvAsmIndexingTests.hpp"
+#include "vktSpvAsmImageSamplerTests.hpp"
 #include "vktSpvAsmComputeShaderCase.hpp"
 #include "vktSpvAsmComputeShaderTestUtil.hpp"
 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
@@ -63,6 +66,7 @@
 #include <string>
 #include <sstream>
 #include <utility>
+#include <stack>
 
 namespace vkt
 {
@@ -174,7 +178,7 @@ struct CaseParameter
 	CaseParameter	(const char* case_, const string& param_) : name(case_), param(param_) {}
 };
 
-// Assembly code used for testing OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
+// Assembly code used for testing LocalSize, OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
 //
 // #version 430
 //
@@ -192,6 +196,119 @@ struct CaseParameter
 //   output_data.elements[x] = -input_data.elements[x];
 // }
 
+static string getAsmForLocalSizeTest(bool useLiteralLocalSize, bool useSpecConstantWorkgroupSize, IVec3 workGroupSize, deUint32 ndx)
+{
+	std::ostringstream out;
+	out << getComputeAsmShaderPreambleWithoutLocalSize();
+
+	if (useLiteralLocalSize)
+	{
+		out << "OpExecutionMode %main LocalSize "
+			<< workGroupSize.x() << " " << workGroupSize.y() << " " << workGroupSize.z() << "\n";
+	}
+
+	out << "OpSource GLSL 430\n"
+		"OpName %main           \"main\"\n"
+		"OpName %id             \"gl_GlobalInvocationID\"\n"
+		"OpDecorate %id BuiltIn GlobalInvocationId\n";
+
+	if (useSpecConstantWorkgroupSize)
+	{
+		out << "OpDecorate %spec_0 SpecId 100\n"
+			<< "OpDecorate %spec_1 SpecId 101\n"
+			<< "OpDecorate %spec_2 SpecId 102\n"
+			<< "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n";
+	}
+
+	out << getComputeAsmInputOutputBufferTraits()
+		<< getComputeAsmCommonTypes()
+		<< getComputeAsmInputOutputBuffer()
+		<< "%id        = OpVariable %uvec3ptr Input\n"
+		<< "%zero      = OpConstant %i32 0 \n";
+
+	if (useSpecConstantWorkgroupSize)
+	{
+		out	<< "%spec_0   = OpSpecConstant %u32 "<< workGroupSize.x() << "\n"
+			<< "%spec_1   = OpSpecConstant %u32 "<< workGroupSize.y() << "\n"
+			<< "%spec_2   = OpSpecConstant %u32 "<< workGroupSize.z() << "\n"
+			<< "%gl_WorkGroupSize = OpSpecConstantComposite %uvec3 %spec_0 %spec_1 %spec_2\n";
+	}
+
+	out << "%main      = OpFunction %void None %voidf\n"
+		<< "%label     = OpLabel\n"
+		<< "%idval     = OpLoad %uvec3 %id\n"
+		<< "%ndx         = OpCompositeExtract %u32 %idval " << ndx << "\n"
+
+			"%inloc     = OpAccessChain %f32ptr %indata %zero %ndx\n"
+			"%inval     = OpLoad %f32 %inloc\n"
+			"%neg       = OpFNegate %f32 %inval\n"
+			"%outloc    = OpAccessChain %f32ptr %outdata %zero %ndx\n"
+			"             OpStore %outloc %neg\n"
+			"             OpReturn\n"
+			"             OpFunctionEnd\n";
+	return out.str();
+}
+
+tcu::TestCaseGroup* createLocalSizeGroup (tcu::TestContext& testCtx)
+{
+	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "localsize", ""));
+	ComputeShaderSpec				spec;
+	de::Random						rnd				(deStringHash(group->getName()));
+	const deUint32					numElements		= 64u;
+	vector<float>					positiveFloats	(numElements, 0);
+	vector<float>					negativeFloats	(numElements, 0);
+
+	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
+
+	for (size_t ndx = 0; ndx < numElements; ++ndx)
+		negativeFloats[ndx] = -positiveFloats[ndx];
+
+	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
+	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
+
+	spec.numWorkGroups = IVec3(numElements, 1, 1);
+
+	spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, 1, 1), 0u);
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize", "", spec));
+
+	spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, 1, 1), 0u);
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize", "", spec));
+
+	spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, 1, 1), 0u);
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize", "", spec));
+
+	spec.numWorkGroups = IVec3(1, 1, 1);
+
+	spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(numElements, 1, 1), 0u);
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_x", "", spec));
+
+	spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(numElements, 1, 1), 0u);
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_x", "", spec));
+
+	spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(numElements, 1, 1), 0u);
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_x", "", spec));
+
+	spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, numElements, 1), 1u);
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_y", "", spec));
+
+	spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, numElements, 1), 1u);
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_y", "", spec));
+
+	spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, numElements, 1), 1u);
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_y", "", spec));
+
+	spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, 1, numElements), 2u);
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_z", "", spec));
+
+	spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, 1, numElements), 2u);
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_z", "", spec));
+
+	spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, 1, numElements), 2u);
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_z", "", spec));
+
+	return group.release();
+}
+
 tcu::TestCaseGroup* createOpNopGroup (tcu::TestContext& testCtx)
 {
 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnop", "Test the OpNop instruction"));
@@ -250,14 +367,21 @@ bool compareFUnord (const std::vector<BufferSp>& inputs, const vector<Allocation
 	if (outputAllocs.size() != 1)
 		return false;
 
-	const BufferSp&	expectedOutput			= expectedOutputs[0];
-	const deInt32*	expectedOutputAsInt		= static_cast<const deInt32*>(expectedOutputs[0]->data());
-	const deInt32*	outputAsInt				= static_cast<const deInt32*>(outputAllocs[0]->getHostPtr());
-	const float*	input1AsFloat			= static_cast<const float*>(inputs[0]->data());
-	const float*	input2AsFloat			= static_cast<const float*>(inputs[1]->data());
-	bool returnValue						= true;
+	vector<deUint8>	input1Bytes;
+	vector<deUint8>	input2Bytes;
+	vector<deUint8>	expectedBytes;
+
+	inputs[0]->getBytes(input1Bytes);
+	inputs[1]->getBytes(input2Bytes);
+	expectedOutputs[0]->getBytes(expectedBytes);
 
-	for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(deInt32); ++idx)
+	const deInt32* const	expectedOutputAsInt		= reinterpret_cast<const deInt32* const>(&expectedBytes.front());
+	const deInt32* const	outputAsInt				= static_cast<const deInt32* const>(outputAllocs[0]->getHostPtr());
+	const float* const		input1AsFloat			= reinterpret_cast<const float* const>(&input1Bytes.front());
+	const float* const		input2AsFloat			= reinterpret_cast<const float* const>(&input2Bytes.front());
+	bool returnValue								= true;
+
+	for (size_t idx = 0; idx < expectedBytes.size() / sizeof(deInt32); ++idx)
 	{
 		if (outputAsInt[idx] != expectedOutputAsInt[idx])
 		{
@@ -401,14 +525,14 @@ struct OpAtomicCase
 {
 	const char*		name;
 	const char*		assembly;
-	void			(*calculateExpected)(deInt32&, deInt32);
+	OpAtomicType	opAtomic;
 	deInt32			numOutputElements;
 
-					OpAtomicCase			(const char* _name, const char* _assembly, void (*_calculateExpected)(deInt32&, deInt32), deInt32 _numOutputElements)
+					OpAtomicCase			(const char* _name, const char* _assembly, OpAtomicType _opAtomic, deInt32 _numOutputElements)
 						: name				(_name)
 						, assembly			(_assembly)
-						, calculateExpected	(_calculateExpected)
-						, numOutputElements (_numOutputElements) {}
+						, opAtomic			(_opAtomic)
+						, numOutputElements	(_numOutputElements) {}
 };
 
 tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStorageBuffer)
@@ -416,8 +540,7 @@ tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStor
 	de::MovePtr<tcu::TestCaseGroup>	group				(new tcu::TestCaseGroup(testCtx,
 																				useStorageBuffer ? "opatomic_storage_buffer" : "opatomic",
 																				"Test the OpAtomic* opcodes"));
-	de::Random						rnd					(deStringHash(group->getName()));
-	const int						numElements			= 1000000;
+	const int						numElements			= 65535;
 	vector<OpAtomicCase>			cases;
 
 	const StringTemplate			shaderTemplate	(
@@ -446,15 +569,7 @@ tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStor
 		"OpMemberDecorate %sumbuf 0 Coherent\n"
 		"OpMemberDecorate %sumbuf 0 Offset 0\n"
 
-		+ string(getComputeAsmCommonTypes()) +
-		"%void      = OpTypeVoid\n"
-		"%voidf     = OpTypeFunction %void\n"
-		"%u32       = OpTypeInt 32 0\n"
-		"%i32       = OpTypeInt 32 1\n"
-		"%uvec3     = OpTypeVector %u32 3\n"
-		"%uvec3ptr  = OpTypePointer Input %uvec3\n"
-		"%i32ptr    = OpTypePointer ${BLOCK_POINTER_TYPE} %i32\n"
-		"%i32arr    = OpTypeRuntimeArray %i32\n"
+		+ getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
 
 		"%buf       = OpTypeStruct %i32arr\n"
 		"%bufptr    = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
@@ -484,25 +599,24 @@ tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStor
 		"             OpReturn\n"
 		"             OpFunctionEnd\n");
 
-	#define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, CALCULATE_EXPECTED, NUM_OUTPUT_ELEMENTS) \
+	#define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS) \
 	do { \
 		DE_STATIC_ASSERT((NUM_OUTPUT_ELEMENTS) == 1 || (NUM_OUTPUT_ELEMENTS) == numElements); \
-		struct calculateExpected_##NAME { static void calculateExpected(deInt32& expected, deInt32 input) CALCULATE_EXPECTED }; /* NOLINT(CALCULATE_EXPECTED) */ \
-		cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, calculateExpected_##NAME::calculateExpected, NUM_OUTPUT_ELEMENTS)); \
+		cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)); \
 	} while (deGetFalse())
-	#define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, CALCULATE_EXPECTED) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, CALCULATE_EXPECTED, 1)
-	#define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, CALCULATE_EXPECTED) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, CALCULATE_EXPECTED, numElements)
+	#define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, OPATOMIC, 1)
+	#define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, OPATOMIC, numElements)
 
-	ADD_OPATOMIC_CASE_1(iadd,	"%unused    = OpAtomicIAdd %i32 %outloc %one %zero %inval\n", { expected += input; } );
-	ADD_OPATOMIC_CASE_1(isub,	"%unused    = OpAtomicISub %i32 %outloc %one %zero %inval\n", { expected -= input; } );
-	ADD_OPATOMIC_CASE_1(iinc,	"%unused    = OpAtomicIIncrement %i32 %outloc %one %zero\n",  { ++expected; (void)input;} );
-	ADD_OPATOMIC_CASE_1(idec,	"%unused    = OpAtomicIDecrement %i32 %outloc %one %zero\n",  { --expected; (void)input;} );
+	ADD_OPATOMIC_CASE_1(iadd,	"%unused    = OpAtomicIAdd %i32 %outloc %one %zero %inval\n", OPATOMIC_IADD );
+	ADD_OPATOMIC_CASE_1(isub,	"%unused    = OpAtomicISub %i32 %outloc %one %zero %inval\n", OPATOMIC_ISUB );
+	ADD_OPATOMIC_CASE_1(iinc,	"%unused    = OpAtomicIIncrement %i32 %outloc %one %zero\n",  OPATOMIC_IINC );
+	ADD_OPATOMIC_CASE_1(idec,	"%unused    = OpAtomicIDecrement %i32 %outloc %one %zero\n",  OPATOMIC_IDEC );
 	ADD_OPATOMIC_CASE_N(load,	"%inval2    = OpAtomicLoad %i32 %inloc %zero %zero\n"
-								"             OpStore %outloc %inval2\n",  { expected = input;} );
-	ADD_OPATOMIC_CASE_N(store,	"             OpAtomicStore %outloc %zero %zero %inval\n",  { expected = input;} );
+								"             OpStore %outloc %inval2\n",  OPATOMIC_LOAD );
+	ADD_OPATOMIC_CASE_N(store,	"             OpAtomicStore %outloc %zero %zero %inval\n",  OPATOMIC_STORE );
 	ADD_OPATOMIC_CASE_N(compex, "%even      = OpSMod %i32 %inval %two\n"
 								"             OpStore %outloc %even\n"
-								"%unused    = OpAtomicCompareExchange %i32 %outloc %one %zero %zero %minusone %zero\n",  { expected = (input % 2) == 0 ? -1 : 1;} );
+								"%unused    = OpAtomicCompareExchange %i32 %outloc %one %zero %zero %minusone %zero\n",  OPATOMIC_COMPEX );
 
 	#undef ADD_OPATOMIC_CASE
 	#undef ADD_OPATOMIC_CASE_1
@@ -524,14 +638,8 @@ tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStor
 		if (useStorageBuffer)
 			spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
 
-		fillRandomScalars(rnd, 1, 100, &inputInts[0], numElements);
-		for (size_t ndx = 0; ndx < numElements; ++ndx)
-		{
-			cases[caseNdx].calculateExpected((cases[caseNdx].numOutputElements == 1) ? expected[0] : expected[ndx], inputInts[ndx]);
-		}
-
-		spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
-		spec.outputs.push_back(BufferSp(new Int32Buffer(expected)));
+		spec.inputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_INPUT)));
+		spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_EXPECTED)));
 		spec.numWorkGroups = IVec3(numElements, 1, 1);
 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
 	}
@@ -676,11 +784,12 @@ bool compareNoContractCase(const std::vector<BufferSp>&, const vector<Allocation
 	if (outputAllocs.size() != 1)
 		return false;
 
-	// We really just need this for size because we are not comparing the exact values.
-	const BufferSp&	expectedOutput	= expectedOutputs[0];
-	const float*	outputAsFloat	= static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+	// Only size is needed because we are not comparing the exact values.
+	size_t byteSize = expectedOutputs[0]->getByteSize();
+
+	const float*	outputAsFloat	= static_cast<const float*>(outputAllocs[0]->getHostPtr());
 
-	for(size_t i = 0; i < expectedOutput->getNumBytes() / sizeof(float); ++i) {
+	for(size_t i = 0; i < byteSize / sizeof(float); ++i) {
 		if (outputAsFloat[i] != 0.f &&
 			outputAsFloat[i] != -ldexp(1, -24)) {
 			return false;
@@ -785,11 +894,13 @@ bool compareFRem(const std::vector<BufferSp>&, const vector<AllocationSp>& outpu
 	if (outputAllocs.size() != 1)
 		return false;
 
-	const BufferSp& expectedOutput = expectedOutputs[0];
-	const float *expectedOutputAsFloat = static_cast<const float*>(expectedOutput->data());
-	const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+	vector<deUint8>	expectedBytes;
+	expectedOutputs[0]->getBytes(expectedBytes);
 
-	for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float); ++idx)
+	const float*	expectedOutputAsFloat	= reinterpret_cast<const float*>(&expectedBytes.front());
+	const float*	outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
+
+	for (size_t idx = 0; idx < expectedBytes.size() / sizeof(float); ++idx)
 	{
 		const float f0 = expectedOutputAsFloat[idx];
 		const float f1 = outputAsFloat[idx];
@@ -884,11 +995,14 @@ bool compareNMin (const std::vector<BufferSp>&, const vector<AllocationSp>& outp
 	if (outputAllocs.size() != 1)
 		return false;
 
-	const BufferSp&		expectedOutput			= expectedOutputs[0];
-	const float* const	expectedOutputAsFloat	= static_cast<const float*>(expectedOutput->data());
-	const float* const	outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+	const BufferSp&			expectedOutput			(expectedOutputs[0]);
+	std::vector<deUint8>	data;
+	expectedOutput->getBytes(data);
+
+	const float* const		expectedOutputAsFloat	= reinterpret_cast<const float*>(&data.front());
+	const float* const		outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
 
-	for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float); ++idx)
+	for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
 	{
 		const float f0 = expectedOutputAsFloat[idx];
 		const float f1 = outputAsFloat[idx];
@@ -1005,11 +1119,14 @@ bool compareNMax (const std::vector<BufferSp>&, const vector<AllocationSp>& outp
 	if (outputAllocs.size() != 1)
 		return false;
 
-	const BufferSp&		expectedOutput			= expectedOutputs[0];
-	const float* const	expectedOutputAsFloat	= static_cast<const float*>(expectedOutput->data());
-	const float* const	outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+	const BufferSp&			expectedOutput			= expectedOutputs[0];
+	std::vector<deUint8>	data;
+	expectedOutput->getBytes(data);
 
-	for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float); ++idx)
+	const float* const		expectedOutputAsFloat	= reinterpret_cast<const float*>(&data.front());
+	const float* const		outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
+
+	for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
 	{
 		const float f0 = expectedOutputAsFloat[idx];
 		const float f1 = outputAsFloat[idx];
@@ -1125,11 +1242,14 @@ bool compareNClamp (const std::vector<BufferSp>&, const vector<AllocationSp>& ou
 	if (outputAllocs.size() != 1)
 		return false;
 
-	const BufferSp&		expectedOutput			= expectedOutputs[0];
-	const float* const	expectedOutputAsFloat	= static_cast<const float*>(expectedOutput->data());
-	const float* const	outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+	const BufferSp&			expectedOutput			= expectedOutputs[0];
+	std::vector<deUint8>	data;
+	expectedOutput->getBytes(data);
+
+	const float* const		expectedOutputAsFloat	= reinterpret_cast<const float*>(&data.front());
+	const float* const		outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
 
-	for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float) / 2; ++idx)
+	for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float) / 2; ++idx)
 	{
 		const float e0 = expectedOutputAsFloat[idx * 2];
 		const float e1 = expectedOutputAsFloat[idx * 2 + 1];
@@ -2358,7 +2478,8 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
 	vector<deInt32>					outputInts3		(numElements, 0);
 	vector<deInt32>					outputInts4		(numElements, 0);
 	const StringTemplate			shaderTemplate	(
-		string(getComputeAsmShaderPreamble()) +
+		"${CAPABILITIES:opt}"
+		+ string(getComputeAsmShaderPreamble()) +
 
 		"OpName %main           \"main\"\n"
 		"OpName %id             \"gl_GlobalInvocationID\"\n"
@@ -2370,6 +2491,7 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
 
 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
 
+		"${OPTYPE_DEFINITIONS:opt}"
 		"%buf     = OpTypeStruct %i32arr\n"
 		"%bufptr  = OpTypePointer Uniform %buf\n"
 		"%indata    = OpVariable %bufptr Uniform\n"
@@ -2384,6 +2506,7 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
 
 		"%main      = OpFunction %void None %voidf\n"
 		"%label     = OpLabel\n"
+		"${TYPE_CONVERT:opt}"
 		"%idval     = OpLoad %uvec3 %id\n"
 		"%x         = OpCompositeExtract %u32 %idval 0\n"
 		"%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
@@ -2405,6 +2528,7 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
 	}
 
 	const char addScToInput[]		= "OpIAdd %i32 %inval %sc_final";
+	const char addSc32ToInput[]		= "OpIAdd %i32 %inval %sc_final32";
 	const char selectTrueUsingSc[]	= "OpSelect %i32 %sc_final %inval %zero";
 	const char selectFalseUsingSc[]	= "OpSelect %i32 %sc_final %zero %inval";
 
@@ -2439,12 +2563,15 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
 	cases.push_back(SpecConstantTwoIntCase("not",					" %i32 0",		" %i32 0",		"%i32",		"Not                  %sc_0",				-43,	0,		addScToInput,		outputInts1));
 	cases.push_back(SpecConstantTwoIntCase("logicalnot",			"False %bool",	"False %bool",	"%bool",	"LogicalNot           %sc_0",				1,		0,		selectFalseUsingSc,	outputInts2));
 	cases.push_back(SpecConstantTwoIntCase("select",				"False %bool",	" %i32 0",		"%i32",		"Select               %sc_0 %sc_1 %zero",	1,		42,		addScToInput,		outputInts1));
-	// OpSConvert, OpFConvert: these two instructions involve ints/floats of different bitwidths.
+	cases.push_back(SpecConstantTwoIntCase("sconvert",				" %i32 0",		" %i32 0",		"%i16",		"SConvert             %sc_0",				-11200,	0,		addSc32ToInput,		outputInts3));
+	// -969998336 stored as 32-bit two's complement is the binary representation of -11200 as IEEE-754 Float
+	cases.push_back(SpecConstantTwoIntCase("fconvert",				" %f32 0",		" %f32 0",		"%f64",		"FConvert             %sc_0",				-969998336, 0,	addSc32ToInput,		outputInts3));
 
 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
 	{
 		map<string, string>		specializations;
 		ComputeShaderSpec		spec;
+		ComputeTestFeatures		features = COMPUTE_TEST_USES_NONE;
 
 		specializations["SC_DEF0"]			= cases[caseNdx].scDefinition0;
 		specializations["SC_DEF1"]			= cases[caseNdx].scDefinition1;
@@ -2452,6 +2579,24 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
 		specializations["SC_OP"]			= cases[caseNdx].scOperation;
 		specializations["GEN_RESULT"]		= cases[caseNdx].resultOperation;
 
+		// Special SPIR-V code for SConvert-case
+		if (strcmp(cases[caseNdx].caseName, "sconvert") == 0)
+		{
+			features								= COMPUTE_TEST_USES_INT16;
+			specializations["CAPABILITIES"]			= "OpCapability Int16\n";							// Adds 16-bit integer capability
+			specializations["OPTYPE_DEFINITIONS"]	= "%i16 = OpTypeInt 16 1\n";						// Adds 16-bit integer type
+			specializations["TYPE_CONVERT"]			= "%sc_final32 = OpSConvert %i32 %sc_final\n";		// Converts 16-bit integer to 32-bit integer
+		}
+
+		// Special SPIR-V code for FConvert-case
+		if (strcmp(cases[caseNdx].caseName, "fconvert") == 0)
+		{
+			features								= COMPUTE_TEST_USES_FLOAT64;
+			specializations["CAPABILITIES"]			= "OpCapability Float64\n";							// Adds 64-bit float capability
+			specializations["OPTYPE_DEFINITIONS"]	= "%f64 = OpTypeFloat 64\n";						// Adds 64-bit float type
+			specializations["TYPE_CONVERT"]			= "%sc_final32 = OpConvertFToS %i32 %sc_final\n";	// Converts 64-bit float to 32-bit integer
+		}
+
 		spec.assembly = shaderTemplate.specialize(specializations);
 		spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
 		spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
@@ -2459,7 +2604,7 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
 		spec.specConstants.push_back(cases[caseNdx].scActualValue0);
 		spec.specConstants.push_back(cases[caseNdx].scActualValue1);
 
-		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName, cases[caseNdx].caseName, spec));
+		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName, cases[caseNdx].caseName, spec, features));
 	}
 
 	ComputeShaderSpec				spec;
@@ -2529,18 +2674,576 @@ tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
 	return group.release();
 }
 
+void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestContext& testCtx)
+{
+	ComputeShaderSpec	specInt;
+	ComputeShaderSpec	specFloat;
+	ComputeShaderSpec	specVec3;
+	ComputeShaderSpec	specMat4;
+	ComputeShaderSpec	specArray;
+	ComputeShaderSpec	specStruct;
+	de::Random			rnd				(deStringHash(group->getName()));
+	const int			numElements		= 100;
+	vector<float>		inputFloats		(numElements, 0);
+	vector<float>		outputFloats	(numElements, 0);
+
+	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
+
+	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
+	floorAll(inputFloats);
+
+	for (size_t ndx = 0; ndx < numElements; ++ndx)
+	{
+		// Just check if the value is positive or not
+		outputFloats[ndx] = (inputFloats[ndx] > 0) ? 1.0f : -1.0f;
+	}
+
+	// All of the tests are of the form:
+	//
+	// testtype r
+	//
+	// if (inputdata > 0)
+	//   r = 1
+	// else
+	//   r = -1
+	//
+	// return (float)r
+
+	specFloat.assembly =
+		string(getComputeAsmShaderPreamble()) +
+
+		"OpSource GLSL 430\n"
+		"OpName %main \"main\"\n"
+		"OpName %id \"gl_GlobalInvocationID\"\n"
+
+		"OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+		"%id = OpVariable %uvec3ptr Input\n"
+		"%zero       = OpConstant %i32 0\n"
+		"%float_0    = OpConstant %f32 0.0\n"
+		"%float_1    = OpConstant %f32 1.0\n"
+		"%float_n1   = OpConstant %f32 -1.0\n"
+
+		"%main     = OpFunction %void None %voidf\n"
+		"%entry    = OpLabel\n"
+		"%idval    = OpLoad %uvec3 %id\n"
+		"%x        = OpCompositeExtract %u32 %idval 0\n"
+		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+		"%inval    = OpLoad %f32 %inloc\n"
+
+		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
+		"            OpSelectionMerge %cm None\n"
+		"            OpBranchConditional %comp %tb %fb\n"
+		"%tb       = OpLabel\n"
+		"            OpBranch %cm\n"
+		"%fb       = OpLabel\n"
+		"            OpBranch %cm\n"
+		"%cm       = OpLabel\n"
+		"%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
+
+		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+		"            OpStore %outloc %res\n"
+		"            OpReturn\n"
+
+		"            OpFunctionEnd\n";
+	specFloat.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+	specFloat.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+	specFloat.numWorkGroups = IVec3(numElements, 1, 1);
+
+	specMat4.assembly =
+		string(getComputeAsmShaderPreamble()) +
+
+		"OpSource GLSL 430\n"
+		"OpName %main \"main\"\n"
+		"OpName %id \"gl_GlobalInvocationID\"\n"
+
+		"OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+		"%id = OpVariable %uvec3ptr Input\n"
+		"%v4f32      = OpTypeVector %f32 4\n"
+		"%mat4v4f32  = OpTypeMatrix %v4f32 4\n"
+		"%zero       = OpConstant %i32 0\n"
+		"%float_0    = OpConstant %f32 0.0\n"
+		"%float_1    = OpConstant %f32 1.0\n"
+		"%float_n1   = OpConstant %f32 -1.0\n"
+		"%m11        = OpConstantComposite %v4f32 %float_1 %float_0 %float_0 %float_0\n"
+		"%m12        = OpConstantComposite %v4f32 %float_0 %float_1 %float_0 %float_0\n"
+		"%m13        = OpConstantComposite %v4f32 %float_0 %float_0 %float_1 %float_0\n"
+		"%m14        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_1\n"
+		"%m1         = OpConstantComposite %mat4v4f32 %m11 %m12 %m13 %m14\n"
+		"%m21        = OpConstantComposite %v4f32 %float_n1 %float_0 %float_0 %float_0\n"
+		"%m22        = OpConstantComposite %v4f32 %float_0 %float_n1 %float_0 %float_0\n"
+		"%m23        = OpConstantComposite %v4f32 %float_0 %float_0 %float_n1 %float_0\n"
+		"%m24        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_n1\n"
+		"%m2         = OpConstantComposite %mat4v4f32 %m21 %m22 %m23 %m24\n"
+
+		"%main     = OpFunction %void None %voidf\n"
+		"%entry    = OpLabel\n"
+		"%idval    = OpLoad %uvec3 %id\n"
+		"%x        = OpCompositeExtract %u32 %idval 0\n"
+		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+		"%inval    = OpLoad %f32 %inloc\n"
+
+		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
+		"            OpSelectionMerge %cm None\n"
+		"            OpBranchConditional %comp %tb %fb\n"
+		"%tb       = OpLabel\n"
+		"            OpBranch %cm\n"
+		"%fb       = OpLabel\n"
+		"            OpBranch %cm\n"
+		"%cm       = OpLabel\n"
+		"%mres     = OpPhi %mat4v4f32 %m1 %tb %m2 %fb\n"
+		"%res      = OpCompositeExtract %f32 %mres 2 2\n"
+
+		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+		"            OpStore %outloc %res\n"
+		"            OpReturn\n"
+
+		"            OpFunctionEnd\n";
+	specMat4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+	specMat4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+	specMat4.numWorkGroups = IVec3(numElements, 1, 1);
+
+	specVec3.assembly =
+		string(getComputeAsmShaderPreamble()) +
+
+		"OpSource GLSL 430\n"
+		"OpName %main \"main\"\n"
+		"OpName %id \"gl_GlobalInvocationID\"\n"
+
+		"OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+		"%id = OpVariable %uvec3ptr Input\n"
+		"%zero       = OpConstant %i32 0\n"
+		"%float_0    = OpConstant %f32 0.0\n"
+		"%float_1    = OpConstant %f32 1.0\n"
+		"%float_n1   = OpConstant %f32 -1.0\n"
+		"%v1         = OpConstantComposite %fvec3 %float_1 %float_1 %float_1\n"
+		"%v2         = OpConstantComposite %fvec3 %float_n1 %float_n1 %float_n1\n"
+
+		"%main     = OpFunction %void None %voidf\n"
+		"%entry    = OpLabel\n"
+		"%idval    = OpLoad %uvec3 %id\n"
+		"%x        = OpCompositeExtract %u32 %idval 0\n"
+		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+		"%inval    = OpLoad %f32 %inloc\n"
+
+		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
+		"            OpSelectionMerge %cm None\n"
+		"            OpBranchConditional %comp %tb %fb\n"
+		"%tb       = OpLabel\n"
+		"            OpBranch %cm\n"
+		"%fb       = OpLabel\n"
+		"            OpBranch %cm\n"
+		"%cm       = OpLabel\n"
+		"%vres     = OpPhi %fvec3 %v1 %tb %v2 %fb\n"
+		"%res      = OpCompositeExtract %f32 %vres 2\n"
+
+		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+		"            OpStore %outloc %res\n"
+		"            OpReturn\n"
+
+		"            OpFunctionEnd\n";
+	specVec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+	specVec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+	specVec3.numWorkGroups = IVec3(numElements, 1, 1);
+
+	specInt.assembly =
+		string(getComputeAsmShaderPreamble()) +
+
+		"OpSource GLSL 430\n"
+		"OpName %main \"main\"\n"
+		"OpName %id \"gl_GlobalInvocationID\"\n"
+
+		"OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+		"%id = OpVariable %uvec3ptr Input\n"
+		"%zero       = OpConstant %i32 0\n"
+		"%float_0    = OpConstant %f32 0.0\n"
+		"%i1         = OpConstant %i32 1\n"
+		"%i2         = OpConstant %i32 -1\n"
+
+		"%main     = OpFunction %void None %voidf\n"
+		"%entry    = OpLabel\n"
+		"%idval    = OpLoad %uvec3 %id\n"
+		"%x        = OpCompositeExtract %u32 %idval 0\n"
+		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+		"%inval    = OpLoad %f32 %inloc\n"
+
+		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
+		"            OpSelectionMerge %cm None\n"
+		"            OpBranchConditional %comp %tb %fb\n"
+		"%tb       = OpLabel\n"
+		"            OpBranch %cm\n"
+		"%fb       = OpLabel\n"
+		"            OpBranch %cm\n"
+		"%cm       = OpLabel\n"
+		"%ires     = OpPhi %i32 %i1 %tb %i2 %fb\n"
+		"%res      = OpConvertSToF %f32 %ires\n"
+
+		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+		"            OpStore %outloc %res\n"
+		"            OpReturn\n"
+
+		"            OpFunctionEnd\n";
+	specInt.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+	specInt.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+	specInt.numWorkGroups = IVec3(numElements, 1, 1);
+
+	specArray.assembly =
+		string(getComputeAsmShaderPreamble()) +
+
+		"OpSource GLSL 430\n"
+		"OpName %main \"main\"\n"
+		"OpName %id \"gl_GlobalInvocationID\"\n"
+
+		"OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+		"%id = OpVariable %uvec3ptr Input\n"
+		"%zero       = OpConstant %i32 0\n"
+		"%u7         = OpConstant %u32 7\n"
+		"%float_0    = OpConstant %f32 0.0\n"
+		"%float_1    = OpConstant %f32 1.0\n"
+		"%float_n1   = OpConstant %f32 -1.0\n"
+		"%f32a7      = OpTypeArray %f32 %u7\n"
+		"%a1         = OpConstantComposite %f32a7 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1\n"
+		"%a2         = OpConstantComposite %f32a7 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1\n"
+		"%main     = OpFunction %void None %voidf\n"
+		"%entry    = OpLabel\n"
+		"%idval    = OpLoad %uvec3 %id\n"
+		"%x        = OpCompositeExtract %u32 %idval 0\n"
+		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+		"%inval    = OpLoad %f32 %inloc\n"
+
+		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
+		"            OpSelectionMerge %cm None\n"
+		"            OpBranchConditional %comp %tb %fb\n"
+		"%tb       = OpLabel\n"
+		"            OpBranch %cm\n"
+		"%fb       = OpLabel\n"
+		"            OpBranch %cm\n"
+		"%cm       = OpLabel\n"
+		"%ares     = OpPhi %f32a7 %a1 %tb %a2 %fb\n"
+		"%res      = OpCompositeExtract %f32 %ares 5\n"
+
+		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+		"            OpStore %outloc %res\n"
+		"            OpReturn\n"
+
+		"            OpFunctionEnd\n";
+	specArray.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+	specArray.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+	specArray.numWorkGroups = IVec3(numElements, 1, 1);
+
+	specStruct.assembly =
+		string(getComputeAsmShaderPreamble()) +
+
+		"OpSource GLSL 430\n"
+		"OpName %main \"main\"\n"
+		"OpName %id \"gl_GlobalInvocationID\"\n"
+
+		"OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+		"%id = OpVariable %uvec3ptr Input\n"
+		"%zero       = OpConstant %i32 0\n"
+		"%float_0    = OpConstant %f32 0.0\n"
+		"%float_1    = OpConstant %f32 1.0\n"
+		"%float_n1   = OpConstant %f32 -1.0\n"
+
+		"%v2f32      = OpTypeVector %f32 2\n"
+		"%Data2      = OpTypeStruct %f32 %v2f32\n"
+		"%Data       = OpTypeStruct %Data2 %f32\n"
+
+		"%in1a       = OpConstantComposite %v2f32 %float_1 %float_1\n"
+		"%in1b       = OpConstantComposite %Data2 %float_1 %in1a\n"
+		"%s1         = OpConstantComposite %Data %in1b %float_1\n"
+		"%in2a       = OpConstantComposite %v2f32 %float_n1 %float_n1\n"
+		"%in2b       = OpConstantComposite %Data2 %float_n1 %in2a\n"
+		"%s2         = OpConstantComposite %Data %in2b %float_n1\n"
+
+		"%main     = OpFunction %void None %voidf\n"
+		"%entry    = OpLabel\n"
+		"%idval    = OpLoad %uvec3 %id\n"
+		"%x        = OpCompositeExtract %u32 %idval 0\n"
+		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+		"%inval    = OpLoad %f32 %inloc\n"
+
+		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
+		"            OpSelectionMerge %cm None\n"
+		"            OpBranchConditional %comp %tb %fb\n"
+		"%tb       = OpLabel\n"
+		"            OpBranch %cm\n"
+		"%fb       = OpLabel\n"
+		"            OpBranch %cm\n"
+		"%cm       = OpLabel\n"
+		"%sres     = OpPhi %Data %s1 %tb %s2 %fb\n"
+		"%res      = OpCompositeExtract %f32 %sres 0 0\n"
+
+		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+		"            OpStore %outloc %res\n"
+		"            OpReturn\n"
+
+		"            OpFunctionEnd\n";
+	specStruct.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+	specStruct.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
+	specStruct.numWorkGroups = IVec3(numElements, 1, 1);
+
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_int", "OpPhi with int variables", specInt));
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float", "OpPhi with float variables", specFloat));
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_vec3", "OpPhi with vec3 variables", specVec3));
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_mat4", "OpPhi with mat4 variables", specMat4));
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_array", "OpPhi with array variables", specArray));
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_struct", "OpPhi with struct variables", specStruct));
+}
+
+string generateConstantDefinitions (int count)
+{
+	std::ostringstream	r;
+	for (int i = 0; i < count; i++)
+		r << "%cf" << (i * 10 + 5) << " = OpConstant %f32 " <<(i * 10 + 5) << ".0\n";
+	r << "\n";
+	return r.str();
+}
+
+string generateSwitchCases (int count)
+{
+	std::ostringstream	r;
+	for (int i = 0; i < count; i++)
+		r << " " << i << " %case" << i;
+	r << "\n";
+	return r.str();
+}
+
+string generateSwitchTargets (int count)
+{
+	std::ostringstream	r;
+	for (int i = 0; i < count; i++)
+		r << "%case" << i << " = OpLabel\n            OpBranch %phi\n";
+	r << "\n";
+	return r.str();
+}
+
+string generateOpPhiParams (int count)
+{
+	std::ostringstream	r;
+	for (int i = 0; i < count; i++)
+		r << " %cf" << (i * 10 + 5) << " %case" << i;
+	r << "\n";
+	return r.str();
+}
+
+string generateIntWidth (int value)
+{
+	std::ostringstream	r;
+	r << value;
+	return r.str();
+}
+
+// Expand input string by injecting "ABC" between the input
+// string characters. The acc/add/treshold parameters are used
+// to skip some of the injections to make the result less
+// uniform (and a lot shorter).
+string expandOpPhiCase5 (const string& s, int &acc, int add, int treshold)
+{
+	std::ostringstream	res;
+	const char*			p = s.c_str();
+
+	while (*p)
+	{
+		res << *p;
+		acc += add;
+		if (acc > treshold)
+		{
+			acc -= treshold;
+			res << "ABC";
+		}
+		p++;
+	}
+	return res.str();
+}
+
+// Calculate expected result based on the code string
+float calcOpPhiCase5 (float val, const string& s)
+{
+	const char*		p		= s.c_str();
+	float			x[8];
+	bool			b[8];
+	const float		tv[8]	= { 0.5f, 1.5f, 3.5f, 7.5f, 15.5f, 31.5f, 63.5f, 127.5f };
+	const float		v		= deFloatAbs(val);
+	float			res		= 0;
+	int				depth	= -1;
+	int				skip	= 0;
+
+	for (int i = 7; i >= 0; --i)
+		x[i] = std::fmod((float)v, (float)(2 << i));
+	for (int i = 7; i >= 0; --i)
+		b[i] = x[i] > tv[i];
+
+	while (*p)
+	{
+		if (*p == 'A')
+		{
+			depth++;
+			if (skip == 0 && b[depth])
+			{
+				res++;
+			}
+			else
+				skip++;
+		}
+		if (*p == 'B')
+		{
+			if (skip)
+				skip--;
+			if (b[depth] || skip)
+				skip++;
+		}
+		if (*p == 'C')
+		{
+			depth--;
+			if (skip)
+				skip--;
+		}
+		p++;
+	}
+	return res;
+}
+
+// In the code string, the letters represent the following:
+//
+// A:
+//     if (certain bit is set)
+//     {
+//       result++;
+//
+// B:
+//     } else {
+//
+// C:
+//     }
+//
+// examples:
+// AABCBC leads to if(){r++;if(){r++;}else{}}else{}
+// ABABCC leads to if(){r++;}else{if(){r++;}else{}}
+// ABCABC leads to if(){r++;}else{}if(){r++;}else{}
+//
+// Code generation gets a bit complicated due to the else-branches,
+// which do not generate new values. Thus, the generator needs to
+// keep track of the previous variable change seen by the else
+// branch.
+string generateOpPhiCase5 (const string& s)
+{
+	std::stack<int>				idStack;
+	std::stack<std::string>		value;
+	std::stack<std::string>		valueLabel;
+	std::stack<std::string>		mergeLeft;
+	std::stack<std::string>		mergeRight;
+	std::ostringstream			res;
+	const char*					p			= s.c_str();
+	int							depth		= -1;
+	int							currId		= 0;
+	int							iter		= 0;
+
+	idStack.push(-1);
+	value.push("%f32_0");
+	valueLabel.push("%f32_0 %entry");
+
+	while (*p)
+	{
+		if (*p == 'A')
+		{
+			depth++;
+			currId = iter;
+			idStack.push(currId);
+			res << "\tOpSelectionMerge %m" << currId << " None\n";
+			res << "\tOpBranchConditional %b" << depth << " %t" << currId << " %f" << currId << "\n";
+			res << "%t" << currId << " = OpLabel\n";
+			res << "%rt" << currId << " = OpFAdd %f32 " << value.top() << " %f32_1\n";
+			std::ostringstream tag;
+			tag << "%rt" << currId;
+			value.push(tag.str());
+			tag << " %t" << currId;
+			valueLabel.push(tag.str());
+		}
+
+		if (*p == 'B')
+		{
+			mergeLeft.push(valueLabel.top());
+			value.pop();
+			valueLabel.pop();
+			res << "\tOpBranch %m" << currId << "\n";
+			res << "%f" << currId << " = OpLabel\n";
+			std::ostringstream tag;
+			tag << value.top() << " %f" << currId;
+			valueLabel.pop();
+			valueLabel.push(tag.str());
+		}
+
+		if (*p == 'C')
+		{
+			mergeRight.push(valueLabel.top());
+			res << "\tOpBranch %m" << currId << "\n";
+			res << "%m" << currId << " = OpLabel\n";
+			if (*(p + 1) == 0)
+				res << "%res"; // last result goes to %res
+			else
+				res << "%rm" << currId;
+			res << " = OpPhi %f32  " << mergeLeft.top() << "  " << mergeRight.top() << "\n";
+			std::ostringstream tag;
+			tag << "%rm" << currId;
+			value.pop();
+			value.push(tag.str());
+			tag << " %m" << currId;
+			valueLabel.pop();
+			valueLabel.push(tag.str());
+			mergeLeft.pop();
+			mergeRight.pop();
+			depth--;
+			idStack.pop();
+			currId = idStack.top();
+		}
+		p++;
+		iter++;
+	}
+	return res.str();
+}
+
 tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
 {
 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
 	ComputeShaderSpec				spec1;
 	ComputeShaderSpec				spec2;
 	ComputeShaderSpec				spec3;
+	ComputeShaderSpec				spec4;
+	ComputeShaderSpec				spec5;
 	de::Random						rnd				(deStringHash(group->getName()));
 	const int						numElements		= 100;
 	vector<float>					inputFloats		(numElements, 0);
 	vector<float>					outputFloats1	(numElements, 0);
 	vector<float>					outputFloats2	(numElements, 0);
 	vector<float>					outputFloats3	(numElements, 0);
+	vector<float>					outputFloats4	(numElements, 0);
+	vector<float>					outputFloats5	(numElements, 0);
+	std::string						codestring		= "ABC";
+	const int						test4Width		= 1024;
+
+	// Build case 5 code string. Each iteration makes the hierarchy more complicated.
+	// 9 iterations with (7, 24) parameters makes the hierarchy 8 deep with about 1500 lines of
+	// shader code.
+	for (int i = 0, acc = 0; i < 9; i++)
+		codestring = expandOpPhiCase5(codestring, acc, 7, 24);
 
 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
 
@@ -2558,6 +3261,11 @@ tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
 		}
 		outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
 		outputFloats3[ndx] = 8.5f - inputFloats[ndx];
+
+		int index4 = (int)deFloor(deAbs((float)ndx * inputFloats[ndx]));
+		outputFloats4[ndx] = (float)(index4 % test4Width) * 10.0f + 5.0f;
+
+		outputFloats5[ndx] = calcOpPhiCase5(inputFloats[ndx], codestring);
 	}
 
 	spec1.assembly =
@@ -2710,6 +3418,142 @@ tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
 
 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", "Swap the values of two variables using OpPhi", spec3));
 
+	spec4.assembly =
+		"OpCapability Shader\n"
+		"%ext = OpExtInstImport \"GLSL.std.450\"\n"
+		"OpMemoryModel Logical GLSL450\n"
+		"OpEntryPoint GLCompute %main \"main\" %id\n"
+		"OpExecutionMode %main LocalSize 1 1 1\n"
+
+		"OpSource GLSL 430\n"
+		"OpName %main \"main\"\n"
+		"OpName %id \"gl_GlobalInvocationID\"\n"
+
+		"OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+		"%id       = OpVariable %uvec3ptr Input\n"
+		"%zero     = OpConstant %i32 0\n"
+		"%cimod    = OpConstant %u32 " + generateIntWidth(test4Width) + "\n"
+
+		+ generateConstantDefinitions(test4Width) +
+
+		"%main     = OpFunction %void None %voidf\n"
+		"%entry    = OpLabel\n"
+		"%idval    = OpLoad %uvec3 %id\n"
+		"%x        = OpCompositeExtract %u32 %idval 0\n"
+		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+		"%inval    = OpLoad %f32 %inloc\n"
+		"%xf       = OpConvertUToF %f32 %x\n"
+		"%xm       = OpFMul %f32 %xf %inval\n"
+		"%xa       = OpExtInst %f32 %ext FAbs %xm\n"
+		"%xi       = OpConvertFToU %u32 %xa\n"
+		"%selector = OpUMod %u32 %xi %cimod\n"
+		"            OpSelectionMerge %phi None\n"
+		"            OpSwitch %selector %default "
+
+		+ generateSwitchCases(test4Width) +
+
+		"%default  = OpLabel\n"
+		"            OpUnreachable\n"
+
+		+ generateSwitchTargets(test4Width) +
+
+		"%phi      = OpLabel\n"
+		"%result   = OpPhi %f32"
+
+		+ generateOpPhiParams(test4Width) +
+
+		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+		"            OpStore %outloc %result\n"
+		"            OpReturn\n"
+
+		"            OpFunctionEnd\n";
+	spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+	spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
+	spec4.numWorkGroups = IVec3(numElements, 1, 1);
+
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "wide", "OpPhi with a lot of parameters", spec4));
+
+	spec5.assembly =
+		"OpCapability Shader\n"
+		"%ext      = OpExtInstImport \"GLSL.std.450\"\n"
+		"OpMemoryModel Logical GLSL450\n"
+		"OpEntryPoint GLCompute %main \"main\" %id\n"
+		"OpExecutionMode %main LocalSize 1 1 1\n"
+		"%code     = OpString \"" + codestring + "\"\n"
+
+		"OpSource GLSL 430\n"
+		"OpName %main \"main\"\n"
+		"OpName %id \"gl_GlobalInvocationID\"\n"
+
+		"OpDecorate %id BuiltIn GlobalInvocationId\n"
+
+		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
+
+		"%id       = OpVariable %uvec3ptr Input\n"
+		"%zero     = OpConstant %i32 0\n"
+		"%f32_0    = OpConstant %f32 0.0\n"
+		"%f32_0_5  = OpConstant %f32 0.5\n"
+		"%f32_1    = OpConstant %f32 1.0\n"
+		"%f32_1_5  = OpConstant %f32 1.5\n"
+		"%f32_2    = OpConstant %f32 2.0\n"
+		"%f32_3_5  = OpConstant %f32 3.5\n"
+		"%f32_4    = OpConstant %f32 4.0\n"
+		"%f32_7_5  = OpConstant %f32 7.5\n"
+		"%f32_8    = OpConstant %f32 8.0\n"
+		"%f32_15_5 = OpConstant %f32 15.5\n"
+		"%f32_16   = OpConstant %f32 16.0\n"
+		"%f32_31_5 = OpConstant %f32 31.5\n"
+		"%f32_32   = OpConstant %f32 32.0\n"
+		"%f32_63_5 = OpConstant %f32 63.5\n"
+		"%f32_64   = OpConstant %f32 64.0\n"
+		"%f32_127_5 = OpConstant %f32 127.5\n"
+		"%f32_128  = OpConstant %f32 128.0\n"
+		"%f32_256  = OpConstant %f32 256.0\n"
+
+		"%main     = OpFunction %void None %voidf\n"
+		"%entry    = OpLabel\n"
+		"%idval    = OpLoad %uvec3 %id\n"
+		"%x        = OpCompositeExtract %u32 %idval 0\n"
+		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
+		"%inval    = OpLoad %f32 %inloc\n"
+
+		"%xabs     = OpExtInst %f32 %ext FAbs %inval\n"
+		"%x8       = OpFMod %f32 %xabs %f32_256\n"
+		"%x7       = OpFMod %f32 %xabs %f32_128\n"
+		"%x6       = OpFMod %f32 %xabs %f32_64\n"
+		"%x5       = OpFMod %f32 %xabs %f32_32\n"
+		"%x4       = OpFMod %f32 %xabs %f32_16\n"
+		"%x3       = OpFMod %f32 %xabs %f32_8\n"
+		"%x2       = OpFMod %f32 %xabs %f32_4\n"
+		"%x1       = OpFMod %f32 %xabs %f32_2\n"
+
+		"%b7       = OpFOrdGreaterThanEqual %bool %x8 %f32_127_5\n"
+		"%b6       = OpFOrdGreaterThanEqual %bool %x7 %f32_63_5\n"
+		"%b5       = OpFOrdGreaterThanEqual %bool %x6 %f32_31_5\n"
+		"%b4       = OpFOrdGreaterThanEqual %bool %x5 %f32_15_5\n"
+		"%b3       = OpFOrdGreaterThanEqual %bool %x4 %f32_7_5\n"
+		"%b2       = OpFOrdGreaterThanEqual %bool %x3 %f32_3_5\n"
+		"%b1       = OpFOrdGreaterThanEqual %bool %x2 %f32_1_5\n"
+		"%b0       = OpFOrdGreaterThanEqual %bool %x1 %f32_0_5\n"
+
+		+ generateOpPhiCase5(codestring) +
+
+		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
+		"            OpStore %outloc %res\n"
+		"            OpReturn\n"
+
+		"            OpFunctionEnd\n";
+	spec5.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
+	spec5.outputs.push_back(BufferSp(new Float32Buffer(outputFloats5)));
+	spec5.numWorkGroups = IVec3(numElements, 1, 1);
+
+	group->addChild(new SpvAsmComputeShaderCase(testCtx, "nested", "Stress OpPhi with a lot of nesting", spec5));
+
+	createOpPhiVartypeTests(group, testCtx);
+
 	return group.release();
 }
 
@@ -3362,11 +4206,12 @@ bool compareOpQuantizeF16ComputeExactCase (const std::vector<BufferSp>&, const v
 	if (outputAllocs.size() != 1)
 		return false;
 
-	// We really just need this for size because we cannot compare Nans.
-	const BufferSp&	expectedOutput	= expectedOutputs[0];
-	const float*	outputAsFloat	= static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+	// Only size is needed because we cannot compare Nans.
+	size_t byteSize = expectedOutputs[0]->getByteSize();
+
+	const float*	outputAsFloat	= static_cast<const float*>(outputAllocs[0]->getHostPtr());
 
-	if (expectedOutput->getNumBytes() != 4*sizeof(float)) {
+	if (byteSize != 4*sizeof(float)) {
 		return false;
 	}
 
@@ -3402,11 +4247,12 @@ bool compareNan (const std::vector<BufferSp>&, const vector<AllocationSp>& outpu
 	if (outputAllocs.size() != 1)
 		return false;
 
-	// We really just need this for size because we cannot compare Nans.
-	const BufferSp& expectedOutput		= expectedOutputs[0];
-	const float* output_as_float		= static_cast<const float*>(outputAllocs[0]->getHostPtr());;
+	// Only size is needed because we cannot compare Nans.
+	size_t byteSize = expectedOutputs[0]->getByteSize();
 
-	for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float); ++idx)
+	const float* const	output_as_float	= static_cast<const float* const>(outputAllocs[0]->getHostPtr());
+
+	for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
 	{
 		if (!deFloatIsNaN(output_as_float[idx]))
 		{
@@ -5144,6 +5990,7 @@ tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
 		"OpDecorate %sc_1  SpecId 1\n";
 
 	const char	typesAndConstants1[]	=
+		"${OPTYPE_DEFINITIONS:opt}"
 		"%sc_0      = OpSpecConstant${SC_DEF0}\n"
 		"%sc_1      = OpSpecConstant${SC_DEF1}\n"
 		"%sc_op     = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
@@ -5152,6 +5999,7 @@ tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
 		"%param     = OpFunctionParameter %v4f32\n"
 		"%label     = OpLabel\n"
+		"${TYPE_CONVERT:opt}"
 		"%result    = OpVariable %fp_v4f32 Function\n"
 		"             OpStore %result %param\n"
 		"%gen       = ${GEN_RESULT}\n"
@@ -5188,6 +6036,7 @@ tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
 	outputColors2[3] = RGBA(0,   0,   255, 255);
 
 	const char addZeroToSc[]		= "OpIAdd %i32 %c_i32_0 %sc_op";
+	const char addZeroToSc32[]		= "OpIAdd %i32 %c_i32_0 %sc_op32";
 	const char selectTrueUsingSc[]	= "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
 	const char selectFalseUsingSc[]	= "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
 
@@ -5222,14 +6071,39 @@ tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
 	cases.push_back(SpecConstantTwoIntGraphicsCase("not",					" %i32 0",		" %i32 0",		"%i32",		"Not                  %sc_0",					-2,		0,		addZeroToSc,		outputColors2));
 	cases.push_back(SpecConstantTwoIntGraphicsCase("logicalnot",			"False %bool",	"False %bool",	"%bool",	"LogicalNot           %sc_0",					1,		0,		selectFalseUsingSc,	outputColors2));
 	cases.push_back(SpecConstantTwoIntGraphicsCase("select",				"False %bool",	" %i32 0",		"%i32",		"Select               %sc_0 %sc_1 %c_i32_0",	1,		1,		addZeroToSc,		outputColors2));
-	// OpSConvert, OpFConvert: these two instructions involve ints/floats of different bitwidths.
+	cases.push_back(SpecConstantTwoIntGraphicsCase("sconvert",				" %i32 0",		" %i32 0",		"%i16",		"SConvert             %sc_0",					-1,		0,		addZeroToSc32,		outputColors0));
+	// -1082130432 stored as 32-bit two's complement is the binary representation of -1 as IEEE-754 Float
+	cases.push_back(SpecConstantTwoIntGraphicsCase("fconvert",				" %f32 0",		" %f32 0",		"%f64",		"FConvert             %sc_0",					-1082130432, 0,	addZeroToSc32,		outputColors0));
 	// \todo[2015-12-1 antiagainst] OpQuantizeToF16
 
 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
 	{
-		map<string, string>	specializations;
-		map<string, string>	fragments;
-		vector<deInt32>		specConstants;
+		map<string, string>			specializations;
+		map<string, string>			fragments;
+		vector<deInt32>				specConstants;
+		vector<string>				features;
+		PushConstants				noPushConstants;
+		GraphicsResources			noResources;
+		GraphicsInterfaces			noInterfaces;
+		std::vector<std::string>	noExtensions;
+
+		// Special SPIR-V code for SConvert-case
+		if (strcmp(cases[caseNdx].caseName, "sconvert") == 0)
+		{
+			features.push_back("shaderInt16");
+			fragments["capability"]					= "OpCapability Int16\n";					// Adds 16-bit integer capability
+			specializations["OPTYPE_DEFINITIONS"]	= "%i16 = OpTypeInt 16 1\n";				// Adds 16-bit integer type
+			specializations["TYPE_CONVERT"]			= "%sc_op32 = OpSConvert %i32 %sc_op\n";	// Converts 16-bit integer to 32-bit integer
+		}
+
+		// Special SPIR-V code for FConvert-case
+		if (strcmp(cases[caseNdx].caseName, "fconvert") == 0)
+		{
+			features.push_back("shaderFloat64");
+			fragments["capability"]					= "OpCapability Float64\n";					// Adds 64-bit float capability
+			specializations["OPTYPE_DEFINITIONS"]	= "%f64 = OpTypeFloat 64\n";				// Adds 64-bit float type
+			specializations["TYPE_CONVERT"]			= "%sc_op32 = OpConvertFToS %i32 %sc_op\n";	// Converts 64-bit float to 32-bit integer
+		}
 
 		specializations["SC_DEF0"]			= cases[caseNdx].scDefinition0;
 		specializations["SC_DEF1"]			= cases[caseNdx].scDefinition1;
@@ -5244,7 +6118,9 @@ tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
 		specConstants.push_back(cases[caseNdx].scActualValue0);
 		specConstants.push_back(cases[caseNdx].scActualValue1);
 
-		createTestsForAllStages(cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants, group.get());
+		createTestsForAllStages(
+			cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants,
+			noPushConstants, noResources, noInterfaces, noExtensions, features, VulkanFeatures(), group.get());
 	}
 
 	const char	decorations2[]			=
@@ -5497,8 +6373,7 @@ tcu::TestCaseGroup* createNoContractionTests(tcu::TestContext& testCtx)
 		"%c_vec4_1       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
 		"%c_f32_1pl2_23  = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
 		"%c_f32_1mi2_23  = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
-		"%c_f32_n1pn24   = OpConstant %f32 -0x1p-24\n"
-		;
+		"%c_f32_n1pn24   = OpConstant %f32 -0x1p-24\n";
 
 	const char						function[]	 =
 		"%test_code      = OpFunction %v4f32 None %v4f32_function\n"
@@ -5649,7 +6524,7 @@ tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
 	const NameCodePair tests[] =
 	{
 		{"bool", "", "%bool"},
-		{"vec2uint32", "%type = OpTypeVector %u32 2", "%type"},
+		{"vec2uint32", "", "%v2u32"},
 		{"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown", "%type"},
 		{"sampler", "%type = OpTypeSampler", "%type"},
 		{"sampledimage", "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n" "%type = OpTypeSampledImage %img", "%type"},
@@ -5684,8 +6559,8 @@ tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
 		"%b = OpFAdd %f32 %a %actually_zero\n"
 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
 		"OpReturnValue %ret\n"
-		"OpFunctionEnd\n"
-		;
+		"OpFunctionEnd\n";
+
 	createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
 
 	fragments["testfun"] =
@@ -5697,8 +6572,8 @@ tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
 		"%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
 		"OpReturnValue %ret\n"
-		"OpFunctionEnd\n"
-		;
+		"OpFunctionEnd\n";
+
 	createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
 
 	fragments["testfun"] =
@@ -5710,8 +6585,8 @@ tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
 		"%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
 		"OpReturnValue %ret\n"
-		"OpFunctionEnd\n"
-		;
+		"OpFunctionEnd\n";
+
 	createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
 
 	fragments["testfun"] =
@@ -5745,8 +6620,8 @@ tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
 		"%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
 		"%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
 		"OpReturnValue %ret\n"
-		"OpFunctionEnd\n"
-		;
+		"OpFunctionEnd\n";
+
 	createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
 
 	fragments["pre_main"] =
@@ -5782,8 +6657,8 @@ tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
 		"%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
 		"%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
 		"OpReturnValue %ret\n"
-		"OpFunctionEnd\n"
-		;
+		"OpFunctionEnd\n";
+
 	createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
 
 	return opUndefTests.release();
@@ -6209,8 +7084,8 @@ tcu::TestCaseGroup* createLoopTests(tcu::TestContext& testCtx)
 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
 		"OpReturnValue %result\n"
 
-		"OpFunctionEnd\n"
-		;
+		"OpFunctionEnd\n";
+
 	createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());
 
 	// Body comprised of multiple basic blocks.
@@ -6830,6 +7705,12 @@ const string getAsmTypeDeclaration (IntegerType type)
 	return "OpTypeInt " + getBitWidthStr(type) + sign;
 }
 
+const string getAsmTypeName (IntegerType type)
+{
+	const string prefix = isSigned(type) ? "%i" : "%u";
+	return prefix + getBitWidthStr(type);
+}
+
 template<typename T>
 BufferSp getSpecializedBuffer (deInt64 number)
 {
@@ -6894,21 +7775,34 @@ struct ConvertCase
 	, m_inputBuffer		(getBuffer(from, number))
 	, m_outputBuffer	(getBuffer(to, number))
 	{
-		m_asmTypes["inputType"]		= getAsmTypeDeclaration(from);
-		m_asmTypes["outputType"]	= getAsmTypeDeclaration(to);
+		m_asmTypes["inputType"]		= getAsmTypeName(from);
+		m_asmTypes["outputType"]	= getAsmTypeName(to);
 
 		if (m_features == COMPUTE_TEST_USES_INT16)
 		{
-			m_asmTypes["int_capabilities"] = "OpCapability Int16\n";
+			m_asmTypes["int_capabilities"]	  = "OpCapability Int16\n"
+												"OpCapability StorageUniformBufferBlock16\n";
+			m_asmTypes["int_additional_decl"] = "%i16        = OpTypeInt 16 1\n"
+												"%u16        = OpTypeInt 16 0\n";
+			m_asmTypes["int_extensions"]	  = "OpExtension \"SPV_KHR_16bit_storage\"\n";
 		}
 		else if (m_features == COMPUTE_TEST_USES_INT64)
 		{
-			m_asmTypes["int_capabilities"] = "OpCapability Int64\n";
+			m_asmTypes["int_capabilities"]	  = "OpCapability Int64\n";
+			m_asmTypes["int_additional_decl"] = "%i64        = OpTypeInt 64 1\n"
+												"%u64        = OpTypeInt 64 0\n";
+			m_asmTypes["int_extensions"]	  = "";
 		}
 		else if (m_features == COMPUTE_TEST_USES_INT16_INT64)
 		{
-			m_asmTypes["int_capabilities"] = string("OpCapability Int16\n") +
-													"OpCapability Int64\n";
+			m_asmTypes["int_capabilities"]	  = "OpCapability Int16\n"
+												"OpCapability StorageUniformBufferBlock16\n"
+												"OpCapability Int64\n";
+			m_asmTypes["int_additional_decl"] = "%i16        = OpTypeInt 16 1\n"
+												"%u16        = OpTypeInt 16 0\n"
+												"%i64        = OpTypeInt 64 1\n"
+												"%u64        = OpTypeInt 64 0\n";
+			m_asmTypes["int_extensions"]	  = "OpExtension \"SPV_KHR_16bit_storage\"\n";
 		}
 		else
 		{
@@ -6937,6 +7831,7 @@ const string getConvertCaseShaderStr (const string& instruction, const ConvertCa
 	const StringTemplate shader (
 		"OpCapability Shader\n"
 		"${int_capabilities}"
+		"${int_extensions}"
 		"OpMemoryModel Logical GLSL450\n"
 		"OpEntryPoint GLCompute %main \"main\" %id\n"
 		"OpExecutionMode %main LocalSize 1 1 1\n"
@@ -6960,23 +7855,21 @@ const string getConvertCaseShaderStr (const string& instruction, const ConvertCa
 		"%voidf      = OpTypeFunction %void\n"
 		"%u32        = OpTypeInt 32 0\n"
 		"%i32        = OpTypeInt 32 1\n"
+		"${int_additional_decl}"
 		"%uvec3      = OpTypeVector %u32 3\n"
 		"%uvec3ptr   = OpTypePointer Input %uvec3\n"
-		// Custom types
-		"%in_type    = ${inputType}\n"
-		"%out_type   = ${outputType}\n"
 		// Derived types
-		"%in_ptr     = OpTypePointer Uniform %in_type\n"
-		"%out_ptr    = OpTypePointer Uniform %out_type\n"
-		"%in_arr     = OpTypeRuntimeArray %in_type\n"
-		"%out_arr    = OpTypeRuntimeArray %out_type\n"
+		"%in_ptr     = OpTypePointer Uniform ${inputType}\n"
+		"%out_ptr    = OpTypePointer Uniform ${outputType}\n"
+		"%in_arr     = OpTypeRuntimeArray ${inputType}\n"
+		"%out_arr    = OpTypeRuntimeArray ${outputType}\n"
 		"%in_buf     = OpTypeStruct %in_arr\n"
 		"%out_buf    = OpTypeStruct %out_arr\n"
 		"%in_bufptr  = OpTypePointer Uniform %in_buf\n"
 		"%out_bufptr = OpTypePointer Uniform %out_buf\n"
 		"%indata     = OpVariable %in_bufptr Uniform\n"
 		"%outdata    = OpVariable %out_bufptr Uniform\n"
-		"%inputptr   = OpTypePointer Input %in_type\n"
+		"%inputptr   = OpTypePointer Input ${inputType}\n"
 		"%id         = OpVariable %uvec3ptr Input\n"
 		// Constants
 		"%zero       = OpConstant %i32 0\n"
@@ -6987,8 +7880,8 @@ const string getConvertCaseShaderStr (const string& instruction, const ConvertCa
 		"%x          = OpCompositeExtract %u32 %idval 0\n"
 		"%inloc      = OpAccessChain %in_ptr %indata %zero %x\n"
 		"%outloc     = OpAccessChain %out_ptr %outdata %zero %x\n"
-		"%inval      = OpLoad %in_type %inloc\n"
-		"%conv       = ${instruction} %out_type %inval\n"
+		"%inval      = OpLoad ${inputType} %inloc\n"
+		"%conv       = ${instruction} ${outputType} %inval\n"
 		"              OpStore %outloc %conv\n"
 		"              OpReturn\n"
 		"              OpFunctionEnd\n"
@@ -7029,6 +7922,11 @@ tcu::TestCaseGroup* createSConvertTests (tcu::TestContext& testCtx)
 		spec.outputs.push_back(test->m_outputBuffer);
 		spec.numWorkGroups = IVec3(1, 1, 1);
 
+		if (test->m_features == COMPUTE_TEST_USES_INT16 || test->m_features == COMPUTE_TEST_USES_INT16_INT64)
+		{
+			spec.extensions.push_back("VK_KHR_16bit_storage");
+		}
+
 		group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "Convert integers with OpSConvert.", spec, test->m_features));
 	}
 
@@ -7067,6 +7965,11 @@ tcu::TestCaseGroup* createUConvertTests (tcu::TestContext& testCtx)
 		spec.outputs.push_back(test->m_outputBuffer);
 		spec.numWorkGroups = IVec3(1, 1, 1);
 
+		if (test->m_features == COMPUTE_TEST_USES_INT16 || test->m_features == COMPUTE_TEST_USES_INT16_INT64)
+		{
+			spec.extensions.push_back("VK_KHR_16bit_storage");
+		}
+
 		group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "Convert integers with OpUConvert.", spec, test->m_features));
 	}
 	return group.release();
@@ -7136,16 +8039,18 @@ void createVectorCompositeCases (vector<map<string, string> >& testCases, de::Ra
 	// Vec2 to Vec4
 	for (int width = 2; width <= 4; ++width)
 	{
-		string randomConst = numberToString(getInt(rnd));
-		string widthStr = numberToString(width);
-		int index = rnd.getInt(0, width-1);
-
-		params["type"]					= "vec";
-		params["name"]					= params["type"] + "_" + widthStr;
-		params["compositeType"]			= "%composite = OpTypeVector %custom " + widthStr +"\n";
-		params["filler"]				= string("%filler    = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n";
-		params["compositeConstruct"]	= "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
-		params["indexes"]				= numberToString(index);
+		const string randomConst = numberToString(getInt(rnd));
+		const string widthStr = numberToString(width);
+		const string composite_type = "${customType}vec" + widthStr;
+		const int index = rnd.getInt(0, width-1);
+
+		params["type"]			= "vec";
+		params["name"]			= params["type"] + "_" + widthStr;
+		params["compositeDecl"]		= composite_type + " = OpTypeVector ${customType} " + widthStr +"\n";
+		params["compositeType"]		= composite_type;
+		params["filler"]		= string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
+		params["compositeConstruct"]	= "%instance  = OpCompositeConstruct " + composite_type + repeatString(" %filler", width) + "\n";
+		params["indexes"]		= numberToString(index);
 		testCases.push_back(params);
 	}
 }
@@ -7161,14 +8066,14 @@ void createArrayCompositeCases (vector<map<string, string> >& testCases, de::Ran
 		string widthStr = numberToString(width);
 		int index = rnd.getInt(0, width-1);
 
-		params["type"]					= "array";
-		params["name"]					= params["type"] + "_" + widthStr;
-		params["compositeType"]			= string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
-											+	 "%composite = OpTypeArray %custom %arraywidth\n";
-
-		params["filler"]				= string("%filler    = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n";
+		params["type"]			= "array";
+		params["name"]			= params["type"] + "_" + widthStr;
+		params["compositeDecl"]		= string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
+											+	 "%composite = OpTypeArray ${customType} %arraywidth\n";
+		params["compositeType"]		= "%composite";
+		params["filler"]		= string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
 		params["compositeConstruct"]	= "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
-		params["indexes"]				= numberToString(index);
+		params["indexes"]		= numberToString(index);
 		testCases.push_back(params);
 	}
 }
@@ -7183,12 +8088,13 @@ void createStructCompositeCases (vector<map<string, string> >& testCases, de::Ra
 		string randomConst = numberToString(getInt(rnd));
 		int index = rnd.getInt(0, width-1);
 
-		params["type"]					= "struct";
-		params["name"]					= params["type"] + "_" + numberToString(width);
-		params["compositeType"]			= "%composite = OpTypeStruct" + repeatString(" %custom", width) + "\n";
-		params["filler"]				= string("%filler    = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n";
+		params["type"]			= "struct";
+		params["name"]			= params["type"] + "_" + numberToString(width);
+		params["compositeDecl"]		= "%composite = OpTypeStruct" + repeatString(" ${customType}", width) + "\n";
+		params["compositeType"]		= "%composite";
+		params["filler"]		= string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
 		params["compositeConstruct"]	= "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
-		params["indexes"]				= numberToString(index);
+		params["indexes"]		= numberToString(index);
 		testCases.push_back(params);
 	}
 }
@@ -7208,16 +8114,17 @@ void createMatrixCompositeCases (vector<map<string, string> >& testCases, de::Ra
 			int index_1 = rnd.getInt(0, width-1);
 			string columnStr = numberToString(column);
 
-			params["type"]					= "matrix";
-			params["name"]					= params["type"] + "_" + widthStr + "x" + columnStr;
-			params["compositeType"]			= string("%vectype   = OpTypeVector %custom " + widthStr + "\n")
+			params["type"]		= "matrix";
+			params["name"]		= params["type"] + "_" + widthStr + "x" + columnStr;
+			params["compositeDecl"]	= string("%vectype   = OpTypeVector ${customType} " + widthStr + "\n")
 												+	 "%composite = OpTypeMatrix %vectype " + columnStr + "\n";
+			params["compositeType"]	= "%composite";
 
-			params["filler"]				= string("%filler    = OpConstant %custom ") + getRandomConstantString(type, rnd) + "\n"
+			params["filler"]	= string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n"
 												+	 "%fillerVec = OpConstantComposite %vectype" + repeatString(" %filler", width) + "\n";
 
 			params["compositeConstruct"]	= "%instance  = OpCompositeConstruct %composite" + repeatString(" %fillerVec", column) + "\n";
-			params["indexes"]				= numberToString(index_0) + " " + numberToString(index_1);
+			params["indexes"]	= numberToString(index_0) + " " + numberToString(index_1);
 			testCases.push_back(params);
 		}
 	}
@@ -7246,15 +8153,37 @@ const string getAssemblyTypeDeclaration (const NumberType type)
 	}
 }
 
+const string getAssemblyTypeName (const NumberType type)
+{
+	switch (type)
+	{
+		case NUMBERTYPE_INT32:		return "%i32";
+		case NUMBERTYPE_UINT32:		return "%u32";
+		case NUMBERTYPE_FLOAT32:	return "%f32";
+		default:			DE_ASSERT(false); return "";
+	}
+}
+
 const string specializeCompositeInsertShaderTemplate (const NumberType type, const map<string, string>& params)
 {
 	map<string, string>	parameters(params);
 
-	parameters["typeDeclaration"] = getAssemblyTypeDeclaration(type);
-
+	const string customType = getAssemblyTypeName(type);
+	map<string, string> substCustomType;
+	substCustomType["customType"] = customType;
+	parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
+	parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
+	parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
+	parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
+	parameters["customType"] = customType;
 	parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
 
-	return StringTemplate (
+	if (parameters.at("compositeType") != "%u32vec3")
+	{
+		parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
+	}
+
+	return StringTemplate(
 		"OpCapability Shader\n"
 		"OpCapability Matrix\n"
 		"OpMemoryModel Logical GLSL450\n"
@@ -7281,19 +8210,20 @@ const string specializeCompositeInsertShaderTemplate (const NumberType type, con
 		"%voidf     = OpTypeFunction %void\n"
 		"%u32       = OpTypeInt 32 0\n"
 		"%i32       = OpTypeInt 32 1\n"
-		"%uvec3     = OpTypeVector %u32 3\n"
-		"%uvec3ptr  = OpTypePointer Input %uvec3\n"
+		"%f32       = OpTypeFloat 32\n"
 
-		// Custom type
-		"%custom    = ${typeDeclaration}\n"
-		"${compositeType}"
+		// Composite declaration
+		"${compositeDecl}"
 
 		// Constants
 		"${filler}"
 
+		"${u32vec3Decl:opt}"
+		"%uvec3ptr  = OpTypePointer Input %u32vec3\n"
+
 		// Inherited from custom
-		"%customptr = OpTypePointer Uniform %custom\n"
-		"%customarr = OpTypeRuntimeArray %custom\n"
+		"%customptr = OpTypePointer Uniform ${customType}\n"
+		"%customarr = OpTypeRuntimeArray ${customType}\n"
 		"%buf       = OpTypeStruct %customarr\n"
 		"%bufptr    = OpTypePointer Uniform %buf\n"
 
@@ -7305,19 +8235,19 @@ const string specializeCompositeInsertShaderTemplate (const NumberType type, con
 
 		"%main      = OpFunction %void None %voidf\n"
 		"%label     = OpLabel\n"
-		"%idval     = OpLoad %uvec3 %id\n"
+		"%idval     = OpLoad %u32vec3 %id\n"
 		"%x         = OpCompositeExtract %u32 %idval 0\n"
 
 		"%inloc     = OpAccessChain %customptr %indata %zero %x\n"
 		"%outloc    = OpAccessChain %customptr %outdata %zero %x\n"
 		// Read the input value
-		"%inval     = OpLoad %custom %inloc\n"
+		"%inval     = OpLoad ${customType} %inloc\n"
 		// Create the composite and fill it
 		"${compositeConstruct}"
 		// Insert the input value to a place
-		"%instance2 = OpCompositeInsert %composite %inval %instance ${indexes}\n"
+		"%instance2 = OpCompositeInsert ${compositeType} %inval %instance ${indexes}\n"
 		// Read back the value from the position
-		"%out_val   = OpCompositeExtract %custom %instance2 ${indexes}\n"
+		"%out_val   = OpCompositeExtract ${customType} %instance2 ${indexes}\n"
 		// Store it in the output position
 		"             OpStore %outloc %out_val\n"
 		"             OpReturn\n"
@@ -7406,10 +8336,9 @@ const string specializeInBoundsShaderTemplate (const NumberType type, const Asse
 	vector<string>		indexes		= de::splitString(fullIndex, ' ');
 
 	map<string, string>	parameters	(params);
-	parameters["typeDeclaration"]	= getAssemblyTypeDeclaration(type);
-	parameters["structType"]		= repeatString(" %composite", structInfo.components);
+	parameters["structType"]	= repeatString(" ${compositeType}", structInfo.components);
 	parameters["structConstruct"]	= repeatString(" %instance", structInfo.components);
-	parameters["insertIndexes"]		= fullIndex;
+	parameters["insertIndexes"]	= fullIndex;
 
 	// In matrix cases the last two index is the CompositeExtract indexes
 	const deUint32 extractIndexes = (parameters["type"] == "matrix") ? 2 : 1;
@@ -7434,7 +8363,25 @@ const string specializeInBoundsShaderTemplate (const NumberType type, const Asse
 
 	parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
 
-	return StringTemplate (
+	const string customType = getAssemblyTypeName(type);
+	map<string, string> substCustomType;
+	substCustomType["customType"] = customType;
+	parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
+	parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
+	parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
+	parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
+	parameters["customType"] = customType;
+
+	const string compositeType = parameters.at("compositeType");
+	map<string, string> substCompositeType;
+	substCompositeType["compositeType"] = compositeType;
+	parameters["structType"] = StringTemplate(parameters.at("structType")).specialize(substCompositeType);
+	if (compositeType != "%u32vec3")
+	{
+		parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
+	}
+
+	return StringTemplate(
 		"OpCapability Shader\n"
 		"OpCapability Matrix\n"
 		"OpMemoryModel Logical GLSL450\n"
@@ -7457,23 +8404,24 @@ const string specializeInBoundsShaderTemplate (const NumberType type, const Asse
 		// General types
 		"%void      = OpTypeVoid\n"
 		"%voidf     = OpTypeFunction %void\n"
+		"%i32       = OpTypeInt 32 1\n"
 		"%u32       = OpTypeInt 32 0\n"
-		"%uvec3     = OpTypeVector %u32 3\n"
-		"%uvec3ptr  = OpTypePointer Input %uvec3\n"
-		// Custom type
-		"%custom    = ${typeDeclaration}\n"
+		"%f32       = OpTypeFloat 32\n"
 		// Custom types
-		"${compositeType}"
+		"${compositeDecl}"
+		// %u32vec3 if not already declared in ${compositeDecl}
+		"${u32vec3Decl:opt}"
+		"%uvec3ptr  = OpTypePointer Input %u32vec3\n"
 		// Inherited from composite
-		"%composite_p = OpTypePointer Function %composite\n"
+		"%composite_p = OpTypePointer Function ${compositeType}\n"
 		"%struct_t  = OpTypeStruct${structType}\n"
 		"%struct_p  = OpTypePointer Function %struct_t\n"
 		// Constants
 		"${filler}"
 		"${accessChainConstDeclaration}"
 		// Inherited from custom
-		"%customptr = OpTypePointer Uniform %custom\n"
-		"%customarr = OpTypeRuntimeArray %custom\n"
+		"%customptr = OpTypePointer Uniform ${customType}\n"
+		"%customarr = OpTypeRuntimeArray ${customType}\n"
 		"%buf       = OpTypeStruct %customarr\n"
 		"%bufptr    = OpTypePointer Uniform %buf\n"
 		"%indata    = OpVariable %bufptr Uniform\n"
@@ -7484,13 +8432,13 @@ const string specializeInBoundsShaderTemplate (const NumberType type, const Asse
 		"%main      = OpFunction %void None %voidf\n"
 		"%label     = OpLabel\n"
 		"%struct_v  = OpVariable %struct_p Function\n"
-		"%idval     = OpLoad %uvec3 %id\n"
+		"%idval     = OpLoad %u32vec3 %id\n"
 		"%x         = OpCompositeExtract %u32 %idval 0\n"
 		// Create the input/output type
 		"%inloc     = OpInBoundsAccessChain %customptr %indata %zero %x\n"
 		"%outloc    = OpInBoundsAccessChain %customptr %outdata %zero %x\n"
 		// Read the input value
-		"%inval     = OpLoad %custom %inloc\n"
+		"%inval     = OpLoad ${customType} %inloc\n"
 		// Create the composite and fill it
 		"${compositeConstruct}"
 		// Create the struct and fill it with the composite
@@ -7501,12 +8449,13 @@ const string specializeInBoundsShaderTemplate (const NumberType type, const Asse
 		"             OpStore %struct_v %comp_obj\n"
 		// Get deepest possible composite pointer
 		"%inner_ptr = OpInBoundsAccessChain %composite_p %struct_v${accessChainIndexes}\n"
-		"%read_obj  = OpLoad %composite %inner_ptr\n"
+		"%read_obj  = OpLoad ${compositeType} %inner_ptr\n"
 		// Read back the stored value
-		"%read_val  = OpCompositeExtract %custom %read_obj${extractIndexes}\n"
+		"%read_val  = OpCompositeExtract ${customType} %read_obj${extractIndexes}\n"
 		"             OpStore %outloc %read_val\n"
 		"             OpReturn\n"
-		"             OpFunctionEnd\n").specialize(parameters);
+		"             OpFunctionEnd\n"
+	).specialize(parameters);
 }
 
 tcu::TestCaseGroup* createOpInBoundsAccessChainGroup (tcu::TestContext& testCtx)
@@ -7575,19 +8524,17 @@ const string specializeDefaultOutputShaderTemplate (const NumberType type, const
 {
 	map<string, string> parameters(params);
 
-	parameters["typeDeclaration"] = getAssemblyTypeDeclaration(type);
+	parameters["customType"]	= getAssemblyTypeName(type);
 
 	// Declare the const value, and use it in the initializer
 	if (params.find("constValue") != params.end())
 	{
-		parameters["constDeclaration"]		= "%const      = OpConstant %in_type " + params.at("constValue") + "\n";
-		parameters["variableInitializer"]	= "%const";
+		parameters["variableInitializer"]	= " %const";
 	}
 	// Uninitialized case
 	else
 	{
-		parameters["constDeclaration"]		= "";
-		parameters["variableInitializer"]	= "";
+		parameters["commentDecl"]	= ";";
 	}
 
 	return StringTemplate(
@@ -7612,33 +8559,31 @@ const string specializeDefaultOutputShaderTemplate (const NumberType type, const
 		"%voidf      = OpTypeFunction %void\n"
 		"%u32        = OpTypeInt 32 0\n"
 		"%i32        = OpTypeInt 32 1\n"
+		"%f32        = OpTypeFloat 32\n"
 		"%uvec3      = OpTypeVector %u32 3\n"
 		"%uvec3ptr   = OpTypePointer Input %uvec3\n"
-		// Custom types
-		"%in_type    = ${typeDeclaration}\n"
-		// "%const      = OpConstant %in_type ${constValue}\n"
-		"${constDeclaration}\n"
+		"${commentDecl:opt}%const      = OpConstant ${customType} ${constValue:opt}\n"
 		// Derived types
-		"%in_ptr     = OpTypePointer Uniform %in_type\n"
-		"%in_arr     = OpTypeRuntimeArray %in_type\n"
+		"%in_ptr     = OpTypePointer Uniform ${customType}\n"
+		"%in_arr     = OpTypeRuntimeArray ${customType}\n"
 		"%in_buf     = OpTypeStruct %in_arr\n"
 		"%in_bufptr  = OpTypePointer Uniform %in_buf\n"
 		"%indata     = OpVariable %in_bufptr Uniform\n"
 		"%outdata    = OpVariable %in_bufptr Uniform\n"
 		"%id         = OpVariable %uvec3ptr Input\n"
-		"%var_ptr    = OpTypePointer Function %in_type\n"
+		"%var_ptr    = OpTypePointer Function ${customType}\n"
 		// Constants
 		"%zero       = OpConstant %i32 0\n"
 		// Main function
 		"%main       = OpFunction %void None %voidf\n"
 		"%label      = OpLabel\n"
-		"%out_var    = OpVariable %var_ptr Function ${variableInitializer}\n"
+		"%out_var    = OpVariable %var_ptr Function${variableInitializer:opt}\n"
 		"%idval      = OpLoad %uvec3 %id\n"
 		"%x          = OpCompositeExtract %u32 %idval 0\n"
 		"%inloc      = OpAccessChain %in_ptr %indata %zero %x\n"
 		"%outloc     = OpAccessChain %in_ptr %outdata %zero %x\n"
 
-		"%outval     = OpLoad %in_type %out_var\n"
+		"%outval     = OpLoad ${customType} %out_var\n"
 		"              OpStore %outloc %outval\n"
 		"              OpReturn\n"
 		"              OpFunctionEnd\n"
@@ -7655,11 +8600,13 @@ bool compareFloats (const std::vector<BufferSp>&, const vector<AllocationSp>& ou
 
 	for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
 	{
-		float expected;
-		memcpy(&expected, expectedOutputs[outputNdx]->data(), expectedOutputs[outputNdx]->getNumBytes());
+		vector<deUint8>	expectedBytes;
+		float			expected;
+		float			actual;
 
-		float actual;
-		memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedOutputs[outputNdx]->getNumBytes());
+		expectedOutputs[outputNdx]->getBytes(expectedBytes);
+		memcpy(&expected, &expectedBytes.front(), expectedBytes.size());
+		memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size());
 
 		// Test with epsilon
 		if (fabs(expected - actual) > epsilon)
@@ -7681,9 +8628,12 @@ bool passthruVerify (const std::vector<BufferSp>&, const vector<AllocationSp>& o
 	// Copy and discard the result.
 	for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
 	{
-		size_t width = expectedOutputs[outputNdx]->getNumBytes();
+		vector<deUint8>	expectedBytes;
+		expectedOutputs[outputNdx]->getBytes(expectedBytes);
+
+		const size_t	width			= expectedBytes.size();
+		vector<char>	data			(width);
 
-		vector<char> data(width);
 		memcpy(&data[0], outputAllocs[outputNdx]->getHostPtr(), width);
 	}
 	return true;
@@ -7800,6 +8750,7 @@ tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
 	de::MovePtr<tcu::TestCaseGroup> computeTests		(new tcu::TestCaseGroup(testCtx, "compute", "Compute Instructions with special opcodes/operands"));
 	de::MovePtr<tcu::TestCaseGroup> graphicsTests		(new tcu::TestCaseGroup(testCtx, "graphics", "Graphics Instructions with special opcodes/operands"));
 
+	computeTests->addChild(createLocalSizeGroup(testCtx));
 	computeTests->addChild(createOpNopGroup(testCtx));
 	computeTests->addChild(createOpFUnordGroup(testCtx));
 	computeTests->addChild(createOpAtomicGroup(testCtx, false));
@@ -7849,7 +8800,11 @@ tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
 	}
 
 	computeTests->addChild(create16BitStorageComputeGroup(testCtx));
+	computeTests->addChild(createUboMatrixPaddingComputeGroup(testCtx));
+	computeTests->addChild(createConditionalBranchComputeGroup(testCtx));
+	computeTests->addChild(createIndexingComputeGroup(testCtx));
 	computeTests->addChild(createVariablePointersComputeGroup(testCtx));
+	computeTests->addChild(createImageSamplerComputeGroup(testCtx));
 	graphicsTests->addChild(createOpNopTests(testCtx));
 	graphicsTests->addChild(createOpSourceTests(testCtx));
 	graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
@@ -7884,7 +8839,11 @@ tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
 	}
 
 	graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
+	graphicsTests->addChild(createUboMatrixPaddingGraphicsGroup(testCtx));
+	graphicsTests->addChild(createConditionalBranchGraphicsGroup(testCtx));
+	graphicsTests->addChild(createIndexingGraphicsGroup(testCtx));
 	graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));
+	graphicsTests->addChild(createImageSamplerGraphicsGroup(testCtx));
 
 	instructionTests->addChild(computeTests.release());
 	instructionTests->addChild(graphicsTests.release());