Merge vk-gl-cts/vulkan-cts-1.1.5 into vk-gl-cts/vulkan-cts-1.1.6
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / spirv_assembly / vktSpvAsmInstructionTests.cpp
index 0a8ac9f..31c626b 100644 (file)
 #include "vktSpvAsmComputeShaderCase.hpp"
 #include "vktSpvAsmComputeShaderTestUtil.hpp"
 #include "vktSpvAsmFloatControlsTests.hpp"
+#include "vktSpvAsmFromHlslTests.hpp"
 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
 #include "vktSpvAsmVariablePointersTests.hpp"
 #include "vktSpvAsmVariableInitTests.hpp"
 #include "vktSpvAsmPointerParameterTests.hpp"
+#include "vktSpvAsmSpirvVersion1p4Tests.hpp"
 #include "vktSpvAsmSpirvVersionTests.hpp"
 #include "vktTestCaseUtil.hpp"
 #include "vktSpvAsmLoopDepLenTests.hpp"
@@ -74,6 +76,7 @@
 #include "vktSpvAsmWorkgroupMemoryTests.hpp"
 #include "vktSpvAsmSignedIntCompareTests.hpp"
 #include "vktSpvAsmPtrAccessChainTests.hpp"
+#include "vktSpvAsm64bitCompareTests.hpp"
 
 #include <cmath>
 #include <limits>
@@ -108,6 +111,629 @@ using tcu::Vec4;
 const bool TEST_WITH_NAN       = true;
 const bool TEST_WITHOUT_NAN    = false;
 
+const string loadScalarF16FromUint =
+       "%ld_arg_${var} = OpFunction %f16 None %f16_i32_fn\n"
+       "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
+       "%ld_arg_${var}_entry = OpLabel\n"
+       "%ld_arg_${var}_conv = OpBitcast %u32 %ld_arg_${var}_param\n"
+       "%ld_arg_${var}_div = OpUDiv %u32 %ld_arg_${var}_conv %c_u32_2\n"
+       "%ld_arg_${var}_and_low = OpBitwiseAnd %u32 %ld_arg_${var}_param %c_u32_1\n"
+       "%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_div\n"
+       "%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
+       "%ld_arg_${var}_unpack = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
+       "%ld_arg_${var}_ex = OpVectorExtractDynamic %f16 %ld_arg_${var}_unpack %ld_arg_${var}_and_low\n"
+       "OpReturnValue %ld_arg_${var}_ex\n"
+       "OpFunctionEnd\n";
+
+const string loadV2F16FromUint =
+       "%ld_arg_${var} = OpFunction %v2f16 None %v2f16_i32_fn\n"
+       "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
+       "%ld_arg_${var}_entry = OpLabel\n"
+       "%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param\n"
+       "%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
+       "%ld_arg_${var}_cast = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
+       "OpReturnValue %ld_arg_${var}_cast\n"
+       "OpFunctionEnd\n";
+
+const string loadV3F16FromUints =
+       // Since we allocate a vec4 worth of values, this case is almost the
+       // same as that case.
+       "%ld_arg_${var} = OpFunction %v3f16 None %v3f16_i32_fn\n"
+       "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
+       "%ld_arg_${var}_entry = OpLabel\n"
+       "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
+       "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
+       "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
+       "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
+       "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
+       "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
+       "%ld_arg_${var}_shuffle = OpVectorShuffle %v3f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2\n"
+       "OpReturnValue %ld_arg_${var}_shuffle\n"
+       "OpFunctionEnd\n";
+
+const string loadV4F16FromUints =
+       "%ld_arg_${var} = OpFunction %v4f16 None %v4f16_i32_fn\n"
+       "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
+       "%ld_arg_${var}_entry = OpLabel\n"
+       "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
+       "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
+       "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
+       "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
+       "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
+       "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
+       "%ld_arg_${var}_shuffle = OpVectorShuffle %v4f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2 3\n"
+       "OpReturnValue %ld_arg_${var}_shuffle\n"
+       "OpFunctionEnd\n";
+
+const string loadM2x2F16FromUints =
+       "%ld_arg_${var} = OpFunction %m2x2f16 None %m2x2f16_i32_fn\n"
+       "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
+       "%ld_arg_${var}_entry = OpLabel\n"
+       "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
+       "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
+       "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
+       "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
+       "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
+       "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
+       "%ld_arg_${var}_cons = OpCompositeConstruct %m2x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1\n"
+       "OpReturnValue %ld_arg_${var}_cons\n"
+       "OpFunctionEnd\n";
+
+const string loadM2x3F16FromUints =
+       "%ld_arg_${var} = OpFunction %m2x3f16 None %m2x3f16_i32_fn\n"
+       "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
+       "%ld_arg_${var}_entry = OpLabel\n"
+       "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
+       "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
+       "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
+       "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
+       "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
+       "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
+       "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
+       "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
+       "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
+       "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
+       "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
+       "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
+       "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
+       "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
+       "%ld_arg_${var}_mat = OpCompositeConstruct %m2x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
+       "OpReturnValue %ld_arg_${var}_mat\n"
+       "OpFunctionEnd\n";
+
+const string loadM2x4F16FromUints =
+       "%ld_arg_${var} = OpFunction %m2x4f16 None %m2x4f16_i32_fn\n"
+       "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
+       "%ld_arg_${var}_entry = OpLabel\n"
+       "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
+       "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
+       "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
+       "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
+       "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
+       "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
+       "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
+       "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
+       "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
+       "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
+       "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
+       "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
+       "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
+       "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
+       "%ld_arg_${var}_mat = OpCompositeConstruct %m2x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
+       "OpReturnValue %ld_arg_${var}_mat\n"
+       "OpFunctionEnd\n";
+
+const string loadM3x2F16FromUints =
+       "%ld_arg_${var} = OpFunction %m3x2f16 None %m3x2f16_i32_fn\n"
+       "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
+       "%ld_arg_${var}_entry = OpLabel\n"
+       "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
+       "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
+       "%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
+       "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
+       "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
+       "%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
+       "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
+       "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
+       "%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
+       "%ld_arg_${var}_mat = OpCompositeConstruct %m3x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2\n"
+       "OpReturnValue %ld_arg_${var}_mat\n"
+       "OpFunctionEnd\n";
+
+const string loadM3x3F16FromUints =
+       "%ld_arg_${var} = OpFunction %m3x3f16 None %m3x3f16_i32_fn\n"
+       "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
+       "%ld_arg_${var}_entry = OpLabel\n"
+       "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
+       "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
+       "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
+       "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
+       "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
+       "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
+       "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
+       "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
+       "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
+       "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
+       "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
+       "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
+       "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
+       "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
+       "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
+       "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
+       "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
+       "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
+       "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
+       "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
+       "%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
+       "%ld_arg_${var}_mat = OpCompositeConstruct %m3x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
+       "OpReturnValue %ld_arg_${var}_mat\n"
+       "OpFunctionEnd\n";
+
+const string loadM3x4F16FromUints =
+       "%ld_arg_${var} = OpFunction %m3x4f16 None %m3x4f16_i32_fn\n"
+       "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
+       "%ld_arg_${var}_entry = OpLabel\n"
+       "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
+       "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
+       "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
+       "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
+       "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
+       "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
+       "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
+       "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
+       "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
+       "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
+       "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
+       "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
+       "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
+       "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
+       "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
+       "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
+       "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
+       "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
+       "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
+       "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
+       "%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
+       "%ld_arg_${var}_mat = OpCompositeConstruct %m3x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
+       "OpReturnValue %ld_arg_${var}_mat\n"
+       "OpFunctionEnd\n";
+
+const string loadM4x2F16FromUints =
+       "%ld_arg_${var} = OpFunction %m4x2f16 None %m4x2f16_i32_fn\n"
+       "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
+       "%ld_arg_${var}_entry = OpLabel\n"
+       "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
+       "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
+       "%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
+       "%ld_arg_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
+       "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
+       "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
+       "%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
+       "%ld_arg_${var}_ld3 = OpLoad %u32 %ld_arg_${var}_gep3\n"
+       "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
+       "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
+       "%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
+       "%ld_arg_${var}_bc3 = OpBitcast %v2f16 %ld_arg_${var}_ld3\n"
+       "%ld_arg_${var}_mat = OpCompositeConstruct %m4x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2 %ld_arg_${var}_bc3\n"
+       "OpReturnValue %ld_arg_${var}_mat\n"
+       "OpFunctionEnd\n";
+
+const string loadM4x3F16FromUints =
+       "%ld_arg_${var} = OpFunction %m4x3f16 None %m4x3f16_i32_fn\n"
+       "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
+       "%ld_arg_${var}_entry = OpLabel\n"
+       "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
+       "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
+       "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
+       "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
+       "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
+       "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
+       "%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
+       "%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
+       "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
+       "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
+       "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
+       "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
+       "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
+       "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
+       "%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
+       "%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
+       "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
+       "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
+       "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
+       "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
+       "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
+       "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
+       "%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
+       "%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
+       "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
+       "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
+       "%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
+       "%ld_arg_${var}_vec3 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2\n"
+       "%ld_arg_${var}_mat = OpCompositeConstruct %m4x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
+       "OpReturnValue %ld_arg_${var}_mat\n"
+       "OpFunctionEnd\n";
+
+const string loadM4x4F16FromUints =
+       "%ld_arg_${var} = OpFunction %m4x4f16 None %m4x4f16_i32_fn\n"
+       "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
+       "%ld_arg_${var}_entry = OpLabel\n"
+       "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
+       "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
+       "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
+       "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
+       "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
+       "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
+       "%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
+       "%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
+       "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
+       "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
+       "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
+       "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
+       "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
+       "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
+       "%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
+       "%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
+       "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
+       "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
+       "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
+       "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
+       "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
+       "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
+       "%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
+       "%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
+       "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
+       "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
+       "%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
+       "%ld_arg_${var}_vec3 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2 3\n"
+       "%ld_arg_${var}_mat = OpCompositeConstruct %m4x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
+       "OpReturnValue %ld_arg_${var}_mat\n"
+       "OpFunctionEnd\n";
+
+const string storeScalarF16AsUint =
+       // This version is sensitive to the initial value in the output buffer.
+       // The infrastructure sets all output buffer bits to one before invoking
+       // the shader so this version uses an atomic and to generate the correct
+       // zeroes.
+       "%st_fn_${var} = OpFunction %void None %void_f16_i32_fn\n"
+       "%st_fn_${var}_param1 = OpFunctionParameter %f16\n"
+       "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
+       "%st_fn_${var}_entry = OpLabel\n"
+       "%st_fn_${var}_and_low = OpBitwiseAnd %u32 %st_fn_${var}_param2 %c_u32_1\n"
+       "%st_fn_${var}_zero_vec = OpBitcast %v2f16 %c_u32_0\n"
+       "%st_fn_${var}_insert = OpVectorInsertDynamic %v2f16 %st_fn_${var}_zero_vec %st_fn_${var}_param1 %st_fn_${var}_and_low\n"
+       "%st_fn_${var}_odd = OpIEqual %bool %st_fn_${var}_and_low %c_u32_1\n"
+       // Or 16 bits of ones into the half that was not populated with the result.
+       "%st_fn_${var}_sel = OpSelect %u32 %st_fn_${var}_odd %c_u32_low_ones %c_u32_high_ones\n"
+       "%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_insert\n"
+       "%st_fn_${var}_or = OpBitwiseOr %u32 %st_fn_${var}_cast %st_fn_${var}_sel\n"
+       "%st_fn_${var}_conv = OpBitcast %u32 %st_fn_${var}_param2\n"
+       "%st_fn_${var}_div = OpUDiv %u32 %st_fn_${var}_conv %c_u32_2\n"
+       "%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_div\n"
+       "%st_fn_${var}_and = OpAtomicAnd %u32 %st_fn_${var}_gep %c_u32_1 %c_u32_0 %st_fn_${var}_or\n"
+       "OpReturn\n"
+       "OpFunctionEnd\n";
+
+const string storeV2F16AsUint =
+       "%st_fn_${var} = OpFunction %void None %void_v2f16_i32_fn\n"
+       "%st_fn_${var}_param1 = OpFunctionParameter %v2f16\n"
+       "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
+       "%st_fn_${var}_entry = OpLabel\n"
+       "%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_param1\n"
+       "%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2\n"
+       "OpStore %st_fn_${var}_gep %st_fn_${var}_cast\n"
+       "OpReturn\n"
+       "OpFunctionEnd\n";
+
+const string storeV3F16AsUints =
+       // Since we allocate a vec4 worth of values, this case can be treated
+       // almost the same as a vec4 case. We will store some extra data that
+       // should not be compared.
+       "%st_fn_${var} = OpFunction %void None %void_v3f16_i32_fn\n"
+       "%st_fn_${var}_param1 = OpFunctionParameter %v3f16\n"
+       "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
+       "%st_fn_${var}_entry = OpLabel\n"
+       "%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
+       "%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
+       "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
+       "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
+       "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
+       "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
+       "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
+       "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
+       "OpReturn\n"
+       "OpFunctionEnd\n";
+
+const string storeV4F16AsUints =
+       "%st_fn_${var} = OpFunction %void None %void_v4f16_i32_fn\n"
+       "%st_fn_${var}_param1 = OpFunctionParameter %v4f16\n"
+       "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
+       "%st_fn_${var}_entry = OpLabel\n"
+       "%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
+       "%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
+       "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
+       "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
+       "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
+       "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
+       "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
+       "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
+       "OpReturn\n"
+       "OpFunctionEnd\n";
+
+const string storeM2x2F16AsUints =
+       "%st_fn_${var} = OpFunction %void None %void_m2x2f16_i32_fn\n"
+       "%st_fn_${var}_param1 = OpFunctionParameter %m2x2f16\n"
+       "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
+       "%st_fn_${var}_entry = OpLabel\n"
+       "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
+       "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
+       "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
+       "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
+       "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
+       "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
+       "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
+       "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
+       "OpReturn\n"
+       "OpFunctionEnd\n";
+
+const string storeM2x3F16AsUints =
+       // In the extracted elements for 01 and 11 the second element doesn't
+       // matter.
+       "%st_fn_${var} = OpFunction %void None %void_m2x3f16_i32_fn\n"
+       "%st_fn_${var}_param1 = OpFunctionParameter %m2x3f16\n"
+       "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
+       "%st_fn_${var}_entry = OpLabel\n"
+       "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
+       "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
+       "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
+       "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
+       "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
+       "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
+       "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
+       "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
+       "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
+       "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
+       "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
+       "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
+       "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
+       "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
+       "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
+       "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
+       "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
+       "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
+       "OpReturn\n"
+       "OpFunctionEnd\n";
+
+const string storeM2x4F16AsUints =
+       "%st_fn_${var} = OpFunction %void None %void_m2x4f16_i32_fn\n"
+       "%st_fn_${var}_param1 = OpFunctionParameter %m2x4f16\n"
+       "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
+       "%st_fn_${var}_entry = OpLabel\n"
+       "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
+       "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
+       "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
+       "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
+       "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
+       "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
+       "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
+       "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
+       "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
+       "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
+       "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
+       "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
+       "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
+       "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
+       "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
+       "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
+       "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
+       "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
+       "OpReturn\n"
+       "OpFunctionEnd\n";
+
+const string storeM3x2F16AsUints =
+       "%st_fn_${var} = OpFunction %void None %void_m3x2f16_i32_fn\n"
+       "%st_fn_${var}_param1 = OpFunctionParameter %m3x2f16\n"
+       "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
+       "%st_fn_${var}_entry = OpLabel\n"
+       "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
+       "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
+       "%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
+       "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
+       "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
+       "%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
+       "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
+       "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
+       "%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
+       "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
+       "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
+       "OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
+       "OpReturn\n"
+       "OpFunctionEnd\n";
+
+const string storeM3x3F16AsUints =
+       // The second element of the each broken down vec3 doesn't matter.
+       "%st_fn_${var} = OpFunction %void None %void_m3x3f16_i32_fn\n"
+       "%st_fn_${var}_param1 = OpFunctionParameter %m3x3f16\n"
+       "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
+       "%st_fn_${var}_entry = OpLabel\n"
+       "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
+       "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
+       "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
+       "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
+       "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
+       "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
+       "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
+       "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
+       "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
+       "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
+       "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
+       "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
+       "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
+       "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
+       "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
+       "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
+       "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
+       "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
+       "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
+       "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
+       "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
+       "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
+       "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
+       "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
+       "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
+       "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
+       "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
+       "OpReturn\n"
+       "OpFunctionEnd\n";
+
+const string storeM3x4F16AsUints =
+       "%st_fn_${var} = OpFunction %void None %void_m3x4f16_i32_fn\n"
+       "%st_fn_${var}_param1 = OpFunctionParameter %m3x4f16\n"
+       "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
+       "%st_fn_${var}_entry = OpLabel\n"
+       "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
+       "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
+       "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
+       "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
+       "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
+       "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
+       "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
+       "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
+       "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
+       "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
+       "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
+       "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
+       "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
+       "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
+       "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
+       "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
+       "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
+       "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
+       "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
+       "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
+       "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
+       "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
+       "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
+       "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
+       "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
+       "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
+       "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
+       "OpReturn\n"
+       "OpFunctionEnd\n";
+
+const string storeM4x2F16AsUints =
+       "%st_fn_${var} = OpFunction %void None %void_m4x2f16_i32_fn\n"
+       "%st_fn_${var}_param1 = OpFunctionParameter %m4x2f16\n"
+       "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
+       "%st_fn_${var}_entry = OpLabel\n"
+       "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
+       "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
+       "%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
+       "%st_fn_${var}_ex3 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 3\n"
+       "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
+       "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
+       "%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
+       "%st_fn_${var}_bc3 = OpBitcast %u32 %st_fn_${var}_ex3\n"
+       "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
+       "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
+       "%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
+       "%st_fn_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
+       "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
+       "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
+       "OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
+       "OpStore %st_fn_${var}_gep3 %st_fn_${var}_bc3\n"
+       "OpReturn\n"
+       "OpFunctionEnd\n";
+
+const string storeM4x3F16AsUints =
+       // The last element of each decomposed vec3 doesn't matter.
+       "%st_fn_${var} = OpFunction %void None %void_m4x3f16_i32_fn\n"
+       "%st_fn_${var}_param1 = OpFunctionParameter %m4x3f16\n"
+       "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
+       "%st_fn_${var}_entry = OpLabel\n"
+       "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
+       "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
+       "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
+       "%st_fn_${var}_ex3 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 3\n"
+       "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
+       "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
+       "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
+       "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
+       "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
+       "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
+       "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
+       "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
+       "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
+       "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
+       "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
+       "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
+       "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
+       "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
+       "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
+       "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
+       "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
+       "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
+       "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
+       "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
+       "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
+       "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
+       "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
+       "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
+       "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
+       "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
+       "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
+       "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
+       "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
+       "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
+       "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
+       "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
+       "OpReturn\n"
+       "OpFunctionEnd\n";
+
+const string storeM4x4F16AsUints =
+       "%st_fn_${var} = OpFunction %void None %void_m4x4f16_i32_fn\n"
+       "%st_fn_${var}_param1 = OpFunctionParameter %m4x4f16\n"
+       "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
+       "%st_fn_${var}_entry = OpLabel\n"
+       "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
+       "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
+       "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
+       "%st_fn_${var}_ex3 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 3\n"
+       "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
+       "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
+       "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
+       "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
+       "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
+       "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
+       "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
+       "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
+       "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
+       "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
+       "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
+       "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
+       "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
+       "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
+       "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
+       "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
+       "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
+       "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
+       "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
+       "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
+       "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
+       "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
+       "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
+       "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
+       "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
+       "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
+       "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
+       "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
+       "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
+       "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
+       "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
+       "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
+       "OpReturn\n"
+       "OpFunctionEnd\n";
+
 template<typename T>
 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, int offset = 0)
 {
@@ -690,21 +1316,25 @@ struct OpAtomicCase
                                                , numOutputElements     (_numOutputElements) {}
 };
 
-tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStorageBuffer, int numElements = 65535, bool verifyReturnValues = false)
+tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStorageBuffer, int numElements = 65535, bool verifyReturnValues = false, bool volatileAtomic = false)
 {
        std::string                                             groupName                       ("opatomic");
        if (useStorageBuffer)
                groupName += "_storage_buffer";
        if (verifyReturnValues)
                groupName += "_return_values";
+       if (volatileAtomic)
+               groupName += "_volatile";
        de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpAtomic* opcodes"));
        vector<OpAtomicCase>                    cases;
 
        const StringTemplate                    shaderTemplate  (
 
                string("OpCapability Shader\n") +
+               (volatileAtomic ? "OpCapability VulkanMemoryModelKHR\n" : "") +
                (useStorageBuffer ? "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n" : "") +
-               "OpMemoryModel Logical GLSL450\n"
+               (volatileAtomic ? "OpExtension \"SPV_KHR_vulkan_memory_model\"\n" : "") +
+               (volatileAtomic ? "OpMemoryModel Logical VulkanKHR\n" : "OpMemoryModel Logical GLSL450\n") +
                "OpEntryPoint GLCompute %main \"main\" %id\n"
                "OpExecutionMode %main LocalSize 1 1 1\n" +
 
@@ -723,7 +1353,6 @@ tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStor
                "OpDecorate %sumbuf ${BLOCK_DECORATION}\n"
                "OpDecorate %sum DescriptorSet 0\n"
                "OpDecorate %sum Binding 1\n"
-               "OpMemberDecorate %sumbuf 0 Coherent\n"
                "OpMemberDecorate %sumbuf 0 Offset 0\n"
 
                "${RETVAL_BUF_DECORATE}"
@@ -745,6 +1374,8 @@ tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStor
                "%zero      = OpConstant %i32 0\n"
                "%one       = OpConstant %u32 1\n"
                "%two       = OpConstant %i32 2\n"
+               "%five      = OpConstant %i32 5\n"
+               "%volbit    = OpConstant %i32 32768\n"
 
                "%main      = OpFunction %void None %voidf\n"
                "%label     = OpLabel\n"
@@ -769,24 +1400,24 @@ tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStor
        #define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, 1)
        #define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, numElements)
 
-       ADD_OPATOMIC_CASE_1(iadd,       "%retv      = OpAtomicIAdd %i32 %outloc %one %zero %inval\n",
+       ADD_OPATOMIC_CASE_1(iadd,       "%retv      = OpAtomicIAdd %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
                                                                "             OpStore %retloc %retv\n", OPATOMIC_IADD );
-       ADD_OPATOMIC_CASE_1(isub,       "%retv      = OpAtomicISub %i32 %outloc %one %zero %inval\n",
+       ADD_OPATOMIC_CASE_1(isub,       "%retv      = OpAtomicISub %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
                                                                "             OpStore %retloc %retv\n", OPATOMIC_ISUB );
-       ADD_OPATOMIC_CASE_1(iinc,       "%retv      = OpAtomicIIncrement %i32 %outloc %one %zero\n",
+       ADD_OPATOMIC_CASE_1(iinc,       "%retv      = OpAtomicIIncrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
                                                                "             OpStore %retloc %retv\n", OPATOMIC_IINC );
-       ADD_OPATOMIC_CASE_1(idec,       "%retv      = OpAtomicIDecrement %i32 %outloc %one %zero\n",
+       ADD_OPATOMIC_CASE_1(idec,       "%retv      = OpAtomicIDecrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
                                                                "             OpStore %retloc %retv\n", OPATOMIC_IDEC );
        if (!verifyReturnValues)
        {
-               ADD_OPATOMIC_CASE_N(load,       "%inval2    = OpAtomicLoad %i32 %inloc %one %zero\n"
+               ADD_OPATOMIC_CASE_N(load,       "%inval2    = OpAtomicLoad %i32 %inloc ${SCOPE} ${SEMANTICS}\n"
                                                                        "             OpStore %outloc %inval2\n", "", OPATOMIC_LOAD );
-               ADD_OPATOMIC_CASE_N(store,      "             OpAtomicStore %outloc %one %zero %inval\n", "", OPATOMIC_STORE );
+               ADD_OPATOMIC_CASE_N(store,      "             OpAtomicStore %outloc ${SCOPE} ${SEMANTICS} %inval\n", "", OPATOMIC_STORE );
        }
 
        ADD_OPATOMIC_CASE_N(compex, "%even      = OpSMod %i32 %inval %two\n"
                                                                "             OpStore %outloc %even\n"
-                                                               "%retv      = OpAtomicCompareExchange %i32 %outloc %one %zero %zero %minusone %zero\n",
+                                                               "%retv      = OpAtomicCompareExchange %i32 %outloc ${SCOPE} ${SEMANTICS} ${SEMANTICS} %minusone %zero\n",
                                                                "                         OpStore %retloc %retv\n", OPATOMIC_COMPEX );
 
 
@@ -801,6 +1432,19 @@ tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStor
                vector<deInt32>                         inputInts               (numElements, 0);
                vector<deInt32>                         expected                (cases[caseNdx].numOutputElements, -1);
 
+               if (volatileAtomic)
+               {
+                       spec.extensions.push_back("VK_KHR_vulkan_memory_model");
+                       // volatile, queuefamily scope
+                       specializations["SEMANTICS"] = "%volbit";
+                       specializations["SCOPE"] = "%five";
+               }
+               else
+               {
+                       // non-volatile, device scope
+                       specializations["SEMANTICS"] = "%zero";
+                       specializations["SCOPE"] = "%one";
+               }
                specializations["INDEX"]                                = (cases[caseNdx].numOutputElements == 1) ? "%zero" : "%x";
                specializations["INSTRUCTION"]                  = cases[caseNdx].assembly;
                specializations["BLOCK_DECORATION"]             = useStorageBuffer ? "Block" : "BufferBlock";
@@ -837,6 +1481,10 @@ tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStor
 
                spec.assembly                                                   = shaderTemplate.specialize(specializations);
 
+               // Specialize one more time, to catch things that were in a template parameter
+               const StringTemplate                                    assemblyTemplate(spec.assembly);
+               spec.assembly                                                   = assemblyTemplate.specialize(specializations);
+
                if (useStorageBuffer)
                        spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
 
@@ -3109,6 +3757,7 @@ void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestC
        specFloat16.assembly =
                "OpCapability Shader\n"
                "OpCapability StorageUniformBufferBlock16\n"
+               "OpCapability Float16\n"
                "OpExtension \"SPV_KHR_16bit_storage\"\n"
                "OpMemoryModel Logical GLSL450\n"
                "OpEntryPoint GLCompute %main \"main\" %id\n"
@@ -3141,9 +3790,9 @@ void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestC
 
                "%id       = OpVariable %uvec3ptr Input\n"
                "%zero     = OpConstant %i32 0\n"
-               "%float_0  = OpConstant %f16 0.0\n"
-               "%float_1  = OpConstant %f16 1.0\n"
-               "%float_n1 = OpConstant %f16 -1.0\n"
+               "%float_0  = OpConstant %f32 0.0\n"
+               "%float_1  = OpConstant %f32 1.0\n"
+               "%float_n1 = OpConstant %f32 -1.0\n"
 
                "%main     = OpFunction %void None %voidf\n"
                "%entry    = OpLabel\n"
@@ -3151,8 +3800,9 @@ void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestC
                "%x        = OpCompositeExtract %u32 %idval 0\n"
                "%inloc    = OpAccessChain %f16ptr %indata %zero %x\n"
                "%inval    = OpLoad %f16 %inloc\n"
+               "%f32_inval = OpFConvert %f32 %inval\n"
 
-               "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
+               "%comp     = OpFOrdGreaterThan %bool %f32_inval %float_0\n"
                "            OpSelectionMerge %cm None\n"
                "            OpBranchConditional %comp %tb %fb\n"
                "%tb       = OpLabel\n"
@@ -3160,10 +3810,11 @@ void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestC
                "%fb       = OpLabel\n"
                "            OpBranch %cm\n"
                "%cm       = OpLabel\n"
-               "%res      = OpPhi %f16 %float_1 %tb %float_n1 %fb\n"
+               "%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
+               "%f16_res  = OpFConvert %f16 %res\n"
 
                "%outloc   = OpAccessChain %f16ptr %outdata %zero %x\n"
-               "            OpStore %outloc %res\n"
+               "            OpStore %outloc %f16_res\n"
                "            OpReturn\n"
 
                "            OpFunctionEnd\n";
@@ -6119,10 +6770,8 @@ tcu::TestCaseGroup* createFloat16OpConstantCompositeGroup (tcu::TestContext& tes
                spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
                spec.numWorkGroups = IVec3(numElements, 1, 1);
 
-               spec.extensions.push_back("VK_KHR_16bit_storage");
                spec.extensions.push_back("VK_KHR_shader_float16_int8");
 
-               spec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
                spec.requestedVulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
 
                group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
@@ -7566,7 +8215,7 @@ tcu::TestCaseGroup* createOpPhiTests(tcu::TestContext& testCtx)
 
        fragments4["pre_main"]          = typesAndConstants4;
        fragments4["testfun"]           = function4;
-       fragments4["capability"]        = "OpCapability StorageUniformBufferBlock16\n";
+       fragments4["capability"]        = "OpCapability StorageUniformBufferBlock16\nOpCapability Float16\n";
        fragments4["extension"]         = "OpExtension \"SPV_KHR_16bit_storage\"";
 
        extensions4.push_back("VK_KHR_16bit_storage");
@@ -9400,7 +10049,7 @@ struct ConvertCase
 
                        if (instruction == "OpUConvert" || instruction == "OpSConvert" || instruction == "OpFConvert")
                        {
-                               // Conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
+                               // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
                                if (usesInt32(from, to) || usesFloat32(from, to))
                                        requiresInt16Capability = false;
                        }
@@ -9418,8 +10067,8 @@ struct ConvertCase
                {
                        decl += "%f16        = OpTypeFloat 16\n";
 
-                       // Conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Float16
-                       if (!(usesInt32(from, to) || usesFloat32(from, to)))
+                       // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Float16
+                       if (!usesFloat32(from, to))
                                caps += "OpCapability Float16\n";
                }
 
@@ -9536,6 +10185,22 @@ void createConvertCases (vector<ConvertCase>& testCases, const string& instructi
                testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_UNSIGNED_32,          701256243));
                testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_UNSIGNED_16,          4741));
                testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_UNSIGNED_8,           65));
+
+               // Zero extension for int->uint
+               testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_16,          56));
+               testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_32,          -47,                                                            true,   209));
+               testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_64,          -5,                                                                     true,   251));
+               testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_32,          14669));
+               testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_64,          -3341,                                                          true,   62195));
+               testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_64,          973610259));
+
+               // Truncate for int->uint
+               testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_8,           -25711,                                                         true,   145));
+               testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_8,           103));
+               testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_8,           -1067742499291926803ll,                         true,   237));
+               testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_16,          12382));
+               testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_32,          -972812359,                                                     true,   3322154937u));
+               testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_16,          -1067742499291926803ll,                         true,   61165));
        }
        else if (instruction == "OpSConvert")
        {
@@ -9566,7 +10231,7 @@ void createConvertCases (vector<ConvertCase>& testCases, const string& instructi
                // Truncate for int->uint
                testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_8,           -25711,                                                         true,   145));
                testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_8,           103));
-               testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_8,           -1067742499291926803ll,                         true,   61165));
+               testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_8,           -1067742499291926803ll,                         true,   237));
                testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_16,          12382));
                testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_32,          -972812359,                                                     true,   3322154937u));
                testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_16,          -1067742499291926803ll,                         true,   61165));
@@ -10056,7 +10721,6 @@ tcu::TestCaseGroup* createOpConstantFloat16Tests(tcu::TestContext& testCtx)
        outputColors[2] = RGBA(127, 255, 127, 255);
        outputColors[3] = RGBA(127, 127, 255, 255);
 
-       extensions.push_back("VK_KHR_16bit_storage");
        extensions.push_back("VK_KHR_shader_float16_int8");
        features.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
 
@@ -10064,7 +10728,6 @@ tcu::TestCaseGroup* createOpConstantFloat16Tests(tcu::TestContext& testCtx)
        {
                map<string, string> fragments;
 
-               fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
                fragments["capability"] = "OpCapability Float16\n";
                fragments["pre_main"]   = tests[testNdx].constants;
                fragments["testfun"]    = string(functionStart) + tests[testNdx].code + functionEnd;
@@ -10127,8 +10790,8 @@ tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bo
        de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Float 16 logical tests"));
 
        de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
-       const string                                            spvCapabilities         = string("OpCapability StorageUniformBufferBlock16\n") + (nanSupported ? "OpCapability SignedZeroInfNanPreserve\n" : "");
-       const string                                            spvExtensions           = string("OpExtension \"SPV_KHR_16bit_storage\"\n") + (nanSupported ? "OpExtension \"SPV_KHR_float_controls\"\n" : "");
+       const string                                            spvCapabilities         = string("OpCapability Float16\n") + (nanSupported ? "OpCapability SignedZeroInfNanPreserve\n" : "");
+       const string                                            spvExtensions           = (nanSupported ? "OpExtension \"SPV_KHR_float_controls\"\n" : "");
        const string                                            spvExecutionMode        = nanSupported ? "OpExecutionMode %BP_main SignedZeroInfNanPreserve 16\n" : "";
        const deUint32                                          numDataPointsScalar     = 16;
        const deUint32                                          numDataPointsVector     = 14;
@@ -10170,22 +10833,28 @@ tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bo
        { // scalar cases
                const StringTemplate preMain
                (
-                       "%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
-                       "      %f16 = OpTypeFloat 16\n"
-                       "  %c_f16_0 = OpConstant %f16 0.0\n"
-                       "  %c_f16_1 = OpConstant %f16 1.0\n"
-                       "   %up_f16 = OpTypePointer Uniform %f16\n"
-                       "   %ra_f16 = OpTypeArray %f16 %c_i32_ndp\n"
-                       "   %SSBO16 = OpTypeStruct %ra_f16\n"
-                       "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
-                       "%ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
-                       "%ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
-                       " %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
+                       "      %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
+                       "     %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
+                       "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
+                       " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
+                       "            %f16 = OpTypeFloat 16\n"
+                       "          %v2f16 = OpTypeVector %f16 2\n"
+                       "        %c_f16_0 = OpConstant %f16 0.0\n"
+                       "        %c_f16_1 = OpConstant %f16 1.0\n"
+                       "         %up_u32 = OpTypePointer Uniform %u32\n"
+                       "         %ra_u32 = OpTypeArray %u32 %c_i32_hndp\n"
+                       "         %SSBO16 = OpTypeStruct %ra_u32\n"
+                       "      %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
+                       "     %f16_i32_fn = OpTypeFunction %f16 %i32\n"
+                       "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
+                       "      %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
+                       "      %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
+                       "       %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
                );
 
                const StringTemplate decoration
                (
-                       "OpDecorate %ra_f16 ArrayStride 2\n"
+                       "OpDecorate %ra_u32 ArrayStride 4\n"
                        "OpMemberDecorate %SSBO16 0 Offset 0\n"
                        "OpDecorate %SSBO16 BufferBlock\n"
                        "OpDecorate %ssbo_src0 DescriptorSet 0\n"
@@ -10215,15 +10884,13 @@ tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bo
                        "    %write = OpLabel\n"
                        "      %ndx = OpLoad %i32 %i\n"
 
-                       "     %src0 = OpAccessChain %up_f16 %ssbo_src0 %c_i32_0 %ndx\n"
-                       " %val_src0 = OpLoad %f16 %src0\n"
+                       " %val_src0 = OpFunctionCall %f16 %ld_arg_ssbo_src0 %ndx\n"
 
                        "${op_arg1_calc}"
 
                        " %val_bdst = ${op_code} %bool %val_src0 ${op_arg1}\n"
                        "  %val_dst = OpSelect %f16 %val_bdst %c_f16_1 %c_f16_0\n"
-                       "      %dst = OpAccessChain %up_f16 %ssbo_dst %c_i32_0 %ndx\n"
-                       "             OpStore %dst %val_dst\n"
+                       "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
                        "             OpBranch %next\n"
 
                        "     %next = OpLabel\n"
@@ -10240,8 +10907,7 @@ tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bo
 
                const StringTemplate arg1Calc
                (
-                       "     %src1 = OpAccessChain %up_f16 %ssbo_src1 %c_i32_0 %ndx\n"
-                       " %val_src1 = OpLoad %f16 %src1\n"
+                       " %val_src1 = OpFunctionCall %f16 %ld_arg_ssbo_src1 %ndx\n"
                );
 
                for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
@@ -10266,13 +10932,18 @@ tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bo
                        fragments["decoration"]         = decoration.specialize(specs);
                        fragments["pre_main"]           = preMain.specialize(specs);
                        fragments["testfun"]            = testFun.specialize(specs);
+                       fragments["testfun"]            += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src0"}});
+                       if (testOp.argCount > 1)
+                       {
+                               fragments["testfun"]    += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src1"}});
+                       }
+                       fragments["testfun"]            += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
 
                        specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
                        specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
                        specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
                        specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
 
-                       extensions.push_back("VK_KHR_16bit_storage");
                        extensions.push_back("VK_KHR_shader_float16_int8");
 
                        if (nanSupported)
@@ -10282,7 +10953,6 @@ tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bo
                                features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
                        }
 
-                       features.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
                        features.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
 
                        finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
@@ -10291,26 +10961,28 @@ tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bo
        { // vector cases
                const StringTemplate preMain
                (
-                       "  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
-                       "     %v2bool = OpTypeVector %bool 2\n"
-                       "        %f16 = OpTypeFloat 16\n"
-                       "    %c_f16_0 = OpConstant %f16 0.0\n"
-                       "    %c_f16_1 = OpConstant %f16 1.0\n"
-                       "      %v2f16 = OpTypeVector %f16 2\n"
-                       "%c_v2f16_0_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
-                       "%c_v2f16_1_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
-                       "   %up_v2f16 = OpTypePointer Uniform %v2f16\n"
-                       "   %ra_v2f16 = OpTypeArray %v2f16 %c_i32_ndp\n"
-                       "     %SSBO16 = OpTypeStruct %ra_v2f16\n"
-                       "  %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
-                       "  %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
-                       "  %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
-                       "   %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
+                       "        %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
+                       "           %v2bool = OpTypeVector %bool 2\n"
+                       "              %f16 = OpTypeFloat 16\n"
+                       "          %c_f16_0 = OpConstant %f16 0.0\n"
+                       "          %c_f16_1 = OpConstant %f16 1.0\n"
+                       "            %v2f16 = OpTypeVector %f16 2\n"
+                       "      %c_v2f16_0_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
+                       "      %c_v2f16_1_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
+                       "           %up_u32 = OpTypePointer Uniform %u32\n"
+                       "           %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
+                       "           %SSBO16 = OpTypeStruct %ra_u32\n"
+                       "        %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
+                       "     %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
+                       "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
+                       "        %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
+                       "        %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
+                       "         %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
                );
 
                const StringTemplate decoration
                (
-                       "OpDecorate %ra_v2f16 ArrayStride 4\n"
+                       "OpDecorate %ra_u32 ArrayStride 4\n"
                        "OpMemberDecorate %SSBO16 0 Offset 0\n"
                        "OpDecorate %SSBO16 BufferBlock\n"
                        "OpDecorate %ssbo_src0 DescriptorSet 0\n"
@@ -10340,15 +11012,13 @@ tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bo
                        "    %write = OpLabel\n"
                        "      %ndx = OpLoad %i32 %i\n"
 
-                       "     %src0 = OpAccessChain %up_v2f16 %ssbo_src0 %c_i32_0 %ndx\n"
-                       " %val_src0 = OpLoad %v2f16 %src0\n"
+                       " %val_src0 = OpFunctionCall %v2f16 %ld_arg_ssbo_src0 %ndx\n"
 
                        "${op_arg1_calc}"
 
                        " %val_bdst = ${op_code} %v2bool %val_src0 ${op_arg1}\n"
                        "  %val_dst = OpSelect %v2f16 %val_bdst %c_v2f16_1_1 %c_v2f16_0_0\n"
-                       "      %dst = OpAccessChain %up_v2f16 %ssbo_dst %c_i32_0 %ndx\n"
-                       "             OpStore %dst %val_dst\n"
+                       "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
                        "             OpBranch %next\n"
 
                        "     %next = OpLabel\n"
@@ -10365,8 +11035,7 @@ tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bo
 
                const StringTemplate arg1Calc
                (
-                       "     %src1 = OpAccessChain %up_v2f16 %ssbo_src1 %c_i32_0 %ndx\n"
-                       " %val_src1 = OpLoad %v2f16 %src1\n"
+                       " %val_src1 = OpFunctionCall %v2f16 %ld_arg_ssbo_src1 %ndx\n"
                );
 
                for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
@@ -10392,13 +11061,18 @@ tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bo
                        fragments["decoration"]         = decoration.specialize(specs);
                        fragments["pre_main"]           = preMain.specialize(specs);
                        fragments["testfun"]            = testFun.specialize(specs);
+                       fragments["testfun"]            += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src0"}});
+                       if (testOp.argCount > 1)
+                       {
+                               fragments["testfun"]    += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src1"}});
+                       }
+                       fragments["testfun"]            += StringTemplate(storeV2F16AsUint).specialize({{"var", "ssbo_dst"}});
 
                        specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
                        specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
                        specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutVecDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
                        specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
 
-                       extensions.push_back("VK_KHR_16bit_storage");
                        extensions.push_back("VK_KHR_shader_float16_int8");
 
                        if (nanSupported)
@@ -10408,7 +11082,6 @@ tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bo
                                features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
                        }
 
-                       features.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
                        features.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
 
                        finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
@@ -10450,7 +11123,7 @@ tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
        de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "function", "Float 16 function call related tests"));
 
        de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
-       const StringTemplate                            capabilities            ("OpCapability ${cap}\n");
+       const StringTemplate                            capabilities            ("OpCapability Float16\n");
        const deUint32                                          numDataPoints           = 256;
        const vector<deFloat16>                         float16InputData        = getFloat16s(rnd, numDataPoints);
        const vector<deFloat16>                         float16OutputDummy      (float16InputData.size(), 0);
@@ -10461,6 +11134,9 @@ tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
                const deUint32  typeComponents;
                const char*             typeName;
                const char*             typeDecls;
+               const char*             typeStorage;
+               const string            loadFunc;
+               const string            storeFunc;
        };
 
        const TestType  testTypes[]     =
@@ -10468,25 +11144,44 @@ tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
                {
                        1,
                        "f16",
-                       ""
+                       "      %v2f16 = OpTypeVector %f16 2\n"
+                       "%f16_i32_fn = OpTypeFunction %f16 %i32\n"
+                       "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
+                       "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
+                       " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
+                       "u32_hndp",
+                       loadScalarF16FromUint,
+                       storeScalarF16AsUint
                },
                {
                        2,
                        "v2f16",
                        "      %v2f16 = OpTypeVector %f16 2\n"
                        "  %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
+                       "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
+                       "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
+                       "u32_ndp",
+                       loadV2F16FromUint,
+                       storeV2F16AsUint
                },
                {
                        4,
                        "v4f16",
+                       "      %v2f16 = OpTypeVector %f16 2\n"
                        "      %v4f16 = OpTypeVector %f16 4\n"
                        "  %c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
+                       "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
+                       "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
+                       "ra_u32_2",
+                       loadV4F16FromUints,
+                       storeV4F16AsUints
                },
        };
 
        const StringTemplate preMain
        (
                "  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
+               " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
                "     %v2bool = OpTypeVector %bool 2\n"
                "        %f16 = OpTypeFloat 16\n"
                "    %c_f16_0 = OpConstant %f16 0.0\n"
@@ -10494,9 +11189,12 @@ tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
                "${type_decls}"
 
                "  %${tt}_fun = OpTypeFunction %${tt} %${tt}\n"
-               "   %up_${tt} = OpTypePointer Uniform %${tt}\n"
-               "   %ra_${tt} = OpTypeArray %${tt} %c_i32_ndp\n"
-               "     %SSBO16 = OpTypeStruct %ra_${tt}\n"
+               "   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
+               "%ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
+               " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
+               "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
+               "         %up_u32 = OpTypePointer Uniform %u32\n"
+               "     %SSBO16 = OpTypeStruct %ra_${ts}\n"
                "  %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
                "   %ssbo_src = OpVariable %up_SSBO16 Uniform\n"
                "   %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
@@ -10504,7 +11202,10 @@ tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
 
        const StringTemplate decoration
        (
-               "OpDecorate %ra_${tt} ArrayStride ${tt_stride}\n"
+               "OpDecorate %ra_u32_2 ArrayStride 4\n"
+               "OpDecorate %ra_u32_hndp ArrayStride 4\n"
+               "OpDecorate %ra_u32_ndp ArrayStride 4\n"
+               "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
                "OpMemberDecorate %SSBO16 0 Offset 0\n"
                "OpDecorate %SSBO16 BufferBlock\n"
                "OpDecorate %ssbo_src DescriptorSet 0\n"
@@ -10532,12 +11233,9 @@ tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
                "    %write = OpLabel\n"
                "      %ndx = OpLoad %i32 %i\n"
 
-               "      %src = OpAccessChain %up_${tt} %ssbo_src %c_i32_0 %ndx\n"
-               "  %val_src = OpLoad %${tt} %src\n"
-
+               "  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
                "  %val_dst = OpFunctionCall %${tt} %pass_fun %val_src\n"
-               "      %dst = OpAccessChain %up_${tt} %ssbo_dst %c_i32_0 %ndx\n"
-               "             OpStore %dst %val_dst\n"
+               "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
                "             OpBranch %next\n"
 
                "     %next = OpLabel\n"
@@ -10571,26 +11269,25 @@ tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
                VulkanFeatures          features;
                vector<string>          extensions;
 
-               specs["cap"]                            = "StorageUniformBufferBlock16";
                specs["num_data_points"]        = de::toString(iterations);
                specs["tt"]                                     = testType.typeName;
+               specs["ts"]                                     = testType.typeStorage;
                specs["tt_stride"]                      = de::toString(typeStride);
                specs["type_decls"]                     = testType.typeDecls;
 
-               fragments["extension"]          = "OpExtension \"SPV_KHR_16bit_storage\"";
                fragments["capability"]         = capabilities.specialize(specs);
                fragments["decoration"]         = decoration.specialize(specs);
                fragments["pre_main"]           = preMain.specialize(specs);
                fragments["testfun"]            = testFun.specialize(specs);
+               fragments["testfun"]            += StringTemplate(testType.loadFunc).specialize({{"var", "ssbo_src"}});
+               fragments["testfun"]            += StringTemplate(testType.storeFunc).specialize({{"var", "ssbo_dst"}});
 
                specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
                specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
                specResource.verifyIO = compareFP16FunctionSetFunc;
 
-               extensions.push_back("VK_KHR_16bit_storage");
                extensions.push_back("VK_KHR_shader_float16_int8");
 
-               features.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
                features.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
 
                finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
@@ -10804,7 +11501,7 @@ tcu::TestCaseGroup* createFloat16VectorExtractSet (tcu::TestContext& testCtx)
                specs["type_decl"]                      = testType.typeDecls;
 
                fragments["extension"]          = "OpExtension \"SPV_KHR_16bit_storage\"";
-               fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\n";
+               fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\nOpCapability Float16\n";
                fragments["decoration"]         = decoration.specialize(specs);
                fragments["pre_main"]           = preMain.specialize(specs);
                fragments["testfun"]            = testFun.specialize(specs);
@@ -11042,7 +11739,7 @@ tcu::TestCaseGroup* createFloat16VectorInsertSet (tcu::TestContext& testCtx)
                specs["replacement"]            = de::toString(replacement);
 
                fragments["extension"]          = "OpExtension \"SPV_KHR_16bit_storage\"";
-               fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\n";
+               fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\nOpCapability Float16\n";
                fragments["decoration"]         = decoration.specialize(specs);
                fragments["pre_main"]           = preMain.specialize(specs);
                fragments["testfun"]            = testFun.specialize(specs);
@@ -11413,7 +12110,7 @@ tcu::TestCaseGroup* createFloat16VectorShuffleSet (tcu::TestContext& testCtx)
                                specs["case_count"]                     = de::toString(caseCount);
 
                                fragments["extension"]          = "OpExtension \"SPV_KHR_16bit_storage\"";
-                               fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\n";
+                               fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\nOpCapability Float16\n";
                                fragments["decoration"]         = decoration.specialize(specs);
                                fragments["pre_main"]           = preMain.specialize(specs);
                                fragments["testfun"]            = testFun.specialize(specs);
@@ -11673,7 +12370,7 @@ tcu::TestCaseGroup* createFloat16CompositeConstructSet (tcu::TestContext& testCt
                specs["consts"]                         = consts;
 
                fragments["extension"]          = "OpExtension \"SPV_KHR_16bit_storage\"";
-               fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\n";
+               fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\nOpCapability Float16\n";
                fragments["decoration"]         = decoration.specialize(specs);
                fragments["pre_main"]           = preMain.specialize(specs);
                fragments["testfun"]            = testFun.specialize(specs);
@@ -12066,7 +12763,7 @@ tcu::TestCaseGroup* createFloat16CompositeInsertExtractSet (tcu::TestContext& te
                specs["op_case_default_value"]  = opParts.caseDefaultValue;
 
                fragments["extension"]          = "OpExtension \"SPV_KHR_16bit_storage\"";
-               fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\n";
+               fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\nOpCapability Float16\n";
                fragments["decoration"]         = decoration.specialize(specs);
                fragments["pre_main"]           = preMain.specialize(specs);
                fragments["testfun"]            = testFun.specialize(specs);
@@ -15900,6 +16597,7 @@ struct Math16TestType
        const size_t    typeComponents;
        const size_t    typeArrayStride;
        const size_t    typeStructStride;
+       const char*             storage_type;
 };
 
 enum Math16DataTypes
@@ -15954,20 +16652,20 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
        const char*                                     componentType                           = "f16";
        const Math16TestType            testTypes[MATH16_TYPE_LAST]     =
        {
-               { "",           0,       0,                                              0,                                             },
-               { "",           1,       1 * sizeof(deFloat16),  2 * sizeof(deFloat16)  },
-               { "v2",         2,       2 * sizeof(deFloat16),  2 * sizeof(deFloat16)  },
-               { "v3",         3,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16)  },
-               { "v4",         4,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16)  },
-               { "m2x2",       0,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16)  },
-               { "m2x3",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16)  },
-               { "m2x4",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16)  },
-               { "m3x2",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16)  },
-               { "m3x3",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16)  },
-               { "m3x4",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16)  },
-               { "m4x2",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16)  },
-               { "m4x3",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16)  },
-               { "m4x4",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16)  },
+               { "",           0,       0,                                              0,                                             "" },
+               { "",           1,       1 * sizeof(deFloat16),  2 * sizeof(deFloat16), "u32_half_ndp" },
+               { "v2",         2,       2 * sizeof(deFloat16),  2 * sizeof(deFloat16), "u32_ndp" },
+               { "v3",         3,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16), "u32_ndp_2" },
+               { "v4",         4,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16), "u32_ndp_2" },
+               { "m2x2",       0,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16), "u32_ndp_2" },
+               { "m2x3",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_4" },
+               { "m2x4",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_4" },
+               { "m3x2",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_3" },
+               { "m3x3",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6" },
+               { "m3x4",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6" },
+               { "m4x2",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_4" },
+               { "m4x3",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8" },
+               { "m4x4",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8" },
        };
 
        DE_ASSERT(testTypeIdx == testTypes[testTypeIdx].typeComponents);
@@ -15991,131 +16689,118 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
                "        %m4x3f16 = OpTypeMatrix %v3f16 4\n"
                "        %m4x4f16 = OpTypeMatrix %v4f16 4\n"
 
-               "     %up_f16     = OpTypePointer Uniform %f16    \n"
-               "     %up_v2f16   = OpTypePointer Uniform %v2f16  \n"
-               "     %up_v3f16   = OpTypePointer Uniform %v3f16  \n"
-               "     %up_v4f16   = OpTypePointer Uniform %v4f16  \n"
-               "     %up_m2x2f16 = OpTypePointer Uniform %m2x2f16\n"
-               "     %up_m2x3f16 = OpTypePointer Uniform %m2x3f16\n"
-               "     %up_m2x4f16 = OpTypePointer Uniform %m2x4f16\n"
-               "     %up_m3x2f16 = OpTypePointer Uniform %m3x2f16\n"
-               "     %up_m3x3f16 = OpTypePointer Uniform %m3x3f16\n"
-               "     %up_m3x4f16 = OpTypePointer Uniform %m3x4f16\n"
-               "     %up_m4x2f16 = OpTypePointer Uniform %m4x2f16\n"
-               "     %up_m4x3f16 = OpTypePointer Uniform %m4x3f16\n"
-               "     %up_m4x4f16 = OpTypePointer Uniform %m4x4f16\n"
-
-               "     %ra_f16     = OpTypeArray %f16     %c_i32_ndp\n"
-               "     %ra_v2f16   = OpTypeArray %v2f16   %c_i32_ndp\n"
-               "     %ra_v3f16   = OpTypeArray %v3f16   %c_i32_ndp\n"
-               "     %ra_v4f16   = OpTypeArray %v4f16   %c_i32_ndp\n"
-               "     %ra_m2x2f16 = OpTypeArray %m2x2f16 %c_i32_ndp\n"
-               "     %ra_m2x3f16 = OpTypeArray %m2x3f16 %c_i32_ndp\n"
-               "     %ra_m2x4f16 = OpTypeArray %m2x4f16 %c_i32_ndp\n"
-               "     %ra_m3x2f16 = OpTypeArray %m3x2f16 %c_i32_ndp\n"
-               "     %ra_m3x3f16 = OpTypeArray %m3x3f16 %c_i32_ndp\n"
-               "     %ra_m3x4f16 = OpTypeArray %m3x4f16 %c_i32_ndp\n"
-               "     %ra_m4x2f16 = OpTypeArray %m4x2f16 %c_i32_ndp\n"
-               "     %ra_m4x3f16 = OpTypeArray %m4x3f16 %c_i32_ndp\n"
-               "     %ra_m4x4f16 = OpTypeArray %m4x4f16 %c_i32_ndp\n"
-
-               "   %SSBO_f16     = OpTypeStruct %ra_f16    \n"
-               "   %SSBO_v2f16   = OpTypeStruct %ra_v2f16  \n"
-               "   %SSBO_v3f16   = OpTypeStruct %ra_v3f16  \n"
-               "   %SSBO_v4f16   = OpTypeStruct %ra_v4f16  \n"
-               "   %SSBO_m2x2f16 = OpTypeStruct %ra_m2x2f16\n"
-               "   %SSBO_m2x3f16 = OpTypeStruct %ra_m2x3f16\n"
-               "   %SSBO_m2x4f16 = OpTypeStruct %ra_m2x4f16\n"
-               "   %SSBO_m3x2f16 = OpTypeStruct %ra_m3x2f16\n"
-               "   %SSBO_m3x3f16 = OpTypeStruct %ra_m3x3f16\n"
-               "   %SSBO_m3x4f16 = OpTypeStruct %ra_m3x4f16\n"
-               "   %SSBO_m4x2f16 = OpTypeStruct %ra_m4x2f16\n"
-               "   %SSBO_m4x3f16 = OpTypeStruct %ra_m4x3f16\n"
-               "   %SSBO_m4x4f16 = OpTypeStruct %ra_m4x4f16\n"
-
-               "%up_SSBO_f16     = OpTypePointer Uniform %SSBO_f16    \n"
-               "%up_SSBO_v2f16   = OpTypePointer Uniform %SSBO_v2f16  \n"
-               "%up_SSBO_v3f16   = OpTypePointer Uniform %SSBO_v3f16  \n"
-               "%up_SSBO_v4f16   = OpTypePointer Uniform %SSBO_v4f16  \n"
-               "%up_SSBO_m2x2f16 = OpTypePointer Uniform %SSBO_m2x2f16\n"
-               "%up_SSBO_m2x3f16 = OpTypePointer Uniform %SSBO_m2x3f16\n"
-               "%up_SSBO_m2x4f16 = OpTypePointer Uniform %SSBO_m2x4f16\n"
-               "%up_SSBO_m3x2f16 = OpTypePointer Uniform %SSBO_m3x2f16\n"
-               "%up_SSBO_m3x3f16 = OpTypePointer Uniform %SSBO_m3x3f16\n"
-               "%up_SSBO_m3x4f16 = OpTypePointer Uniform %SSBO_m3x4f16\n"
-               "%up_SSBO_m4x2f16 = OpTypePointer Uniform %SSBO_m4x2f16\n"
-               "%up_SSBO_m4x3f16 = OpTypePointer Uniform %SSBO_m4x3f16\n"
-               "%up_SSBO_m4x4f16 = OpTypePointer Uniform %SSBO_m4x4f16\n"
-
                "       %fp_v2i32 = OpTypePointer Function %v2i32\n"
                "       %fp_v3i32 = OpTypePointer Function %v3i32\n"
                "       %fp_v4i32 = OpTypePointer Function %v4i32\n"
+
+               "      %c_u32_ndp = OpConstant %u32 ${num_data_points}\n"
+               " %c_u32_half_ndp = OpSpecConstantOp %u32 UDiv %c_i32_ndp %c_u32_2\n"
+               "        %c_u32_5 = OpConstant %u32 5\n"
+               "        %c_u32_6 = OpConstant %u32 6\n"
+               "        %c_u32_7 = OpConstant %u32 7\n"
+               "        %c_u32_8 = OpConstant %u32 8\n"
+               "        %c_f16_0 = OpConstant %f16 0\n"
+               "        %c_f16_1 = OpConstant %f16 1\n"
+               "      %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
+               "         %up_u32 = OpTypePointer Uniform %u32\n"
+               "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
+               " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
+
+               "    %ra_u32_half_ndp = OpTypeArray %u32 %c_u32_half_ndp\n"
+               "  %SSBO_u32_half_ndp = OpTypeStruct %ra_u32_half_ndp\n"
+               "%up_SSBO_u32_half_ndp = OpTypePointer Uniform %SSBO_u32_half_ndp\n"
+               "         %ra_u32_ndp = OpTypeArray %u32 %c_u32_ndp\n"
+               "       %SSBO_u32_ndp = OpTypeStruct %ra_u32_ndp\n"
+               "    %up_SSBO_u32_ndp = OpTypePointer Uniform %SSBO_u32_ndp\n"
+               "           %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
+               "        %up_ra_u32_2 = OpTypePointer Uniform %ra_u32_2\n"
+               "      %ra_ra_u32_ndp = OpTypeArray %ra_u32_2 %c_u32_ndp\n"
+               "     %SSBO_u32_ndp_2 = OpTypeStruct %ra_ra_u32_ndp\n"
+               "  %up_SSBO_u32_ndp_2 = OpTypePointer Uniform %SSBO_u32_ndp_2\n"
+               "           %ra_u32_4 = OpTypeArray %u32 %c_u32_4\n"
+               "        %up_ra_u32_4 = OpTypePointer Uniform %ra_u32_4\n"
+               "        %ra_ra_u32_4 = OpTypeArray %ra_u32_4 %c_u32_ndp\n"
+               "     %SSBO_u32_ndp_4 = OpTypeStruct %ra_ra_u32_4\n"
+               "  %up_SSBO_u32_ndp_4 = OpTypePointer Uniform %SSBO_u32_ndp_4\n"
+               "           %ra_u32_3 = OpTypeArray %u32 %c_u32_3\n"
+               "        %up_ra_u32_3 = OpTypePointer Uniform %ra_u32_3\n"
+               "        %ra_ra_u32_3 = OpTypeArray %ra_u32_3 %c_u32_ndp\n"
+               "     %SSBO_u32_ndp_3 = OpTypeStruct %ra_ra_u32_3\n"
+               "  %up_SSBO_u32_ndp_3 = OpTypePointer Uniform %SSBO_u32_ndp_3\n"
+               "           %ra_u32_6 = OpTypeArray %u32 %c_u32_6\n"
+               "        %up_ra_u32_6 = OpTypePointer Uniform %ra_u32_6\n"
+               "        %ra_ra_u32_6 = OpTypeArray %ra_u32_6 %c_u32_ndp\n"
+               "     %SSBO_u32_ndp_6 = OpTypeStruct %ra_ra_u32_6\n"
+               "  %up_SSBO_u32_ndp_6 = OpTypePointer Uniform %SSBO_u32_ndp_6\n"
+               "           %ra_u32_8 = OpTypeArray %u32 %c_u32_8\n"
+               "        %up_ra_u32_8 = OpTypePointer Uniform %ra_u32_8\n"
+               "        %ra_ra_u32_8 = OpTypeArray %ra_u32_8 %c_u32_ndp\n"
+               "     %SSBO_u32_ndp_8 = OpTypeStruct %ra_ra_u32_8\n"
+               "  %up_SSBO_u32_ndp_8 = OpTypePointer Uniform %SSBO_u32_ndp_8\n"
+
+               "         %f16_i32_fn = OpTypeFunction %f16 %i32\n"
+               "       %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
+               "       %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
+               "       %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
+               "     %m2x2f16_i32_fn = OpTypeFunction %m2x2f16 %i32\n"
+               "     %m2x3f16_i32_fn = OpTypeFunction %m2x3f16 %i32\n"
+               "     %m2x4f16_i32_fn = OpTypeFunction %m2x4f16 %i32\n"
+               "     %m3x2f16_i32_fn = OpTypeFunction %m3x2f16 %i32\n"
+               "     %m3x3f16_i32_fn = OpTypeFunction %m3x3f16 %i32\n"
+               "     %m3x4f16_i32_fn = OpTypeFunction %m3x4f16 %i32\n"
+               "     %m4x2f16_i32_fn = OpTypeFunction %m4x2f16 %i32\n"
+               "     %m4x3f16_i32_fn = OpTypeFunction %m4x3f16 %i32\n"
+               "     %m4x4f16_i32_fn = OpTypeFunction %m4x4f16 %i32\n"
+               "    %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
+               "  %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
+               "  %void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
+               "  %void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
+               "%void_m2x2f16_i32_fn = OpTypeFunction %void %m2x2f16 %i32\n"
+               "%void_m2x3f16_i32_fn = OpTypeFunction %void %m2x3f16 %i32\n"
+               "%void_m2x4f16_i32_fn = OpTypeFunction %void %m2x4f16 %i32\n"
+               "%void_m3x2f16_i32_fn = OpTypeFunction %void %m3x2f16 %i32\n"
+               "%void_m3x3f16_i32_fn = OpTypeFunction %void %m3x3f16 %i32\n"
+               "%void_m3x4f16_i32_fn = OpTypeFunction %void %m3x4f16 %i32\n"
+               "%void_m4x2f16_i32_fn = OpTypeFunction %void %m4x2f16 %i32\n"
+               "%void_m4x3f16_i32_fn = OpTypeFunction %void %m4x3f16 %i32\n"
+               "%void_m4x4f16_i32_fn = OpTypeFunction %void %m4x4f16 %i32\n"
                "${arg_vars}"
        );
 
        const StringTemplate decoration
        (
-               "OpDecorate %ra_f16     ArrayStride 2 \n"
-               "OpDecorate %ra_v2f16   ArrayStride 4 \n"
-               "OpDecorate %ra_v3f16   ArrayStride 8 \n"
-               "OpDecorate %ra_v4f16   ArrayStride 8 \n"
-               "OpDecorate %ra_m2x2f16 ArrayStride 8 \n"
-               "OpDecorate %ra_m2x3f16 ArrayStride 16\n"
-               "OpDecorate %ra_m2x4f16 ArrayStride 16\n"
-               "OpDecorate %ra_m3x2f16 ArrayStride 16\n"
-               "OpDecorate %ra_m3x3f16 ArrayStride 32\n"
-               "OpDecorate %ra_m3x4f16 ArrayStride 32\n"
-               "OpDecorate %ra_m4x2f16 ArrayStride 16\n"
-               "OpDecorate %ra_m4x3f16 ArrayStride 32\n"
-               "OpDecorate %ra_m4x4f16 ArrayStride 32\n"
-
-               "OpMemberDecorate %SSBO_f16     0 Offset 0\n"
-               "OpMemberDecorate %SSBO_v2f16   0 Offset 0\n"
-               "OpMemberDecorate %SSBO_v3f16   0 Offset 0\n"
-               "OpMemberDecorate %SSBO_v4f16   0 Offset 0\n"
-               "OpMemberDecorate %SSBO_m2x2f16 0 Offset 0\n"
-               "OpMemberDecorate %SSBO_m2x3f16 0 Offset 0\n"
-               "OpMemberDecorate %SSBO_m2x4f16 0 Offset 0\n"
-               "OpMemberDecorate %SSBO_m3x2f16 0 Offset 0\n"
-               "OpMemberDecorate %SSBO_m3x3f16 0 Offset 0\n"
-               "OpMemberDecorate %SSBO_m3x4f16 0 Offset 0\n"
-               "OpMemberDecorate %SSBO_m4x2f16 0 Offset 0\n"
-               "OpMemberDecorate %SSBO_m4x3f16 0 Offset 0\n"
-               "OpMemberDecorate %SSBO_m4x4f16 0 Offset 0\n"
-
-               "OpDecorate %SSBO_f16     BufferBlock\n"
-               "OpDecorate %SSBO_v2f16   BufferBlock\n"
-               "OpDecorate %SSBO_v3f16   BufferBlock\n"
-               "OpDecorate %SSBO_v4f16   BufferBlock\n"
-               "OpDecorate %SSBO_m2x2f16 BufferBlock\n"
-               "OpDecorate %SSBO_m2x3f16 BufferBlock\n"
-               "OpDecorate %SSBO_m2x4f16 BufferBlock\n"
-               "OpDecorate %SSBO_m3x2f16 BufferBlock\n"
-               "OpDecorate %SSBO_m3x3f16 BufferBlock\n"
-               "OpDecorate %SSBO_m3x4f16 BufferBlock\n"
-               "OpDecorate %SSBO_m4x2f16 BufferBlock\n"
-               "OpDecorate %SSBO_m4x3f16 BufferBlock\n"
-               "OpDecorate %SSBO_m4x4f16 BufferBlock\n"
-
-               "OpMemberDecorate %SSBO_m2x2f16 0 ColMajor\n"
-               "OpMemberDecorate %SSBO_m2x3f16 0 ColMajor\n"
-               "OpMemberDecorate %SSBO_m2x4f16 0 ColMajor\n"
-               "OpMemberDecorate %SSBO_m3x2f16 0 ColMajor\n"
-               "OpMemberDecorate %SSBO_m3x3f16 0 ColMajor\n"
-               "OpMemberDecorate %SSBO_m3x4f16 0 ColMajor\n"
-               "OpMemberDecorate %SSBO_m4x2f16 0 ColMajor\n"
-               "OpMemberDecorate %SSBO_m4x3f16 0 ColMajor\n"
-               "OpMemberDecorate %SSBO_m4x4f16 0 ColMajor\n"
-
-               "OpMemberDecorate %SSBO_m2x2f16 0 MatrixStride 4\n"
-               "OpMemberDecorate %SSBO_m2x3f16 0 MatrixStride 8\n"
-               "OpMemberDecorate %SSBO_m2x4f16 0 MatrixStride 8\n"
-               "OpMemberDecorate %SSBO_m3x2f16 0 MatrixStride 4\n"
-               "OpMemberDecorate %SSBO_m3x3f16 0 MatrixStride 8\n"
-               "OpMemberDecorate %SSBO_m3x4f16 0 MatrixStride 8\n"
-               "OpMemberDecorate %SSBO_m4x2f16 0 MatrixStride 4\n"
-               "OpMemberDecorate %SSBO_m4x3f16 0 MatrixStride 8\n"
-               "OpMemberDecorate %SSBO_m4x4f16 0 MatrixStride 8\n"
+               "OpDecorate %ra_u32_half_ndp ArrayStride 4\n"
+               "OpMemberDecorate %SSBO_u32_half_ndp 0 Offset 0\n"
+               "OpDecorate %SSBO_u32_half_ndp BufferBlock\n"
+
+               "OpDecorate %ra_u32_ndp ArrayStride 4\n"
+               "OpMemberDecorate %SSBO_u32_ndp 0 Offset 0\n"
+               "OpDecorate %SSBO_u32_ndp BufferBlock\n"
+
+               "OpDecorate %ra_u32_2 ArrayStride 4\n"
+               "OpDecorate %ra_ra_u32_ndp ArrayStride 8\n"
+               "OpMemberDecorate %SSBO_u32_ndp_2 0 Offset 0\n"
+               "OpDecorate %SSBO_u32_ndp_2 BufferBlock\n"
+
+               "OpDecorate %ra_u32_4 ArrayStride 4\n"
+               "OpDecorate %ra_ra_u32_4 ArrayStride 16\n"
+               "OpMemberDecorate %SSBO_u32_ndp_4 0 Offset 0\n"
+               "OpDecorate %SSBO_u32_ndp_4 BufferBlock\n"
+
+               "OpDecorate %ra_u32_3 ArrayStride 4\n"
+               "OpDecorate %ra_ra_u32_3 ArrayStride 16\n"
+               "OpMemberDecorate %SSBO_u32_ndp_3 0 Offset 0\n"
+               "OpDecorate %SSBO_u32_ndp_3 BufferBlock\n"
+
+               "OpDecorate %ra_u32_6 ArrayStride 4\n"
+               "OpDecorate %ra_ra_u32_6 ArrayStride 32\n"
+               "OpMemberDecorate %SSBO_u32_ndp_6 0 Offset 0\n"
+               "OpDecorate %SSBO_u32_ndp_6 BufferBlock\n"
+
+               "OpDecorate %ra_u32_8 ArrayStride 4\n"
+               "OpDecorate %ra_ra_u32_8 ArrayStride 32\n"
+               "OpMemberDecorate %SSBO_u32_ndp_8 0 Offset 0\n"
+               "OpDecorate %SSBO_u32_ndp_8 BufferBlock\n"
 
                "${arg_decorations}"
        );
@@ -16157,11 +16842,9 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
 
        const Math16ArgFragments        argFragment1    =
        {
-               "     %src0 = OpAccessChain %up_${t0} %ssbo_src0 %c_i32_0 %ndx\n"
-               " %val_src0 = OpLoad %${t0} %src0\n"
+               "     %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
                "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0\n"
-               "      %dst = OpAccessChain %up_${tr} %ssbo_dst %c_i32_0 %ndx\n"
-               "             OpStore %dst %val_dst\n",
+               "     %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
                "",
                "",
                "",
@@ -16169,13 +16852,10 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
 
        const Math16ArgFragments        argFragment2    =
        {
-               "     %src0 = OpAccessChain %up_${t0} %ssbo_src0 %c_i32_0 %ndx\n"
-               " %val_src0 = OpLoad %${t0} %src0\n"
-               "     %src1 = OpAccessChain %up_${t1} %ssbo_src1 %c_i32_0 %ndx\n"
-               " %val_src1 = OpLoad %${t1} %src1\n"
+               " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
+               " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
                "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1\n"
-               "      %dst = OpAccessChain %up_${tr} %ssbo_dst %c_i32_0 %ndx\n"
-               "             OpStore %dst %val_dst\n",
+               "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
                "",
                "",
                "",
@@ -16183,15 +16863,11 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
 
        const Math16ArgFragments        argFragment3    =
        {
-               "     %src0 = OpAccessChain %up_${t0} %ssbo_src0 %c_i32_0 %ndx\n"
-               " %val_src0 = OpLoad %${t0} %src0\n"
-               "     %src1 = OpAccessChain %up_${t1} %ssbo_src1 %c_i32_0 %ndx\n"
-               " %val_src1 = OpLoad %${t1} %src1\n"
-               "     %src2 = OpAccessChain %up_${t2} %ssbo_src2 %c_i32_0 %ndx\n"
-               " %val_src2 = OpLoad %${t2} %src2\n"
+               " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
+               " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
+               " %val_src2 = OpFunctionCall %${t2} %ld_arg_ssbo_src2 %ndx\n"
                "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1 %val_src2\n"
-               "      %dst = OpAccessChain %up_${tr} %ssbo_dst %c_i32_0 %ndx\n"
-               "             OpStore %dst %val_dst\n",
+               "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
                "",
                "",
                "",
@@ -16199,14 +16875,11 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
 
        const Math16ArgFragments        argFragmentLdExp        =
        {
-               "     %src0 = OpAccessChain %up_${t0} %ssbo_src0 %c_i32_0 %ndx\n"
-               " %val_src0 = OpLoad %${t0} %src0\n"
-               "     %src1 = OpAccessChain %up_${t1} %ssbo_src1 %c_i32_0 %ndx\n"
-               " %val_src1 = OpLoad %${t1} %src1\n"
+               " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
+               " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
                "%val_src1i = OpConvertFToS %${dr}i32 %val_src1\n"
                "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1i\n"
-               "      %dst = OpAccessChain %up_${tr} %ssbo_dst %c_i32_0 %ndx\n"
-               "             OpStore %dst %val_dst\n",
+               "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
 
                "",
 
@@ -16217,11 +16890,9 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
 
        const Math16ArgFragments        argFragmentModfFrac     =
        {
-               "     %src0 = OpAccessChain %up_${t0} %ssbo_src0 %c_i32_0 %ndx\n"
-               " %val_src0 = OpLoad %${t0} %src0\n"
+               " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
                "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
-               "      %dst = OpAccessChain %up_${tr} %ssbo_dst %c_i32_0 %ndx\n"
-               "             OpStore %dst %val_dst\n",
+               "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
 
                "   %fp_tmp = OpTypePointer Function %${tr}\n",
 
@@ -16232,13 +16903,11 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
 
        const Math16ArgFragments        argFragmentModfInt      =
        {
-               "     %src0 = OpAccessChain %up_${t0} %ssbo_src0 %c_i32_0 %ndx\n"
-               " %val_src0 = OpLoad %${t0} %src0\n"
+               " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
                "%val_dummy = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
                "     %tmp0 = OpAccessChain %fp_tmp %tmp\n"
                "  %val_dst = OpLoad %${tr} %tmp0\n"
-               "      %dst = OpAccessChain %up_${tr} %ssbo_dst %c_i32_0 %ndx\n"
-               "             OpStore %dst %val_dst\n",
+               "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
 
                "   %fp_tmp = OpTypePointer Function %${tr}\n",
 
@@ -16249,15 +16918,13 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
 
        const Math16ArgFragments        argFragmentModfStruct   =
        {
-               "     %src0 = OpAccessChain %up_${t0} %ssbo_src0 %c_i32_0 %ndx\n"
-               " %val_src0 = OpLoad %${t0} %src0\n"
+               " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
                "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
                "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
                "             OpStore %tmp_ptr_s %val_tmp\n"
                "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_${struct_member}\n"
                "  %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
-               "      %dst = OpAccessChain %up_${tr} %ssbo_dst %c_i32_0 %ndx\n"
-               "             OpStore %dst %val_dst\n",
+               "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
 
                "  %fp_${tr} = OpTypePointer Function %${tr}\n"
                "   %st_tmp = OpTypeStruct %${tr} %${tr}\n"
@@ -16273,15 +16940,13 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
 
        const Math16ArgFragments        argFragmentFrexpStructS =
        {
-               "     %src0 = OpAccessChain %up_${t0} %ssbo_src0 %c_i32_0 %ndx\n"
-               " %val_src0 = OpLoad %${t0} %src0\n"
+               " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
                "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
                "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
                "             OpStore %tmp_ptr_s %val_tmp\n"
                "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_i32_0\n"
                "  %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
-               "      %dst = OpAccessChain %up_${tr} %ssbo_dst %c_i32_0 %ndx\n"
-               "             OpStore %dst %val_dst\n",
+               "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
 
                "  %fp_${tr} = OpTypePointer Function %${tr}\n"
                "   %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
@@ -16295,16 +16960,14 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
 
        const Math16ArgFragments        argFragmentFrexpStructE =
        {
-               "     %src0 = OpAccessChain %up_${t0} %ssbo_src0 %c_i32_0 %ndx\n"
-               " %val_src0 = OpLoad %${t0} %src0\n"
+               " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
                "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
                "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
                "             OpStore %tmp_ptr_s %val_tmp\n"
                "%tmp_ptr_l = OpAccessChain %fp_${dr}i32 %tmp %c_i32_1\n"
                "%val_dst_i = OpLoad %${dr}i32 %tmp_ptr_l\n"
                "  %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
-               "      %dst = OpAccessChain %up_${tr} %ssbo_dst %c_i32_0 %ndx\n"
-               "             OpStore %dst %val_dst\n",
+               "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
 
                "   %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
                "   %fp_tmp = OpTypePointer Function %st_tmp\n",
@@ -16317,12 +16980,10 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
 
        const Math16ArgFragments        argFragmentFrexpS               =
        {
-               "     %src0 = OpAccessChain %up_${t0} %ssbo_src0 %c_i32_0 %ndx\n"
-               " %val_src0 = OpLoad %${t0} %src0\n"
+               " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
                "  %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
                "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
-               "      %dst = OpAccessChain %up_${tr} %ssbo_dst %c_i32_0 %ndx\n"
-               "             OpStore %dst %val_dst\n",
+               "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
 
                "",
 
@@ -16333,14 +16994,12 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
 
        const Math16ArgFragments        argFragmentFrexpE               =
        {
-               "     %src0 = OpAccessChain %up_${t0} %ssbo_src0 %c_i32_0 %ndx\n"
-               " %val_src0 = OpLoad %${t0} %src0\n"
+               " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
                "  %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
                "%val_dummy = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
                "%val_dst_i = OpLoad %${dr}i32 %out_exp\n"
                "  %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
-               "      %dst = OpAccessChain %up_${tr} %ssbo_dst %c_i32_0 %ndx\n"
-               "             OpStore %dst %val_dst\n",
+               "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
 
                "",
 
@@ -16349,6 +17008,36 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
                "      %tmp = OpVariable %fp_${dr}i32 Function\n",
        };
 
+       string load_funcs[MATH16_TYPE_LAST];
+       load_funcs[SCALAR] = loadScalarF16FromUint;
+       load_funcs[VEC2]   = loadV2F16FromUint;
+       load_funcs[VEC3]   = loadV3F16FromUints;
+       load_funcs[VEC4]   = loadV4F16FromUints;
+       load_funcs[MAT2X2] = loadM2x2F16FromUints;
+       load_funcs[MAT2X3] = loadM2x3F16FromUints;
+       load_funcs[MAT2X4] = loadM2x4F16FromUints;
+       load_funcs[MAT3X2] = loadM3x2F16FromUints;
+       load_funcs[MAT3X3] = loadM3x3F16FromUints;
+       load_funcs[MAT3X4] = loadM3x4F16FromUints;
+       load_funcs[MAT4X2] = loadM4x2F16FromUints;
+       load_funcs[MAT4X3] = loadM4x3F16FromUints;
+       load_funcs[MAT4X4] = loadM4x4F16FromUints;
+
+       string store_funcs[MATH16_TYPE_LAST];
+       store_funcs[SCALAR] = storeScalarF16AsUint;
+       store_funcs[VEC2]   = storeV2F16AsUint;
+       store_funcs[VEC3]   = storeV3F16AsUints;
+       store_funcs[VEC4]   = storeV4F16AsUints;
+       store_funcs[MAT2X2] = storeM2x2F16AsUints;
+       store_funcs[MAT2X3] = storeM2x3F16AsUints;
+       store_funcs[MAT2X4] = storeM2x4F16AsUints;
+       store_funcs[MAT3X2] = storeM3x2F16AsUints;
+       store_funcs[MAT3X3] = storeM3x3F16AsUints;
+       store_funcs[MAT3X4] = storeM3x4F16AsUints;
+       store_funcs[MAT4X2] = storeM4x2F16AsUints;
+       store_funcs[MAT4X3] = storeM4x3F16AsUints;
+       store_funcs[MAT4X4] = storeM4x4F16AsUints;
+
        const Math16TestType&           testType                                = testTypes[testTypeIdx];
        const string                            funcNameString                  = string(testFunc.funcName) + string(testFunc.funcSuffix);
        const string                            testName                                = de::toLower(funcNameString);
@@ -16369,6 +17058,7 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
        string                                          variables;
        string                                          declarations;
        string                                          decorations;
+       string                                          functions;
 
        switch (testFunc.funcArgsCount)
        {
@@ -16407,11 +17097,13 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
                }
        }
 
+       functions = StringTemplate(store_funcs[testFunc.typeResult]).specialize({{"var", "ssbo_dst"}});
        if (testFunc.funcArgsCount == 1)
        {
+               functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
                variables +=
-                       " %ssbo_src0 = OpVariable %up_SSBO_${t0} Uniform\n"
-                       "  %ssbo_dst = OpVariable %up_SSBO_${tr} Uniform\n";
+                       " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
+                       "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
 
                decorations +=
                        "OpDecorate %ssbo_src0 DescriptorSet 0\n"
@@ -16421,10 +17113,12 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
        }
        else if (testFunc.funcArgsCount == 2)
        {
+               functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
+               functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
                variables +=
-                       " %ssbo_src0 = OpVariable %up_SSBO_${t0} Uniform\n"
-                       " %ssbo_src1 = OpVariable %up_SSBO_${t1} Uniform\n"
-                       "  %ssbo_dst = OpVariable %up_SSBO_${tr} Uniform\n";
+                       " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
+                       " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
+                       "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
 
                decorations +=
                        "OpDecorate %ssbo_src0 DescriptorSet 0\n"
@@ -16436,11 +17130,14 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
        }
        else if (testFunc.funcArgsCount == 3)
        {
+               functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
+               functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
+               functions += StringTemplate(load_funcs[testFunc.typeArg2]).specialize({{"var", "ssbo_src2"}});
                variables +=
-                       " %ssbo_src0 = OpVariable %up_SSBO_${t0} Uniform\n"
-                       " %ssbo_src1 = OpVariable %up_SSBO_${t1} Uniform\n"
-                       " %ssbo_src2 = OpVariable %up_SSBO_${t2} Uniform\n"
-                       "  %ssbo_dst = OpVariable %up_SSBO_${tr} Uniform\n";
+                       " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
+                       " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
+                       " %ssbo_src2 = OpVariable %up_SSBO_${store_t2} Uniform\n"
+                       "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
 
                decorations +=
                        "OpDecorate %ssbo_src0 DescriptorSet 0\n"
@@ -16468,6 +17165,10 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
        specs["t0"]                                     = string(testTypes[testFunc.typeArg0].typePrefix) + componentType;
        specs["t1"]                                     = string(testTypes[testFunc.typeArg1].typePrefix) + componentType;
        specs["t2"]                                     = string(testTypes[testFunc.typeArg2].typePrefix) + componentType;
+       specs["store_tr"]                       = string(testTypes[testFunc.typeResult].storage_type);
+       specs["store_t0"]                       = string(testTypes[testFunc.typeArg0].storage_type);
+       specs["store_t1"]                       = string(testTypes[testFunc.typeArg1].storage_type);
+       specs["store_t2"]                       = string(testTypes[testFunc.typeArg2].storage_type);
        specs["struct_stride"]          = de::toString(typeStructStride);
        specs["op"]                                     = extInst ? "OpExtInst" : testFunc.funcName;
        specs["ext_inst"]                       = extInst ? string("%ext_import ") + testFunc.funcName : "";
@@ -16484,10 +17185,10 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
        specs["arg_infunc_vars"]        = funcVariables;
        specs["arg_func_call"]          = funcCall;
 
-       fragments["extension"]          = "OpExtension \"SPV_KHR_16bit_storage\"\n%ext_import = OpExtInstImport \"GLSL.std.450\"";
-       fragments["capability"]         = "OpCapability Matrix\nOpCapability StorageUniformBufferBlock16";
+       fragments["extension"]          = "%ext_import = OpExtInstImport \"GLSL.std.450\"";
+       fragments["capability"]         = "OpCapability Matrix\nOpCapability Float16\n";
        fragments["decoration"]         = decoration.specialize(specs);
-       fragments["pre_main"]           = preMain.specialize(specs);
+       fragments["pre_main"]           = preMain.specialize(specs) + functions;
        fragments["testfun"]            = testFun.specialize(specs);
 
        for (size_t inputArgNdx = 0; inputArgNdx < testFunc.funcArgsCount; ++inputArgNdx)
@@ -16504,10 +17205,8 @@ void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGr
        specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16DummyOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
        specResource.verifyIO = testFunc.verifyFunc;
 
-       extensions.push_back("VK_KHR_16bit_storage");
        extensions.push_back("VK_KHR_shader_float16_int8");
 
-       features.ext16BitStorage        = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
        features.extFloat16Int8         = EXTFLOAT16INT8FEATURES_FLOAT16;
 
        finalizeTestsCreation(specResource, fragments, testCtx, testGroup, testName, features, extensions, IVec3(1, 1, 1));
@@ -18199,6 +18898,7 @@ tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
        computeTests->addChild(createOpAtomicGroup(testCtx, false));
        computeTests->addChild(createOpAtomicGroup(testCtx, true));                                     // Using new StorageBuffer decoration
        computeTests->addChild(createOpAtomicGroup(testCtx, false, 1024, true));        // Return value validation
+       computeTests->addChild(createOpAtomicGroup(testCtx, true, 65536, false, true)); // volatile atomics
        computeTests->addChild(createOpLineGroup(testCtx));
        computeTests->addChild(createOpModuleProcessedGroup(testCtx));
        computeTests->addChild(createOpNoLineGroup(testCtx));
@@ -18270,6 +18970,8 @@ tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
        computeTests->addChild(createSignedIntCompareGroup(testCtx));
        computeTests->addChild(createUnusedVariableComputeTests(testCtx));
        computeTests->addChild(createPtrAccessChainGroup(testCtx));
+       computeTests->addChild(createHlslComputeGroup(testCtx));
+       computeTests->addChild(create64bitCompareComputeGroup(testCtx));
 
        graphicsTests->addChild(createCrossStageInterfaceTests(testCtx));
        graphicsTests->addChild(createSpivVersionCheckTests(testCtx, !testComputePipeline));
@@ -18332,9 +19034,11 @@ tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
        graphicsTests->addChild(createVaryingNameGraphicsGroup(testCtx));
        graphicsTests->addChild(createFloat16Tests(testCtx));
        graphicsTests->addChild(createSpirvIdsAbuseTests(testCtx));
+       graphicsTests->addChild(create64bitCompareGraphicsGroup(testCtx));
 
        instructionTests->addChild(computeTests.release());
        instructionTests->addChild(graphicsTests.release());
+       instructionTests->addChild(createSpirvVersion1p4Group(testCtx));
 
        return instructionTests.release();
 }