external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsmInstructionTests.cpp

   1 /*-------------------------------------------------------------------------
   2  * Vulkan Conformance Tests
   3  * ------------------------
   4  *
   5  * Copyright (c) 2015 Google Inc.
   6  * Copyright (c) 2016 The Khronos Group Inc.
   7  *
   8  * Licensed under the Apache License, Version 2.0 (the "License");
   9  * you may not use this file except in compliance with the License.
  10  * You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  *
  20  *//*!
  21  * \file
  22  * \brief SPIR-V Assembly Tests for Instructions (special opcode/operand)
  23  *//*--------------------------------------------------------------------*/
  24
  25 #include "vktSpvAsmInstructionTests.hpp"
  26 #include "vktAmberTestCase.hpp"
  27
  28 #include "tcuCommandLine.hpp"
  29 #include "tcuFormatUtil.hpp"
  30 #include "tcuFloat.hpp"
  31 #include "tcuFloatFormat.hpp"
  32 #include "tcuRGBA.hpp"
  33 #include "tcuStringTemplate.hpp"
  34 #include "tcuTestLog.hpp"
  35 #include "tcuVectorUtil.hpp"
  36 #include "tcuInterval.hpp"
  37
  38 #include "vkDefs.hpp"
  39 #include "vkDeviceUtil.hpp"
  40 #include "vkMemUtil.hpp"
  41 #include "vkPlatform.hpp"
  42 #include "vkPrograms.hpp"
  43 #include "vkQueryUtil.hpp"
  44 #include "vkRef.hpp"
  45 #include "vkRefUtil.hpp"
  46 #include "vkStrUtil.hpp"
  47 #include "vkTypeUtil.hpp"
  48
  49 #include "deStringUtil.hpp"
  50 #include "deUniquePtr.hpp"
  51 #include "deMath.h"
  52 #include "deRandom.hpp"
  53 #include "tcuStringTemplate.hpp"
  54
  55 #include "vktSpvAsmCrossStageInterfaceTests.hpp"
  56 #include "vktSpvAsm8bitStorageTests.hpp"
  57 #include "vktSpvAsm16bitStorageTests.hpp"
  58 #include "vktSpvAsmUboMatrixPaddingTests.hpp"
  59 #include "vktSpvAsmConditionalBranchTests.hpp"
  60 #include "vktSpvAsmIndexingTests.hpp"
  61 #include "vktSpvAsmImageSamplerTests.hpp"
  62 #include "vktSpvAsmComputeShaderCase.hpp"
  63 #include "vktSpvAsmComputeShaderTestUtil.hpp"
  64 #include "vktSpvAsmFloatControlsTests.hpp"
  65 #include "vktSpvAsmFromHlslTests.hpp"
  66 #include "vktSpvAsmEmptyStructTests.hpp"
  67 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
  68 #include "vktSpvAsmVariablePointersTests.hpp"
  69 #include "vktSpvAsmVariableInitTests.hpp"
  70 #include "vktSpvAsmPointerParameterTests.hpp"
  71 #include "vktSpvAsmSpirvVersion1p4Tests.hpp"
  72 #include "vktSpvAsmSpirvVersionTests.hpp"
  73 #include "vktTestCaseUtil.hpp"
  74 #include "vktSpvAsmLoopDepLenTests.hpp"
  75 #include "vktSpvAsmLoopDepInfTests.hpp"
  76 #include "vktSpvAsmCompositeInsertTests.hpp"
  77 #include "vktSpvAsmVaryingNameTests.hpp"
  78 #include "vktSpvAsmWorkgroupMemoryTests.hpp"
  79 #include "vktSpvAsmSignedIntCompareTests.hpp"
  80 #include "vktSpvAsmSignedOpTests.hpp"
  81 #include "vktSpvAsmPtrAccessChainTests.hpp"
  82 #include "vktSpvAsmVectorShuffleTests.hpp"
  83 #include "vktSpvAsmFloatControlsExtensionlessTests.hpp"
  84 #include "vktSpvAsmNonSemanticInfoTests.hpp"
  85 #include "vktSpvAsm64bitCompareTests.hpp"
  86 #include "vktSpvAsmTrinaryMinMaxTests.hpp"
  87 #include "vktSpvAsmTerminateInvocationTests.hpp"
  88 #ifndef CTS_USES_VULKANSC
  89 #include "vktSpvAsmIntegerDotProductTests.hpp"
  90 #endif // CTS_USES_VULKANSC
  91 #include "vktSpvAsmPhysicalStorageBufferPointerTests.hpp"
  92
  93 #include <cmath>
  94 #include <limits>
  95 #include <map>
  96 #include <string>
  97 #include <sstream>
  98 #include <utility>
  99 #include <stack>
 100
 101 namespace vkt
 102 {
 103 namespace SpirVAssembly
 104 {
 105
 106 namespace
 107 {
 108
 109 using namespace vk;
 110 using std::map;
 111 using std::string;
 112 using std::vector;
 113 using tcu::IVec3;
 114 using tcu::IVec4;
 115 using tcu::RGBA;
 116 using tcu::TestLog;
 117 using tcu::TestStatus;
 118 using tcu::Vec4;
 119 using de::UniquePtr;
 120 using tcu::StringTemplate;
 121 using tcu::Vec4;
 122
 123 const bool TEST_WITH_NAN        = true;  // Named flag value (true). NOTE(review): presumably selects test variants that include NaN inputs; the use sites are outside this view.
 124 const bool TEST_WITHOUT_NAN     = false; // Named flag value (false). NOTE(review): presumably the NaN-free counterpart of TEST_WITH_NAN; confirm at the use sites.
 125
 126 const string loadScalarF16FromUint =
             // SPIR-V assembly template for a function %ld_arg_${var} that takes an
             // %i32 index and returns a single %f16. Member 0 of ${var} is read as
             // 32-bit uints, each reinterpreted as a %v2f16: index / 2 selects the
             // uint and index & 1 selects which of its two halves is extracted.
             // ${var} is a placeholder for the variable name
             // (NOTE(review): presumably expanded via tcu::StringTemplate; confirm).
 127         "%ld_arg_${var} = OpFunction %f16 None %f16_i32_fn\n"
 128         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 129         "%ld_arg_${var}_entry = OpLabel\n"
 130         "%ld_arg_${var}_conv = OpBitcast %u32 %ld_arg_${var}_param\n"
 131         "%ld_arg_${var}_div = OpUDiv %u32 %ld_arg_${var}_conv %c_u32_2\n"
 132         "%ld_arg_${var}_and_low = OpBitwiseAnd %u32 %ld_arg_${var}_param %c_u32_1\n"
 133         "%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_div\n"
 134         "%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
 135         "%ld_arg_${var}_unpack = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
 136         "%ld_arg_${var}_ex = OpVectorExtractDynamic %f16 %ld_arg_${var}_unpack %ld_arg_${var}_and_low\n"
 137         "OpReturnValue %ld_arg_${var}_ex\n"
 138         "OpFunctionEnd\n";
 139
 140 const string loadV2F16FromUint =
             // SPIR-V assembly template for a function %ld_arg_${var} that takes an
             // %i32 index and returns a %v2f16. The index directly selects one
             // 32-bit uint in member 0 of ${var}; that uint is bitcast to %v2f16.
             // ${var} is a placeholder for the variable name.
 141         "%ld_arg_${var} = OpFunction %v2f16 None %v2f16_i32_fn\n"
 142         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 143         "%ld_arg_${var}_entry = OpLabel\n"
 144         "%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param\n"
 145         "%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
 146         "%ld_arg_${var}_cast = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
 147         "OpReturnValue %ld_arg_${var}_cast\n"
 148         "OpFunctionEnd\n";
 149
 150 const string loadV3F16FromUints =
             // SPIR-V assembly template for a function %ld_arg_${var} that takes an
             // %i32 index and returns a %v3f16 built from two 32-bit uints
             // (sub-indices 0 and 1 of element [index] in member 0 of ${var}).
 151         // Since we allocate a vec4 worth of values, this case is almost the
 152         // same as the vec4 case (loadV4F16FromUints); only the shuffle differs,
             // keeping components 0 1 2 and dropping the fourth half.
 153         "%ld_arg_${var} = OpFunction %v3f16 None %v3f16_i32_fn\n"
 154         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 155         "%ld_arg_${var}_entry = OpLabel\n"
 156         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 157         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 158         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 159         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 160         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 161         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
 162         "%ld_arg_${var}_shuffle = OpVectorShuffle %v3f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2\n"
 163         "OpReturnValue %ld_arg_${var}_shuffle\n"
 164         "OpFunctionEnd\n";
 165
 166 const string loadV4F16FromUints =
             // SPIR-V assembly template for a function %ld_arg_${var} that takes an
             // %i32 index and returns a %v4f16. Two 32-bit uints (sub-indices 0 and
             // 1 of element [index] in member 0 of ${var}) are each bitcast to
             // %v2f16 and concatenated with OpVectorShuffle (components 0 1 2 3).
 167         "%ld_arg_${var} = OpFunction %v4f16 None %v4f16_i32_fn\n"
 168         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 169         "%ld_arg_${var}_entry = OpLabel\n"
 170         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 171         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 172         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 173         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 174         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 175         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
 176         "%ld_arg_${var}_shuffle = OpVectorShuffle %v4f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2 3\n"
 177         "OpReturnValue %ld_arg_${var}_shuffle\n"
 178         "OpFunctionEnd\n";
 179
 180 const string loadM2x2F16FromUints =
             // SPIR-V assembly template for a function %ld_arg_${var} that takes an
             // %i32 index and returns a %m2x2f16. Two 32-bit uints (sub-indices 0
             // and 1 of element [index] in member 0 of ${var}) are each bitcast to
             // %v2f16 and used as the two constituents of the matrix.
 181         "%ld_arg_${var} = OpFunction %m2x2f16 None %m2x2f16_i32_fn\n"
 182         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 183         "%ld_arg_${var}_entry = OpLabel\n"
 184         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 185         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 186         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 187         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 188         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 189         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
 190         "%ld_arg_${var}_cons = OpCompositeConstruct %m2x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1\n"
 191         "OpReturnValue %ld_arg_${var}_cons\n"
 192         "OpFunctionEnd\n";
 193
 194 const string loadM2x3F16FromUints =
             // SPIR-V assembly template for a function %ld_arg_${var} that takes an
             // %i32 index and returns a %m2x3f16. Four 32-bit uints (sub-indices
             // 0..3 of element [index] in member 0 of ${var}) are bitcast to %v2f16;
             // each consecutive pair is shuffled into one %v3f16 (components 0 1 2,
             // the fourth half is dropped) and the two vectors form the matrix.
             // Suffix "XY" in the ids means: constituent vector X, uint Y of that vector.
 195         "%ld_arg_${var} = OpFunction %m2x3f16 None %m2x3f16_i32_fn\n"
 196         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 197         "%ld_arg_${var}_entry = OpLabel\n"
 198         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 199         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 200         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 201         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 202         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 203         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 204         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 205         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 206         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 207         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 208         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 209         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 210         "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
 211         "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
 212         "%ld_arg_${var}_mat = OpCompositeConstruct %m2x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
 213         "OpReturnValue %ld_arg_${var}_mat\n"
 214         "OpFunctionEnd\n";
 215
 216 const string loadM2x4F16FromUints =
             // SPIR-V assembly template for a function %ld_arg_${var} that takes an
             // %i32 index and returns a %m2x4f16. Four 32-bit uints (sub-indices
             // 0..3 of element [index] in member 0 of ${var}) are bitcast to %v2f16;
             // each consecutive pair is concatenated into one %v4f16 and the two
             // vectors form the matrix.
 217         "%ld_arg_${var} = OpFunction %m2x4f16 None %m2x4f16_i32_fn\n"
 218         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 219         "%ld_arg_${var}_entry = OpLabel\n"
 220         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 221         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 222         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 223         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 224         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 225         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 226         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 227         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 228         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 229         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 230         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 231         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 232         "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
 233         "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
 234         "%ld_arg_${var}_mat = OpCompositeConstruct %m2x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
 235         "OpReturnValue %ld_arg_${var}_mat\n"
 236         "OpFunctionEnd\n";
 237
 238 const string loadM3x2F16FromUints =
             // SPIR-V assembly template for a function %ld_arg_${var} that takes an
             // %i32 index and returns a %m3x2f16. Three 32-bit uints (sub-indices
             // 0..2 of element [index] in member 0 of ${var}) are each bitcast to
             // %v2f16 and used directly as the three constituents of the matrix.
 239         "%ld_arg_${var} = OpFunction %m3x2f16 None %m3x2f16_i32_fn\n"
 240         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 241         "%ld_arg_${var}_entry = OpLabel\n"
 242         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 243         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 244         "%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 245         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 246         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 247         "%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
 248         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 249         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
 250         "%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
 251         "%ld_arg_${var}_mat = OpCompositeConstruct %m3x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2\n"
 252         "OpReturnValue %ld_arg_${var}_mat\n"
 253         "OpFunctionEnd\n";
 254
 255 const string loadM3x3F16FromUints =
             // SPIR-V assembly template for a function %ld_arg_${var} that takes an
             // %i32 index and returns a %m3x3f16. Six 32-bit uints (sub-indices
             // 0..5 of element [index] in member 0 of ${var}) are bitcast to %v2f16;
             // each consecutive pair is shuffled into one %v3f16 (components 0 1 2,
             // the fourth half is dropped) and the three vectors form the matrix.
 256         "%ld_arg_${var} = OpFunction %m3x3f16 None %m3x3f16_i32_fn\n"
 257         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 258         "%ld_arg_${var}_entry = OpLabel\n"
 259         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 260         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 261         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 262         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 263         "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
 264         "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
 265         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 266         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 267         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 268         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 269         "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
 270         "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
 271         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 272         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 273         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 274         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 275         "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
 276         "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
 277         "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
 278         "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
 279         "%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
 280         "%ld_arg_${var}_mat = OpCompositeConstruct %m3x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
 281         "OpReturnValue %ld_arg_${var}_mat\n"
 282         "OpFunctionEnd\n";
 283
 284 const string loadM3x4F16FromUints =
             // SPIR-V assembly template for a function %ld_arg_${var} that takes an
             // %i32 index and returns a %m3x4f16. Six 32-bit uints (sub-indices
             // 0..5 of element [index] in member 0 of ${var}) are bitcast to %v2f16;
             // each consecutive pair is concatenated into one %v4f16 and the three
             // vectors form the matrix.
 285         "%ld_arg_${var} = OpFunction %m3x4f16 None %m3x4f16_i32_fn\n"
 286         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 287         "%ld_arg_${var}_entry = OpLabel\n"
 288         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 289         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 290         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 291         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 292         "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
 293         "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
 294         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 295         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 296         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 297         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 298         "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
 299         "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
 300         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 301         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 302         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 303         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 304         "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
 305         "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
 306         "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
 307         "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
 308         "%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
 309         "%ld_arg_${var}_mat = OpCompositeConstruct %m3x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
 310         "OpReturnValue %ld_arg_${var}_mat\n"
 311         "OpFunctionEnd\n";
 312
 313 const string loadM4x2F16FromUints =
             // SPIR-V assembly template for a function %ld_arg_${var} that takes an
             // %i32 index and returns a %m4x2f16. Four 32-bit uints (sub-indices
             // 0..3 of element [index] in member 0 of ${var}) are each bitcast to
             // %v2f16 and used directly as the four constituents of the matrix.
 314         "%ld_arg_${var} = OpFunction %m4x2f16 None %m4x2f16_i32_fn\n"
 315         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 316         "%ld_arg_${var}_entry = OpLabel\n"
 317         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 318         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 319         "%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 320         "%ld_arg_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 321         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 322         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 323         "%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
 324         "%ld_arg_${var}_ld3 = OpLoad %u32 %ld_arg_${var}_gep3\n"
 325         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 326         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
 327         "%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
 328         "%ld_arg_${var}_bc3 = OpBitcast %v2f16 %ld_arg_${var}_ld3\n"
 329         "%ld_arg_${var}_mat = OpCompositeConstruct %m4x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2 %ld_arg_${var}_bc3\n"
 330         "OpReturnValue %ld_arg_${var}_mat\n"
 331         "OpFunctionEnd\n";
 332
 333 const string loadM4x3F16FromUints =
             // SPIR-V assembly template for a function %ld_arg_${var} that takes an
             // %i32 index and returns a %m4x3f16. Eight 32-bit uints (sub-indices
             // 0..7 of element [index] in member 0 of ${var}) are bitcast to %v2f16;
             // each consecutive pair is shuffled into one %v3f16 (components 0 1 2,
             // the fourth half is dropped) and the four vectors form the matrix.
 334         "%ld_arg_${var} = OpFunction %m4x3f16 None %m4x3f16_i32_fn\n"
 335         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 336         "%ld_arg_${var}_entry = OpLabel\n"
 337         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 338         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 339         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 340         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 341         "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
 342         "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
 343         "%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
 344         "%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
 345         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 346         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 347         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 348         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 349         "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
 350         "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
 351         "%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
 352         "%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
 353         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 354         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 355         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 356         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 357         "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
 358         "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
 359         "%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
 360         "%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
 361         "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
 362         "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
 363         "%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
 364         "%ld_arg_${var}_vec3 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2\n"
 365         "%ld_arg_${var}_mat = OpCompositeConstruct %m4x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
 366         "OpReturnValue %ld_arg_${var}_mat\n"
 367         "OpFunctionEnd\n";
 368
 369 const string loadM4x4F16FromUints =
             // SPIR-V assembly template for a function %ld_arg_${var} that takes an
             // %i32 index and returns a %m4x4f16. Eight 32-bit uints (sub-indices
             // 0..7 of element [index] in member 0 of ${var}) are bitcast to %v2f16;
             // each consecutive pair is concatenated into one %v4f16 and the four
             // vectors form the matrix.
 370         "%ld_arg_${var} = OpFunction %m4x4f16 None %m4x4f16_i32_fn\n"
 371         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 372         "%ld_arg_${var}_entry = OpLabel\n"
 373         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 374         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 375         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 376         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 377         "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
 378         "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
 379         "%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
 380         "%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
 381         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 382         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 383         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 384         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 385         "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
 386         "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
 387         "%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
 388         "%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
 389         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 390         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 391         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 392         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 393         "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
 394         "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
 395         "%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
 396         "%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
 397         "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
 398         "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
 399         "%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
 400         "%ld_arg_${var}_vec3 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2 3\n"
 401         "%ld_arg_${var}_mat = OpCompositeConstruct %m4x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
 402         "OpReturnValue %ld_arg_${var}_mat\n"
 403         "OpFunctionEnd\n";
 404
 405 const string storeScalarF16AsUint =
             // SPIR-V assembly template for a function %st_fn_${var} that takes an
             // %f16 value (param1) and an %i32 index (param2) and writes the value
             // into one half of the 32-bit uint at index / 2 in member 0 of ${var};
             // index & 1 selects which half.
 406         // This version is sensitive to the initial value in the output buffer.
 407         // The infrastructure sets all output buffer bits to one before invoking
 408         // the shader so this version uses an atomic AND (OpAtomicAnd) to generate
 409         // the correct zeroes without disturbing the other half of the word.
 410         "%st_fn_${var} = OpFunction %void None %void_f16_i32_fn\n"
 411         "%st_fn_${var}_param1 = OpFunctionParameter %f16\n"
 412         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 413         "%st_fn_${var}_entry = OpLabel\n"
 414         "%st_fn_${var}_and_low = OpBitwiseAnd %u32 %st_fn_${var}_param2 %c_u32_1\n"
 415         "%st_fn_${var}_zero_vec = OpBitcast %v2f16 %c_u32_0\n"
 416         "%st_fn_${var}_insert = OpVectorInsertDynamic %v2f16 %st_fn_${var}_zero_vec %st_fn_${var}_param1 %st_fn_${var}_and_low\n"
 417         "%st_fn_${var}_odd = OpIEqual %bool %st_fn_${var}_and_low %c_u32_1\n"
 418         // Or 16 bits of ones into the half that was not populated with the result.
             // NOTE(review): %c_u32_low_ones / %c_u32_high_ones are declared elsewhere;
             // presumably 0x0000FFFF and 0xFFFF0000 - confirm against their definitions.
 419         "%st_fn_${var}_sel = OpSelect %u32 %st_fn_${var}_odd %c_u32_low_ones %c_u32_high_ones\n"
 420         "%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_insert\n"
 421         "%st_fn_${var}_or = OpBitwiseOr %u32 %st_fn_${var}_cast %st_fn_${var}_sel\n"
 422         "%st_fn_${var}_conv = OpBitcast %u32 %st_fn_${var}_param2\n"
 423         "%st_fn_${var}_div = OpUDiv %u32 %st_fn_${var}_conv %c_u32_2\n"
 424         "%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_div\n"
             // OpAtomicAnd operands: pointer, scope id (%c_u32_1), memory semantics id
             // (%c_u32_0), value. The returned original value is unused.
 425         "%st_fn_${var}_and = OpAtomicAnd %u32 %st_fn_${var}_gep %c_u32_1 %c_u32_0 %st_fn_${var}_or\n"
 426         "OpReturn\n"
 427         "OpFunctionEnd\n";
 428
 429 const string storeV2F16AsUint =
             // SPIR-V assembly template for a function %st_fn_${var} that takes a
             // %v2f16 value (param1) and an %i32 index (param2), bitcasts the vector
             // to one 32-bit uint and stores it at [index] in member 0 of ${var}.
 430         "%st_fn_${var} = OpFunction %void None %void_v2f16_i32_fn\n"
 431         "%st_fn_${var}_param1 = OpFunctionParameter %v2f16\n"
 432         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 433         "%st_fn_${var}_entry = OpLabel\n"
 434         "%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_param1\n"
 435         "%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2\n"
 436         "OpStore %st_fn_${var}_gep %st_fn_${var}_cast\n"
 437         "OpReturn\n"
 438         "OpFunctionEnd\n";
 439
 440 const string storeV3F16AsUints =
             // SPIR-V assembly template for a function %st_fn_${var} that takes a
             // %v3f16 value (param1) and an %i32 index (param2) and stores it as two
             // 32-bit uints at sub-indices 0 and 1 of element [index] in member 0 of
             // ${var}.
 441         // Since we allocate a vec4 worth of values, this case can be treated
 442         // almost the same as a vec4 case. We will store some extra data that
 443         // should not be compared.
             // The extra data comes from shuffle1: both shuffle operands are param1
             // (3 components each), so shuffle index 3 selects component 0 of the
             // second operand, which fills the otherwise unused fourth half.
 444         "%st_fn_${var} = OpFunction %void None %void_v3f16_i32_fn\n"
 445         "%st_fn_${var}_param1 = OpFunctionParameter %v3f16\n"
 446         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 447         "%st_fn_${var}_entry = OpLabel\n"
 448         "%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
 449         "%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
 450         "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
 451         "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
 452         "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
 453         "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
 454         "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
 455         "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
 456         "OpReturn\n"
 457         "OpFunctionEnd\n";
 458
 459 const string storeV4F16AsUints =
             // SPIR-V assembly template for a function %st_fn_${var} that takes a
             // %v4f16 value (param1) and an %i32 index (param2). The vector is split
             // into two %v2f16 halves (components 0 1 and 2 3), each bitcast to a
             // 32-bit uint and stored at sub-indices 0 and 1 of element [index] in
             // member 0 of ${var}.
 460         "%st_fn_${var} = OpFunction %void None %void_v4f16_i32_fn\n"
 461         "%st_fn_${var}_param1 = OpFunctionParameter %v4f16\n"
 462         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 463         "%st_fn_${var}_entry = OpLabel\n"
 464         "%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
 465         "%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
 466         "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
 467         "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
 468         "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
 469         "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
 470         "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
 471         "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
 472         "OpReturn\n"
 473         "OpFunctionEnd\n";
 474
 475 const string storeM2x2F16AsUints =
             // SPIR-V assembly template for a function %st_fn_${var} that takes a
             // %m2x2f16 value (param1) and an %i32 index (param2). The two %v2f16
             // constituents are extracted, each bitcast to a 32-bit uint and stored
             // at sub-indices 0 and 1 of element [index] in member 0 of ${var}.
 476         "%st_fn_${var} = OpFunction %void None %void_m2x2f16_i32_fn\n"
 477         "%st_fn_${var}_param1 = OpFunctionParameter %m2x2f16\n"
 478         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 479         "%st_fn_${var}_entry = OpLabel\n"
 480         "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
 481         "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
 482         "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
 483         "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
 484         "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
 485         "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
 486         "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
 487         "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
 488         "OpReturn\n"
 489         "OpFunctionEnd\n";
 490
 491 const string storeM2x3F16AsUints =
             // SPIR-V assembly template for a function %st_fn_${var} that takes a
             // %m2x3f16 value (param1) and an %i32 index (param2). Each of the two
             // %v3f16 constituents is split into two %v2f16 pieces, bitcast to 32-bit
             // uints and stored at sub-indices 0..3 of element [index] in member 0 of
             // ${var} (two uints per constituent vector).
 492         // In the extracted elements for 01 and 11 the second element doesn't
 493         // matter: shuffle index 3 selects component 0 of the second shuffle
             // operand (the same 3-component vector) and only pads the uint.
 494         "%st_fn_${var} = OpFunction %void None %void_m2x3f16_i32_fn\n"
 495         "%st_fn_${var}_param1 = OpFunctionParameter %m2x3f16\n"
 496         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 497         "%st_fn_${var}_entry = OpLabel\n"
 498         "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
 499         "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
 500         "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
 501         "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
 502         "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
 503         "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
 504         "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
 505         "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
 506         "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
 507         "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
 508         "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
 509         "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
 510         "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
 511         "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
 512         "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
 513         "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
 514         "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
 515         "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
 516         "OpReturn\n"
 517         "OpFunctionEnd\n";
 518
// SPIR-V assembly text defining the function %st_fn_${var}, which takes a
// %m2x4f16 matrix (param1) and an %i32 index (param2) and writes the matrix
// into %${var} (member 0, index param2) as four %u32 words. Each 4-component
// column is split into two v2f16 halves (components 0-1 and 2-3) that are
// bitcast to u32. ${var} is a placeholder that must be substituted before the
// text is assembled.
const string storeM2x4F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m2x4f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m2x4f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 544
// SPIR-V assembly text defining the function %st_fn_${var}, which takes a
// %m3x2f16 matrix (param1) and an %i32 index (param2) and writes the matrix
// into %${var} (member 0, index param2) as three %u32 words. Each column is
// already a v2f16, so it is bitcast to u32 directly, one word per column.
// ${var} is a placeholder that must be substituted before the text is
// assembled.
const string storeM3x2F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m3x2f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m3x2f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
        "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
        "%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
        "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
        "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
        "OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 564
// SPIR-V assembly text defining the function %st_fn_${var}, which takes a
// %m3x3f16 matrix (param1) and an %i32 index (param2) and writes the matrix
// into %${var} (member 0, index param2) as six %u32 words, two words per
// column. ${var} is a placeholder that must be substituted before the text is
// assembled.
const string storeM3x3F16AsUints =
        // Each vec3 column is broken down into two v2f16 halves. In the second
        // half (ele01, ele11, ele21) only the first component is real data;
        // shuffle index 3 refers to component 0 of the second operand and
        // merely pads the half, so the second element doesn't matter.
        "%st_fn_${var} = OpFunction %void None %void_m3x3f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m3x3f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
        "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
        "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
        "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
        "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 600
// SPIR-V assembly text defining the function %st_fn_${var}, which takes a
// %m3x4f16 matrix (param1) and an %i32 index (param2) and writes the matrix
// into %${var} (member 0, index param2) as six %u32 words. Each 4-component
// column is split into two v2f16 halves (components 0-1 and 2-3) that are
// bitcast to u32. ${var} is a placeholder that must be substituted before the
// text is assembled.
const string storeM3x4F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m3x4f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m3x4f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
        "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
        "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
        "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
        "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 635
// SPIR-V assembly text defining the function %st_fn_${var}, which takes a
// %m4x2f16 matrix (param1) and an %i32 index (param2) and writes the matrix
// into %${var} (member 0, index param2) as four %u32 words. Each column is
// already a v2f16, so it is bitcast to u32 directly, one word per column.
// ${var} is a placeholder that must be substituted before the text is
// assembled.
const string storeM4x2F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m4x2f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m4x2f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_ex3 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 3\n"
        "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
        "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
        "%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
        "%st_fn_${var}_bc3 = OpBitcast %u32 %st_fn_${var}_ex3\n"
        "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
        "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
        "OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
        "OpStore %st_fn_${var}_gep3 %st_fn_${var}_bc3\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 659
// SPIR-V assembly text defining the function %st_fn_${var}, which takes a
// %m4x3f16 matrix (param1) and an %i32 index (param2) and writes the matrix
// into %${var} (member 0, index param2) as eight %u32 words, two words per
// column. ${var} is a placeholder that must be substituted before the text is
// assembled.
const string storeM4x3F16AsUints =
        // Each vec3 column is decomposed into two v2f16 halves. The last
        // element of the second half (ele01, ele11, ele21, ele31) is padding:
        // shuffle index 3 refers to component 0 of the second operand, and its
        // value doesn't matter.
        "%st_fn_${var} = OpFunction %void None %void_m4x3f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m4x3f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_ex3 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 3\n"
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
        "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
        "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
        "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
        "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
        "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
        "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
        "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
        "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
        "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
        "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
        "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
        "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 704
// SPIR-V assembly text defining the function %st_fn_${var}, which takes a
// %m4x4f16 matrix (param1) and an %i32 index (param2) and writes the matrix
// into %${var} (member 0, index param2) as eight %u32 words. Each 4-component
// column is split into two v2f16 halves (components 0-1 and 2-3) that are
// bitcast to u32. ${var} is a placeholder that must be substituted before the
// text is assembled.
const string storeM4x4F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m4x4f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m4x4f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_ex3 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 3\n"
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
        "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
        "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
        "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
        "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
        "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
        "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
        "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
        "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
        "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
        "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
        "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
        "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 748
 749 template<typename T>
 750 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, int offset = 0)
 751 {
 752         T* const typedPtr = (T*)dst;
 753         for (int ndx = 0; ndx < numValues; ndx++)
 754                 typedPtr[offset + ndx] = de::randomScalar<T>(rnd, minValue, maxValue);
 755 }
 756
 757 // Filter is a function that returns true if a value should pass, false otherwise.
 758 template<typename T, typename FilterT>
 759 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, FilterT filter, int offset = 0)
 760 {
 761         T* const typedPtr = (T*)dst;
 762         T value;
 763         for (int ndx = 0; ndx < numValues; ndx++)
 764         {
 765                 do
 766                         value = de::randomScalar<T>(rnd, minValue, maxValue);
 767                 while (!filter(value));
 768
 769                 typedPtr[offset + ndx] = value;
 770         }
 771 }
 772
 773 // Gets a 64-bit integer with a more logarithmic distribution
 774 deInt64 randomInt64LogDistributed (de::Random& rnd)
 775 {
 776         deInt64 val = rnd.getUint64();
 777         val &= (1ull << rnd.getInt(1, 63)) - 1;
 778         if (rnd.getBool())
 779                 val = -val;
 780         return val;
 781 }
 782
 783 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues)
 784 {
 785         for (int ndx = 0; ndx < numValues; ndx++)
 786                 dst[ndx] = randomInt64LogDistributed(rnd);
 787 }
 788
 789 template<typename FilterT>
 790 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues, FilterT filter)
 791 {
 792         for (int ndx = 0; ndx < numValues; ndx++)
 793         {
 794                 deInt64 value;
 795                 do {
 796                         value = randomInt64LogDistributed(rnd);
 797                 } while (!filter(value));
 798                 dst[ndx] = value;
 799         }
 800 }
 801
 802 inline bool filterNonNegative (const deInt64 value)
 803 {
 804         return value >= 0;
 805 }
 806
 807 inline bool filterPositive (const deInt64 value)
 808 {
 809         return value > 0;
 810 }
 811
 812 inline bool filterNotZero (const deInt64 value)
 813 {
 814         return value != 0;
 815 }
 816
 817 static void floorAll (vector<float>& values)
 818 {
 819         for (size_t i = 0; i < values.size(); i++)
 820                 values[i] = deFloatFloor(values[i]);
 821 }
 822
 823 static void floorAll (vector<Vec4>& values)
 824 {
 825         for (size_t i = 0; i < values.size(); i++)
 826                 values[i] = floor(values[i]);
 827 }
 828
 829 struct CaseParameter
 830 {
 831         const char*             name;
 832         string                  param;
 833
 834         CaseParameter   (const char* case_, const string& param_) : name(case_), param(param_) {}
 835 };
 836
 837 // Assembly code used for testing LocalSize, OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
 838 //
 839 // #version 430
 840 //
 841 // layout(std140, set = 0, binding = 0) readonly buffer Input {
 842 //   float elements[];
 843 // } input_data;
 844 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
 845 //   float elements[];
 846 // } output_data;
 847 //
 848 // layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 849 //
 850 // void main() {
 851 //   uint x = gl_GlobalInvocationID.x;
 852 //   output_data.elements[x] = -input_data.elements[x];
 853 // }
 854
 855 static string getAsmForLocalSizeTest(bool useLiteralLocalSize, bool useLiteralLocalSizeId, bool useSpecConstantWorkgroupSize, IVec3 workGroupSize, deUint32 ndx)
 856 {
 857         std::ostringstream out;
 858         out << "OpCapability Shader\n"
 859                    "OpMemoryModel Logical GLSL450\n";
 860
 861         if (useLiteralLocalSizeId)
 862         {
 863                 out << "OpEntryPoint GLCompute %main \"main\" %id %indata %outdata\n"
 864                            "OpExecutionModeId %main LocalSizeId %const_0 %const_1 %const_2\n";
 865         }
 866         else
 867         {
 868                 out << "OpEntryPoint GLCompute %main \"main\" %id\n";
 869
 870                 if (useLiteralLocalSize)
 871                 {
 872                         out << "OpExecutionMode %main LocalSize "
 873                                 << workGroupSize.x() << " " << workGroupSize.y() << " " << workGroupSize.z() << "\n";
 874                 }
 875         }
 876
 877         out << "OpSource GLSL 430\n"
 878                    "OpName %main           \"main\"\n"
 879                    "OpName %id             \"gl_GlobalInvocationID\"\n"
 880                    "OpDecorate %id BuiltIn GlobalInvocationId\n";
 881
 882         if (useSpecConstantWorkgroupSize)
 883         {
 884                 out << "OpDecorate %spec_0 SpecId 100\n"
 885                            "OpDecorate %spec_1 SpecId 101\n"
 886                            "OpDecorate %spec_2 SpecId 102\n"
 887                            "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n";
 888         }
 889
 890         if (useLiteralLocalSizeId)
 891         {
 892                 out << getComputeAsmInputOutputBufferTraits("Block")
 893                         << getComputeAsmCommonTypes("StorageBuffer")
 894                         << getComputeAsmInputOutputBuffer("StorageBuffer")
 895                         << "%const_0  = OpConstant %u32 " << workGroupSize.x() << "\n"
 896                            "%const_1  = OpConstant %u32 " << workGroupSize.y() << "\n"
 897                            "%const_2  = OpConstant %u32 " << workGroupSize.z() << "\n";
 898         }
 899         else
 900         {
 901                 out << getComputeAsmInputOutputBufferTraits()
 902                         << getComputeAsmCommonTypes()
 903                         << getComputeAsmInputOutputBuffer();
 904         }
 905
 906         out << "%id        = OpVariable %uvec3ptr Input\n"
 907                    "%zero      = OpConstant %i32 0 \n";
 908
 909         if (useSpecConstantWorkgroupSize)
 910         {
 911                 out << "%spec_0   = OpSpecConstant %u32 "<< workGroupSize.x() << "\n"
 912                            "%spec_1   = OpSpecConstant %u32 "<< workGroupSize.y() << "\n"
 913                            "%spec_2   = OpSpecConstant %u32 "<< workGroupSize.z() << "\n"
 914                            "%gl_WorkGroupSize = OpSpecConstantComposite %uvec3 %spec_0 %spec_1 %spec_2\n";
 915         }
 916
 917         out << "%main      = OpFunction %void None %voidf\n"
 918                    "%label     = OpLabel\n"
 919                    "%idval     = OpLoad %uvec3 %id\n"
 920                    "%ndx       = OpCompositeExtract %u32 %idval " << ndx << "\n"
 921
 922                    "%inloc     = OpAccessChain %f32ptr %indata %zero %ndx\n"
 923                    "%inval     = OpLoad %f32 %inloc\n"
 924                    "%neg       = OpFNegate %f32 %inval\n"
 925                    "%outloc    = OpAccessChain %f32ptr %outdata %zero %ndx\n"
 926                    "             OpStore %outloc %neg\n"
 927                    "             OpReturn\n"
 928                    "             OpFunctionEnd\n";
 929
 930         return out.str();
 931 }
 932
 933 tcu::TestCaseGroup* createLocalSizeGroup(tcu::TestContext& testCtx, bool useLocalSizeId)
 934 {
 935         const char*             groupName[]{ "localsize", "localsize_id" };
 936
 937         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, groupName[useLocalSizeId], ""));
 938         ComputeShaderSpec                               spec;
 939         de::Random                                              rnd                             (deStringHash(group->getName()));
 940         const deUint32                                  numElements             = 64u;
 941         vector<float>                                   positiveFloats  (numElements, 0);
 942         vector<float>                                   negativeFloats  (numElements, 0);
 943
 944         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
 945
 946         for (size_t ndx = 0; ndx < numElements; ++ndx)
 947                 negativeFloats[ndx] = -positiveFloats[ndx];
 948
 949         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
 950         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
 951
 952         if (useLocalSizeId)
 953         {
 954                 spec.spirvVersion = SPIRV_VERSION_1_5;
 955                 spec.extensions.push_back("VK_KHR_maintenance4");
 956         }
 957
 958         spec.numWorkGroups = IVec3(numElements, 1, 1);
 959
 960         spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, 1, 1), 0u);
 961         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize", "", spec));
 962
 963         spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, 1, 1), 0u);
 964         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize", "", spec));
 965
 966         if (!useLocalSizeId)    // dont repeat this test when useLocalSizeId is true
 967         {
 968                 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, 1, 1), 0u);
 969                 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize", "", spec));
 970         }
 971
 972         spec.numWorkGroups = IVec3(1, 1, 1);
 973
 974         spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(numElements, 1, 1), 0u);
 975         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_x", "", spec));
 976
 977         spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(numElements, 1, 1), 0u);
 978         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_x", "", spec));
 979
 980         if (!useLocalSizeId)    // dont repeat this test when useLocalSizeId is true
 981         {
 982                 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(numElements, 1, 1), 0u);
 983                 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_x", "", spec));
 984         }
 985
 986         spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, numElements, 1), 1u);
 987         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_y", "", spec));
 988
 989         spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, numElements, 1), 1u);
 990         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_y", "", spec));
 991
 992         if (!useLocalSizeId)    // dont repeat this test when useLocalSizeId is true
 993         {
 994                 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, numElements, 1), 1u);
 995                 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_y", "", spec));
 996         }
 997
 998         spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, 1, numElements), 2u);
 999         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_z", "", spec));
1000
1001         spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, 1, numElements), 2u);
1002         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_z", "", spec));
1003
1004         if (!useLocalSizeId)    // dont repeat this test when useLocalSizeId is true
1005         {
1006                 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, 1, numElements), 2u);
1007                 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_z", "", spec));
1008         }
1009
1010         return group.release();
1011 }
1012
1013 tcu::TestCaseGroup* createOpNopGroup (tcu::TestContext& testCtx)
1014 {
1015         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnop", "Test the OpNop instruction"));
1016         ComputeShaderSpec                               spec;
1017         de::Random                                              rnd                             (deStringHash(group->getName()));
1018         const int                                               numElements             = 100;
1019         vector<float>                                   positiveFloats  (numElements, 0);
1020         vector<float>                                   negativeFloats  (numElements, 0);
1021
1022         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1023
1024         for (size_t ndx = 0; ndx < numElements; ++ndx)
1025                 negativeFloats[ndx] = -positiveFloats[ndx];
1026
1027         spec.assembly =
1028                 string(getComputeAsmShaderPreamble()) +
1029
1030                 "OpSource GLSL 430\n"
1031                 "OpName %main           \"main\"\n"
1032                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1033
1034                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1035
1036                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1037
1038                 + string(getComputeAsmInputOutputBuffer()) +
1039
1040                 "%id        = OpVariable %uvec3ptr Input\n"
1041                 "%zero      = OpConstant %i32 0\n"
1042
1043                 "%main      = OpFunction %void None %voidf\n"
1044                 "%label     = OpLabel\n"
1045                 "%idval     = OpLoad %uvec3 %id\n"
1046                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1047
1048                 "             OpNop\n" // Inside a function body
1049
1050                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1051                 "%inval     = OpLoad %f32 %inloc\n"
1052                 "%neg       = OpFNegate %f32 %inval\n"
1053                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1054                 "             OpStore %outloc %neg\n"
1055                 "             OpReturn\n"
1056                 "             OpFunctionEnd\n";
1057         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1058         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1059         spec.numWorkGroups = IVec3(numElements, 1, 1);
1060
1061         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNop appearing at different places", spec));
1062
1063         return group.release();
1064 }
1065
1066 tcu::TestCaseGroup* createUnusedVariableComputeTests (tcu::TestContext& testCtx)
1067 {
1068         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "unused_variables", "Compute shaders with unused variables"));
1069         de::Random                                              rnd                             (deStringHash(group->getName()));
1070         const int                                               numElements             = 100;
1071         vector<float>                                   positiveFloats  (numElements, 0);
1072         vector<float>                                   negativeFloats  (numElements, 0);
1073
1074         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1075
1076         for (size_t ndx = 0; ndx < numElements; ++ndx)
1077                 negativeFloats[ndx] = -positiveFloats[ndx];
1078
1079         const VariableLocation                  testLocations[] =
1080         {
1081                 // Set          Binding
1082                 { 0,            5                       },
1083                 { 5,            5                       },
1084         };
1085
1086         for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
1087         {
1088                 const VariableLocation& location = testLocations[locationNdx];
1089
1090                 // Unused variable.
1091                 {
1092                         ComputeShaderSpec                               spec;
1093
1094                         spec.assembly =
1095                                 string(getComputeAsmShaderPreamble()) +
1096
1097                                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1098
1099                                 + getUnusedDecorations(location)
1100
1101                                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1102
1103                                 + getUnusedTypesAndConstants()
1104
1105                                 + string(getComputeAsmInputOutputBuffer())
1106
1107                                 + getUnusedBuffer() +
1108
1109                                 "%id        = OpVariable %uvec3ptr Input\n"
1110                                 "%zero      = OpConstant %i32 0\n"
1111
1112                                 "%main      = OpFunction %void None %voidf\n"
1113                                 "%label     = OpLabel\n"
1114                                 "%idval     = OpLoad %uvec3 %id\n"
1115                                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1116
1117                                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1118                                 "%inval     = OpLoad %f32 %inloc\n"
1119                                 "%neg       = OpFNegate %f32 %inval\n"
1120                                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1121                                 "             OpStore %outloc %neg\n"
1122                                 "             OpReturn\n"
1123                                 "             OpFunctionEnd\n";
1124                         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1125                         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1126                         spec.numWorkGroups = IVec3(numElements, 1, 1);
1127
1128                         std::string testName            = "variable_" + location.toString();
1129                         std::string testDescription     = "Unused variable test with " + location.toDescription();
1130
1131                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testDescription.c_str(), spec));
1132                 }
1133
1134                 // Unused function.
1135                 {
1136                         ComputeShaderSpec                               spec;
1137
1138                         spec.assembly =
1139                                 string(getComputeAsmShaderPreamble("", "", "", getUnusedEntryPoint())) +
1140
1141                                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1142
1143                                 + getUnusedDecorations(location)
1144
1145                                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1146
1147                                 + getUnusedTypesAndConstants() +
1148
1149                                 "%c_i32_0 = OpConstant %i32 0\n"
1150                                 "%c_i32_1 = OpConstant %i32 1\n"
1151
1152                                 + string(getComputeAsmInputOutputBuffer())
1153
1154                                 + getUnusedBuffer() +
1155
1156                                 "%id        = OpVariable %uvec3ptr Input\n"
1157                                 "%zero      = OpConstant %i32 0\n"
1158
1159                                 "%main      = OpFunction %void None %voidf\n"
1160                                 "%label     = OpLabel\n"
1161                                 "%idval     = OpLoad %uvec3 %id\n"
1162                                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1163
1164                                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1165                                 "%inval     = OpLoad %f32 %inloc\n"
1166                                 "%neg       = OpFNegate %f32 %inval\n"
1167                                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1168                                 "             OpStore %outloc %neg\n"
1169                                 "             OpReturn\n"
1170                                 "             OpFunctionEnd\n"
1171
1172                                 + getUnusedFunctionBody();
1173
1174                         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1175                         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1176                         spec.numWorkGroups = IVec3(numElements, 1, 1);
1177
1178                         std::string testName            = "function_" + location.toString();
1179                         std::string testDescription     = "Unused function test with " + location.toDescription();
1180
1181                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testDescription.c_str(), spec));
1182                 }
1183         }
1184
1185         return group.release();
1186 }
1187
1188 template<bool nanSupported>
1189 bool compareFUnord (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
1190 {
1191         if (outputAllocs.size() != 1)
1192                 return false;
1193
1194         vector<deUint8> input1Bytes;
1195         vector<deUint8> input2Bytes;
1196         vector<deUint8> expectedBytes;
1197
1198         inputs[0].getBytes(input1Bytes);
1199         inputs[1].getBytes(input2Bytes);
1200         expectedOutputs[0].getBytes(expectedBytes);
1201
1202         const deInt32* const    expectedOutputAsInt             = reinterpret_cast<const deInt32*>(&expectedBytes.front());
1203         const deInt32* const    outputAsInt                             = static_cast<const deInt32*>(outputAllocs[0]->getHostPtr());
1204         const float* const              input1AsFloat                   = reinterpret_cast<const float*>(&input1Bytes.front());
1205         const float* const              input2AsFloat                   = reinterpret_cast<const float*>(&input2Bytes.front());
1206         bool returnValue                                                                = true;
1207
1208         for (size_t idx = 0; idx < expectedBytes.size() / sizeof(deInt32); ++idx)
1209         {
1210                 if (!nanSupported && (tcu::Float32(input1AsFloat[idx]).isNaN() || tcu::Float32(input2AsFloat[idx]).isNaN()))
1211                         continue;
1212
1213                 if (outputAsInt[idx] != expectedOutputAsInt[idx])
1214                 {
1215                         log << TestLog::Message << "ERROR: Sub-case failed. inputs: " << input1AsFloat[idx] << "," << input2AsFloat[idx] << " output: " << outputAsInt[idx]<< " expected output: " << expectedOutputAsInt[idx] << TestLog::EndMessage;
1216                         returnValue = false;
1217                 }
1218         }
1219         return returnValue;
1220 }
1221
1222 typedef VkBool32 (*compareFuncType) (float, float);
1223
1224 struct OpFUnordCase
1225 {
1226         const char*             name;
1227         const char*             opCode;
1228         compareFuncType compareFunc;
1229
1230                                         OpFUnordCase                    (const char* _name, const char* _opCode, compareFuncType _compareFunc)
1231                                                 : name                          (_name)
1232                                                 , opCode                        (_opCode)
1233                                                 , compareFunc           (_compareFunc) {}
1234 };
1235
// Appends one OpFUnordCase to the vector named `cases` at the expansion site.
// A function-local struct provides the host comparison function, which applies
// OPERATOR to its two float arguments and maps the result to VK_TRUE/VK_FALSE.
#define ADD_OPFUNORD_CASE(NAME, OPCODE, OPERATOR) \
do { \
        struct compare_##NAME \
        { \
                static VkBool32 compare (float lhs, float rhs) \
                { \
                        if (lhs OPERATOR rhs) \
                                return VK_TRUE; \
                        return VK_FALSE; \
                } \
        }; \
        cases.push_back(OpFUnordCase(#NAME, OPCODE, compare_##NAME::compare)); \
} while (deGetFalse())
1241
1242 tcu::TestCaseGroup* createOpFUnordGroup (tcu::TestContext& testCtx, const bool testWithNan)
1243 {
1244         const string                                    nan                             = testWithNan ? "_nan" : "";
1245         const string                                    groupName               = "opfunord" + nan;
1246         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpFUnord* opcodes"));
1247         de::Random                                              rnd                             (deStringHash(group->getName()));
1248         const int                                               numElements             = 100;
1249         vector<OpFUnordCase>                    cases;
1250         string                                                  extensions              = testWithNan ? "OpExtension \"SPV_KHR_float_controls\"\n" : "";
1251         string                                                  capabilities    = testWithNan ? "OpCapability SignedZeroInfNanPreserve\n" : "";
1252         string                                                  exeModes                = testWithNan ? "OpExecutionMode %main SignedZeroInfNanPreserve 32\n" : "";
1253         const StringTemplate                    shaderTemplate  (
1254                 string(getComputeAsmShaderPreamble(capabilities, extensions, exeModes)) +
1255                 "OpSource GLSL 430\n"
1256                 "OpName %main           \"main\"\n"
1257                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1258
1259                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1260
1261                 "OpDecorate %buf BufferBlock\n"
1262                 "OpDecorate %buf2 BufferBlock\n"
1263                 "OpDecorate %indata1 DescriptorSet 0\n"
1264                 "OpDecorate %indata1 Binding 0\n"
1265                 "OpDecorate %indata2 DescriptorSet 0\n"
1266                 "OpDecorate %indata2 Binding 1\n"
1267                 "OpDecorate %outdata DescriptorSet 0\n"
1268                 "OpDecorate %outdata Binding 2\n"
1269                 "OpDecorate %f32arr ArrayStride 4\n"
1270                 "OpDecorate %i32arr ArrayStride 4\n"
1271                 "OpMemberDecorate %buf 0 Offset 0\n"
1272                 "OpMemberDecorate %buf2 0 Offset 0\n"
1273
1274                 + string(getComputeAsmCommonTypes()) +
1275
1276                 "%buf        = OpTypeStruct %f32arr\n"
1277                 "%bufptr     = OpTypePointer Uniform %buf\n"
1278                 "%indata1    = OpVariable %bufptr Uniform\n"
1279                 "%indata2    = OpVariable %bufptr Uniform\n"
1280
1281                 "%buf2       = OpTypeStruct %i32arr\n"
1282                 "%buf2ptr    = OpTypePointer Uniform %buf2\n"
1283                 "%outdata    = OpVariable %buf2ptr Uniform\n"
1284
1285                 "%id        = OpVariable %uvec3ptr Input\n"
1286                 "%zero      = OpConstant %i32 0\n"
1287                 "%consti1   = OpConstant %i32 1\n"
1288                 "%constf1   = OpConstant %f32 1.0\n"
1289
1290                 "%main      = OpFunction %void None %voidf\n"
1291                 "%label     = OpLabel\n"
1292                 "%idval     = OpLoad %uvec3 %id\n"
1293                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1294
1295                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
1296                 "%inval1    = OpLoad %f32 %inloc1\n"
1297                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
1298                 "%inval2    = OpLoad %f32 %inloc2\n"
1299                 "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
1300
1301                 "%result    = ${OPCODE} %bool %inval1 %inval2\n"
1302                 "%int_res   = OpSelect %i32 %result %consti1 %zero\n"
1303                 "             OpStore %outloc %int_res\n"
1304
1305                 "             OpReturn\n"
1306                 "             OpFunctionEnd\n");
1307
1308         ADD_OPFUNORD_CASE(equal, "OpFUnordEqual", ==);
1309         ADD_OPFUNORD_CASE(less, "OpFUnordLessThan", <);
1310         ADD_OPFUNORD_CASE(lessequal, "OpFUnordLessThanEqual", <=);
1311         ADD_OPFUNORD_CASE(greater, "OpFUnordGreaterThan", >);
1312         ADD_OPFUNORD_CASE(greaterequal, "OpFUnordGreaterThanEqual", >=);
1313         ADD_OPFUNORD_CASE(notequal, "OpFUnordNotEqual", !=);
1314
1315         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1316         {
1317                 map<string, string>                     specializations;
1318                 ComputeShaderSpec                       spec;
1319                 const float                                     NaN                             = std::numeric_limits<float>::quiet_NaN();
1320                 vector<float>                           inputFloats1    (numElements, 0);
1321                 vector<float>                           inputFloats2    (numElements, 0);
1322                 vector<deInt32>                         expectedInts    (numElements, 0);
1323
1324                 specializations["OPCODE"]       = cases[caseNdx].opCode;
1325                 spec.assembly                           = shaderTemplate.specialize(specializations);
1326
1327                 fillRandomScalars(rnd, 1.f, 100.f, &inputFloats1[0], numElements);
1328                 for (size_t ndx = 0; ndx < numElements; ++ndx)
1329                 {
1330                         switch (ndx % 6)
1331                         {
1332                                 case 0:         inputFloats2[ndx] = inputFloats1[ndx] + 1.0f; break;
1333                                 case 1:         inputFloats2[ndx] = inputFloats1[ndx] - 1.0f; break;
1334                                 case 2:         inputFloats2[ndx] = inputFloats1[ndx]; break;
1335                                 case 3:         inputFloats2[ndx] = NaN; break;
1336                                 case 4:         inputFloats2[ndx] = inputFloats1[ndx];  inputFloats1[ndx] = NaN; break;
1337                                 case 5:         inputFloats2[ndx] = NaN;                                inputFloats1[ndx] = NaN; break;
1338                         }
1339                         expectedInts[ndx] = tcu::Float32(inputFloats1[ndx]).isNaN() || tcu::Float32(inputFloats2[ndx]).isNaN() || cases[caseNdx].compareFunc(inputFloats1[ndx], inputFloats2[ndx]);
1340                 }
1341
1342                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1343                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1344                 spec.outputs.push_back(BufferSp(new Int32Buffer(expectedInts)));
1345                 spec.numWorkGroups      = IVec3(numElements, 1, 1);
1346                 spec.verifyIO           = testWithNan ? &compareFUnord<true> : &compareFUnord<false>;
1347
1348                 if (testWithNan)
1349                 {
1350                         spec.extensions.push_back("VK_KHR_shader_float_controls");
1351                         spec.requestedVulkanFeatures.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat32 = DE_TRUE;
1352                 }
1353
1354                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1355         }
1356
1357         return group.release();
1358 }
1359
1360 struct OpAtomicCase
1361 {
1362         const char*             name;
1363         const char*             assembly;
1364         const char*             retValAssembly;
1365         OpAtomicType    opAtomic;
1366         deInt32                 numOutputElements;
1367
1368                                         OpAtomicCase(const char* _name, const char* _assembly, const char* _retValAssembly, OpAtomicType _opAtomic, deInt32 _numOutputElements)
1369                                                 : name                          (_name)
1370                                                 , assembly                      (_assembly)
1371                                                 , retValAssembly        (_retValAssembly)
1372                                                 , opAtomic                      (_opAtomic)
1373                                                 , numOutputElements     (_numOutputElements) {}
1374 };
1375
1376 tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStorageBuffer, int numElements = 65535, bool verifyReturnValues = false, bool volatileAtomic = false)
1377 {
1378         std::string                                             groupName                       ("opatomic");
1379         if (useStorageBuffer)
1380                 groupName += "_storage_buffer";
1381         if (verifyReturnValues)
1382                 groupName += "_return_values";
1383         if (volatileAtomic)
1384                 groupName += "_volatile";
1385         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpAtomic* opcodes"));
1386         vector<OpAtomicCase>                    cases;
1387
1388         const StringTemplate                    shaderTemplate  (
1389
1390                 string("OpCapability Shader\n") +
1391                 (volatileAtomic ? "OpCapability VulkanMemoryModelKHR\n" : "") +
1392                 (useStorageBuffer ? "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n" : "") +
1393                 (volatileAtomic ? "OpExtension \"SPV_KHR_vulkan_memory_model\"\n" : "") +
1394                 (volatileAtomic ? "OpMemoryModel Logical VulkanKHR\n" : "OpMemoryModel Logical GLSL450\n") +
1395                 "OpEntryPoint GLCompute %main \"main\" %id\n"
1396                 "OpExecutionMode %main LocalSize 1 1 1\n" +
1397
1398                 "OpSource GLSL 430\n"
1399                 "OpName %main           \"main\"\n"
1400                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1401
1402                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1403
1404                 "OpDecorate %buf ${BLOCK_DECORATION}\n"
1405                 "OpDecorate %indata DescriptorSet 0\n"
1406                 "OpDecorate %indata Binding 0\n"
1407                 "OpDecorate %i32arr ArrayStride 4\n"
1408                 "OpMemberDecorate %buf 0 Offset 0\n"
1409
1410                 "OpDecorate %sumbuf ${BLOCK_DECORATION}\n"
1411                 "OpDecorate %sum DescriptorSet 0\n"
1412                 "OpDecorate %sum Binding 1\n"
1413                 "OpMemberDecorate %sumbuf 0 Offset 0\n"
1414
1415                 "${RETVAL_BUF_DECORATE}"
1416
1417                 + getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
1418
1419                 "%buf       = OpTypeStruct %i32arr\n"
1420                 "%bufptr    = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
1421                 "%indata    = OpVariable %bufptr ${BLOCK_POINTER_TYPE}\n"
1422
1423                 "%sumbuf    = OpTypeStruct %i32arr\n"
1424                 "%sumbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %sumbuf\n"
1425                 "%sum       = OpVariable %sumbufptr ${BLOCK_POINTER_TYPE}\n"
1426
1427                 "${RETVAL_BUF_DECL}"
1428
1429                 "%id        = OpVariable %uvec3ptr Input\n"
1430                 "%minusone  = OpConstant %i32 -1\n"
1431                 "%zero      = OpConstant %i32 0\n"
1432                 "%one       = OpConstant %u32 1\n"
1433                 "%two       = OpConstant %i32 2\n"
1434                 "%five      = OpConstant %i32 5\n"
1435                 "%volbit    = OpConstant %i32 32768\n"
1436
1437                 "%main      = OpFunction %void None %voidf\n"
1438                 "%label     = OpLabel\n"
1439                 "%idval     = OpLoad %uvec3 %id\n"
1440                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1441
1442                 "%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
1443                 "%inval     = OpLoad %i32 %inloc\n"
1444
1445                 "%outloc    = OpAccessChain %i32ptr %sum %zero ${INDEX}\n"
1446                 "${INSTRUCTION}"
1447                 "${RETVAL_ASSEMBLY}"
1448
1449                 "             OpReturn\n"
1450                 "             OpFunctionEnd\n");
1451
1452         #define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS) \
1453         do { \
1454                 cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)); \
1455         } while (deGetFalse())
1456         #define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, 1)
1457         #define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, numElements)
1458
1459         ADD_OPATOMIC_CASE_1(iadd,       "%retv      = OpAtomicIAdd %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1460                                                                 "             OpStore %retloc %retv\n", OPATOMIC_IADD );
1461         ADD_OPATOMIC_CASE_1(isub,       "%retv      = OpAtomicISub %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1462                                                                 "             OpStore %retloc %retv\n", OPATOMIC_ISUB );
1463         ADD_OPATOMIC_CASE_1(iinc,       "%retv      = OpAtomicIIncrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1464                                                                 "             OpStore %retloc %retv\n", OPATOMIC_IINC );
1465         ADD_OPATOMIC_CASE_1(idec,       "%retv      = OpAtomicIDecrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1466                                                                 "             OpStore %retloc %retv\n", OPATOMIC_IDEC );
1467         if (!verifyReturnValues)
1468         {
1469                 ADD_OPATOMIC_CASE_N(load,       "%inval2    = OpAtomicLoad %i32 %inloc ${SCOPE} ${SEMANTICS}\n"
1470                                                                         "             OpStore %outloc %inval2\n", "", OPATOMIC_LOAD );
1471                 ADD_OPATOMIC_CASE_N(store,      "             OpAtomicStore %outloc ${SCOPE} ${SEMANTICS} %inval\n", "", OPATOMIC_STORE );
1472         }
1473
1474         ADD_OPATOMIC_CASE_N(compex, "%even      = OpSMod %i32 %inval %two\n"
1475                                                                 "             OpStore %outloc %even\n"
1476                                                                 "%retv      = OpAtomicCompareExchange %i32 %outloc ${SCOPE} ${SEMANTICS} ${SEMANTICS} %minusone %zero\n",
1477                                                                 "                         OpStore %retloc %retv\n", OPATOMIC_COMPEX );
1478
1479
1480         #undef ADD_OPATOMIC_CASE
1481         #undef ADD_OPATOMIC_CASE_1
1482         #undef ADD_OPATOMIC_CASE_N
1483
1484         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1485         {
1486                 map<string, string>                     specializations;
1487                 ComputeShaderSpec                       spec;
1488                 vector<deInt32>                         inputInts               (numElements, 0);
1489                 vector<deInt32>                         expected                (cases[caseNdx].numOutputElements, -1);
1490
1491                 if (volatileAtomic)
1492                 {
1493                         spec.extensions.push_back("VK_KHR_vulkan_memory_model");
1494                         spec.requestedVulkanFeatures.extVulkanMemoryModel.vulkanMemoryModel = true;
1495
1496                         // volatile, queuefamily scope
1497                         specializations["SEMANTICS"] = "%volbit";
1498                         specializations["SCOPE"] = "%five";
1499                 }
1500                 else
1501                 {
1502                         // non-volatile, device scope
1503                         specializations["SEMANTICS"] = "%zero";
1504                         specializations["SCOPE"] = "%one";
1505                 }
1506                 specializations["INDEX"]                                = (cases[caseNdx].numOutputElements == 1) ? "%zero" : "%x";
1507                 specializations["INSTRUCTION"]                  = cases[caseNdx].assembly;
1508                 specializations["BLOCK_DECORATION"]             = useStorageBuffer ? "Block" : "BufferBlock";
1509                 specializations["BLOCK_POINTER_TYPE"]   = useStorageBuffer ? "StorageBuffer" : "Uniform";
1510
1511                 if (verifyReturnValues)
1512                 {
1513                         const StringTemplate blockDecoration    (
1514                                 "\n"
1515                                 "OpDecorate %retbuf ${BLOCK_DECORATION}\n"
1516                                 "OpDecorate %ret DescriptorSet 0\n"
1517                                 "OpDecorate %ret Binding 2\n"
1518                                 "OpMemberDecorate %retbuf 0 Offset 0\n\n");
1519
1520                         const StringTemplate blockDeclaration   (
1521                                 "\n"
1522                                 "%retbuf    = OpTypeStruct %i32arr\n"
1523                                 "%retbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %retbuf\n"
1524                                 "%ret       = OpVariable %retbufptr ${BLOCK_POINTER_TYPE}\n\n");
1525
1526                         specializations["RETVAL_ASSEMBLY"] =
1527                                 "%retloc    = OpAccessChain %i32ptr %ret %zero %x\n"
1528                                 + std::string(cases[caseNdx].retValAssembly);
1529
1530                         specializations["RETVAL_BUF_DECORATE"]  = blockDecoration.specialize(specializations);
1531                         specializations["RETVAL_BUF_DECL"]              = blockDeclaration.specialize(specializations);
1532                 }
1533                 else
1534                 {
1535                         specializations["RETVAL_ASSEMBLY"]              = "";
1536                         specializations["RETVAL_BUF_DECORATE"]  = "";
1537                         specializations["RETVAL_BUF_DECL"]              = "";
1538                 }
1539
1540                 spec.assembly                                                   = shaderTemplate.specialize(specializations);
1541
1542                 // Specialize one more time, to catch things that were in a template parameter
1543                 const StringTemplate                                    assemblyTemplate(spec.assembly);
1544                 spec.assembly                                                   = assemblyTemplate.specialize(specializations);
1545
1546                 if (useStorageBuffer)
1547                         spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
1548
1549                 spec.inputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_INPUT)));
1550                 spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_EXPECTED)));
1551                 if (verifyReturnValues)
1552                         spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_ATOMIC_RET)));
1553                 spec.numWorkGroups = IVec3(numElements, 1, 1);
1554
1555                 if (verifyReturnValues)
1556                 {
1557                         switch (cases[caseNdx].opAtomic)
1558                         {
1559                                 case OPATOMIC_IADD:
1560                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IADD>;
1561                                         break;
1562                                 case OPATOMIC_ISUB:
1563                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_ISUB>;
1564                                         break;
1565                                 case OPATOMIC_IINC:
1566                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IINC>;
1567                                         break;
1568                                 case OPATOMIC_IDEC:
1569                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IDEC>;
1570                                         break;
1571                                 case OPATOMIC_COMPEX:
1572                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_COMPEX>;
1573                                         break;
1574                                 default:
1575                                         DE_FATAL("Unsupported OpAtomic type for return value verification");
1576                         }
1577                 }
1578                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1579         }
1580
1581         return group.release();
1582 }
1583
1584 tcu::TestCaseGroup* createOpLineGroup (tcu::TestContext& testCtx)
1585 {
1586         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opline", "Test the OpLine instruction"));
1587         ComputeShaderSpec                               spec;
1588         de::Random                                              rnd                             (deStringHash(group->getName()));
1589         const int                                               numElements             = 100;
1590         vector<float>                                   positiveFloats  (numElements, 0);
1591         vector<float>                                   negativeFloats  (numElements, 0);
1592
1593         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1594
1595         for (size_t ndx = 0; ndx < numElements; ++ndx)
1596                 negativeFloats[ndx] = -positiveFloats[ndx];
1597
1598         spec.assembly =
1599                 string(getComputeAsmShaderPreamble()) +
1600
1601                 "%fname1 = OpString \"negateInputs.comp\"\n"
1602                 "%fname2 = OpString \"negateInputs\"\n"
1603
1604                 "OpSource GLSL 430\n"
1605                 "OpName %main           \"main\"\n"
1606                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1607
1608                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1609
1610                 + string(getComputeAsmInputOutputBufferTraits()) +
1611
1612                 "OpLine %fname1 0 0\n" // At the earliest possible position
1613
1614                 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1615
1616                 "OpLine %fname1 0 1\n" // Multiple OpLines in sequence
1617                 "OpLine %fname2 1 0\n" // Different filenames
1618                 "OpLine %fname1 1000 100000\n"
1619
1620                 "%id        = OpVariable %uvec3ptr Input\n"
1621                 "%zero      = OpConstant %i32 0\n"
1622
1623                 "OpLine %fname1 1 1\n" // Before a function
1624
1625                 "%main      = OpFunction %void None %voidf\n"
1626                 "%label     = OpLabel\n"
1627
1628                 "OpLine %fname1 1 1\n" // In a function
1629
1630                 "%idval     = OpLoad %uvec3 %id\n"
1631                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1632                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1633                 "%inval     = OpLoad %f32 %inloc\n"
1634                 "%neg       = OpFNegate %f32 %inval\n"
1635                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1636                 "             OpStore %outloc %neg\n"
1637                 "             OpReturn\n"
1638                 "             OpFunctionEnd\n";
1639         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1640         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1641         spec.numWorkGroups = IVec3(numElements, 1, 1);
1642
1643         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpLine appearing at different places", spec));
1644
1645         return group.release();
1646 }
1647
1648 bool veryfiBinaryShader (const ProgramBinary& binary)
1649 {
1650         const size_t    paternCount                     = 3u;
1651         bool paternsCheck[paternCount]          =
1652         {
1653                 false, false, false
1654         };
1655         const string patersns[paternCount]      =
1656         {
1657                 "VULKAN CTS",
1658                 "Negative values",
1659                 "Date: 2017/09/21"
1660         };
1661         size_t                  paternNdx               = 0u;
1662
1663         for (size_t ndx = 0u; ndx < binary.getSize(); ++ndx)
1664         {
1665                 if (false == paternsCheck[paternNdx] &&
1666                         patersns[paternNdx][0] == static_cast<char>(binary.getBinary()[ndx]) &&
1667                         deMemoryEqual((const char*)&binary.getBinary()[ndx], &patersns[paternNdx][0], patersns[paternNdx].length()))
1668                 {
1669                         paternsCheck[paternNdx]= true;
1670                         paternNdx++;
1671                         if (paternNdx == paternCount)
1672                                 break;
1673                 }
1674         }
1675
1676         for (size_t ndx = 0u; ndx < paternCount; ++ndx)
1677         {
1678                 if (!paternsCheck[ndx])
1679                         return false;
1680         }
1681
1682         return true;
1683 }
1684
1685 tcu::TestCaseGroup* createOpModuleProcessedGroup (tcu::TestContext& testCtx)
1686 {
1687         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "Test the OpModuleProcessed instruction"));
1688         ComputeShaderSpec                               spec;
1689         de::Random                                              rnd                             (deStringHash(group->getName()));
1690         const int                                               numElements             = 10;
1691         vector<float>                                   positiveFloats  (numElements, 0);
1692         vector<float>                                   negativeFloats  (numElements, 0);
1693
1694         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1695
1696         for (size_t ndx = 0; ndx < numElements; ++ndx)
1697                 negativeFloats[ndx] = -positiveFloats[ndx];
1698
1699         spec.assembly =
1700                 string(getComputeAsmShaderPreamble()) +
1701                 "%fname = OpString \"negateInputs.comp\"\n"
1702
1703                 "OpSource GLSL 430\n"
1704                 "OpName %main           \"main\"\n"
1705                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1706                 "OpModuleProcessed \"VULKAN CTS\"\n"                                    //OpModuleProcessed;
1707                 "OpModuleProcessed \"Negative values\"\n"
1708                 "OpModuleProcessed \"Date: 2017/09/21\"\n"
1709                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1710
1711                 + string(getComputeAsmInputOutputBufferTraits())
1712
1713                 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1714
1715                 "OpLine %fname 0 1\n"
1716
1717                 "OpLine %fname 1000 1\n"
1718
1719                 "%id        = OpVariable %uvec3ptr Input\n"
1720                 "%zero      = OpConstant %i32 0\n"
1721                 "%main      = OpFunction %void None %voidf\n"
1722
1723                 "%label     = OpLabel\n"
1724                 "%idval     = OpLoad %uvec3 %id\n"
1725                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1726
1727                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1728                 "%inval     = OpLoad %f32 %inloc\n"
1729                 "%neg       = OpFNegate %f32 %inval\n"
1730                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1731                 "             OpStore %outloc %neg\n"
1732                 "             OpReturn\n"
1733                 "             OpFunctionEnd\n";
1734         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1735         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1736         spec.numWorkGroups = IVec3(numElements, 1, 1);
1737         spec.verifyBinary = veryfiBinaryShader;
1738         spec.spirvVersion = SPIRV_VERSION_1_3;
1739
1740         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpModuleProcessed Tests", spec));
1741
1742         return group.release();
1743 }
1744
1745 tcu::TestCaseGroup* createOpNoLineGroup (tcu::TestContext& testCtx)
1746 {
1747         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnoline", "Test the OpNoLine instruction"));
1748         ComputeShaderSpec                               spec;
1749         de::Random                                              rnd                             (deStringHash(group->getName()));
1750         const int                                               numElements             = 100;
1751         vector<float>                                   positiveFloats  (numElements, 0);
1752         vector<float>                                   negativeFloats  (numElements, 0);
1753
1754         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1755
1756         for (size_t ndx = 0; ndx < numElements; ++ndx)
1757                 negativeFloats[ndx] = -positiveFloats[ndx];
1758
1759         spec.assembly =
1760                 string(getComputeAsmShaderPreamble()) +
1761
1762                 "%fname = OpString \"negateInputs.comp\"\n"
1763
1764                 "OpSource GLSL 430\n"
1765                 "OpName %main           \"main\"\n"
1766                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1767
1768                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1769
1770                 + string(getComputeAsmInputOutputBufferTraits()) +
1771
1772                 "OpNoLine\n" // At the earliest possible position, without preceding OpLine
1773
1774                 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1775
1776                 "OpLine %fname 0 1\n"
1777                 "OpNoLine\n" // Immediately following a preceding OpLine
1778
1779                 "OpLine %fname 1000 1\n"
1780
1781                 "%id        = OpVariable %uvec3ptr Input\n"
1782                 "%zero      = OpConstant %i32 0\n"
1783
1784                 "OpNoLine\n" // Contents after the previous OpLine
1785
1786                 "%main      = OpFunction %void None %voidf\n"
1787                 "%label     = OpLabel\n"
1788                 "%idval     = OpLoad %uvec3 %id\n"
1789                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1790
1791                 "OpNoLine\n" // Multiple OpNoLine
1792                 "OpNoLine\n"
1793                 "OpNoLine\n"
1794
1795                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1796                 "%inval     = OpLoad %f32 %inloc\n"
1797                 "%neg       = OpFNegate %f32 %inval\n"
1798                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1799                 "             OpStore %outloc %neg\n"
1800                 "             OpReturn\n"
1801                 "             OpFunctionEnd\n";
1802         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1803         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1804         spec.numWorkGroups = IVec3(numElements, 1, 1);
1805
1806         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNoLine appearing at different places", spec));
1807
1808         return group.release();
1809 }
1810
1811 // Compare instruction for the contraction compute case.
1812 // Returns true if the output is what is expected from the test case.
1813 bool compareNoContractCase(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1814 {
1815         if (outputAllocs.size() != 1)
1816                 return false;
1817
1818         // Only size is needed because we are not comparing the exact values.
1819         size_t byteSize = expectedOutputs[0].getByteSize();
1820
1821         const float*    outputAsFloat   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
1822
1823         for(size_t i = 0; i < byteSize / sizeof(float); ++i) {
1824                 if (outputAsFloat[i] != 0.f &&
1825                         outputAsFloat[i] != -ldexp(1, -24)) {
1826                         return false;
1827                 }
1828         }
1829
1830         return true;
1831 }
1832
1833 tcu::TestCaseGroup* createNoContractionGroup (tcu::TestContext& testCtx)
1834 {
1835         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
1836         vector<CaseParameter>                   cases;
1837         const int                                               numElements             = 100;
1838         vector<float>                                   inputFloats1    (numElements, 0);
1839         vector<float>                                   inputFloats2    (numElements, 0);
1840         vector<float>                                   outputFloats    (numElements, 0);
1841         const StringTemplate                    shaderTemplate  (
1842                 string(getComputeAsmShaderPreamble()) +
1843
1844                 "OpName %main           \"main\"\n"
1845                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1846
1847                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1848
1849                 "${DECORATION}\n"
1850
1851                 "OpDecorate %buf BufferBlock\n"
1852                 "OpDecorate %indata1 DescriptorSet 0\n"
1853                 "OpDecorate %indata1 Binding 0\n"
1854                 "OpDecorate %indata2 DescriptorSet 0\n"
1855                 "OpDecorate %indata2 Binding 1\n"
1856                 "OpDecorate %outdata DescriptorSet 0\n"
1857                 "OpDecorate %outdata Binding 2\n"
1858                 "OpDecorate %f32arr ArrayStride 4\n"
1859                 "OpMemberDecorate %buf 0 Offset 0\n"
1860
1861                 + string(getComputeAsmCommonTypes()) +
1862
1863                 "%buf        = OpTypeStruct %f32arr\n"
1864                 "%bufptr     = OpTypePointer Uniform %buf\n"
1865                 "%indata1    = OpVariable %bufptr Uniform\n"
1866                 "%indata2    = OpVariable %bufptr Uniform\n"
1867                 "%outdata    = OpVariable %bufptr Uniform\n"
1868
1869                 "%id         = OpVariable %uvec3ptr Input\n"
1870                 "%zero       = OpConstant %i32 0\n"
1871                 "%c_f_m1     = OpConstant %f32 -1.\n"
1872
1873                 "%main       = OpFunction %void None %voidf\n"
1874                 "%label      = OpLabel\n"
1875                 "%idval      = OpLoad %uvec3 %id\n"
1876                 "%x          = OpCompositeExtract %u32 %idval 0\n"
1877                 "%inloc1     = OpAccessChain %f32ptr %indata1 %zero %x\n"
1878                 "%inval1     = OpLoad %f32 %inloc1\n"
1879                 "%inloc2     = OpAccessChain %f32ptr %indata2 %zero %x\n"
1880                 "%inval2     = OpLoad %f32 %inloc2\n"
1881                 "%mul        = OpFMul %f32 %inval1 %inval2\n"
1882                 "%add        = OpFAdd %f32 %mul %c_f_m1\n"
1883                 "%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
1884                 "              OpStore %outloc %add\n"
1885                 "              OpReturn\n"
1886                 "              OpFunctionEnd\n");
1887
1888         cases.push_back(CaseParameter("multiplication", "OpDecorate %mul NoContraction"));
1889         cases.push_back(CaseParameter("addition",               "OpDecorate %add NoContraction"));
1890         cases.push_back(CaseParameter("both",                   "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"));
1891
1892         for (size_t ndx = 0; ndx < numElements; ++ndx)
1893         {
1894                 inputFloats1[ndx]       = 1.f + std::ldexp(1.f, -23); // 1 + 2^-23.
1895                 inputFloats2[ndx]       = 1.f - std::ldexp(1.f, -23); // 1 - 2^-23.
1896                 // Result for (1 + 2^-23) * (1 - 2^-23) - 1. With NoContraction, the multiplication will be
1897                 // conducted separately and the result is rounded to 1, or 0x1.fffffcp-1
1898                 // So the final result will be 0.f or 0x1p-24.
1899                 // If the operation is combined into a precise fused multiply-add, then the result would be
1900                 // 2^-46 (0xa8800000).
1901                 outputFloats[ndx]       = 0.f;
1902         }
1903
1904         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1905         {
1906                 map<string, string>             specializations;
1907                 ComputeShaderSpec               spec;
1908
1909                 specializations["DECORATION"] = cases[caseNdx].param;
1910                 spec.assembly = shaderTemplate.specialize(specializations);
1911                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1912                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1913                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
1914                 spec.numWorkGroups = IVec3(numElements, 1, 1);
1915                 // Check against the two possible answers based on rounding mode.
1916                 spec.verifyIO = &compareNoContractCase;
1917
1918                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1919         }
1920         return group.release();
1921 }
1922
1923 bool compareFRem(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1924 {
1925         if (outputAllocs.size() != 1)
1926                 return false;
1927
1928         vector<deUint8> expectedBytes;
1929         expectedOutputs[0].getBytes(expectedBytes);
1930
1931         const float*    expectedOutputAsFloat   = reinterpret_cast<const float*>(&expectedBytes.front());
1932         const float*    outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
1933
1934         for (size_t idx = 0; idx < expectedBytes.size() / sizeof(float); ++idx)
1935         {
1936                 const float f0 = expectedOutputAsFloat[idx];
1937                 const float f1 = outputAsFloat[idx];
1938                 // \todo relative error needs to be fairly high because FRem may be implemented as
1939                 // (roughly) frac(a/b)*b, so LSB errors can be magnified. But this should be fine for now.
1940                 if (deFloatAbs((f1 - f0) / f0) > 0.02)
1941                         return false;
1942         }
1943
1944         return true;
1945 }
1946
1947 tcu::TestCaseGroup* createOpFRemGroup (tcu::TestContext& testCtx)
1948 {
1949         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opfrem", "Test the OpFRem instruction"));
1950         ComputeShaderSpec                               spec;
1951         de::Random                                              rnd                             (deStringHash(group->getName()));
1952         const int                                               numElements             = 200;
1953         vector<float>                                   inputFloats1    (numElements, 0);
1954         vector<float>                                   inputFloats2    (numElements, 0);
1955         vector<float>                                   outputFloats    (numElements, 0);
1956
1957         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
1958         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats2[0], numElements);
1959
1960         for (size_t ndx = 0; ndx < numElements; ++ndx)
1961         {
1962                 // Guard against divisors near zero.
1963                 if (std::fabs(inputFloats2[ndx]) < 1e-3)
1964                         inputFloats2[ndx] = 8.f;
1965
1966                 // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
1967                 outputFloats[ndx] = std::fmod(inputFloats1[ndx], inputFloats2[ndx]);
1968         }
1969
1970         spec.assembly =
1971                 string(getComputeAsmShaderPreamble()) +
1972
1973                 "OpName %main           \"main\"\n"
1974                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1975
1976                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1977
1978                 "OpDecorate %buf BufferBlock\n"
1979                 "OpDecorate %indata1 DescriptorSet 0\n"
1980                 "OpDecorate %indata1 Binding 0\n"
1981                 "OpDecorate %indata2 DescriptorSet 0\n"
1982                 "OpDecorate %indata2 Binding 1\n"
1983                 "OpDecorate %outdata DescriptorSet 0\n"
1984                 "OpDecorate %outdata Binding 2\n"
1985                 "OpDecorate %f32arr ArrayStride 4\n"
1986                 "OpMemberDecorate %buf 0 Offset 0\n"
1987
1988                 + string(getComputeAsmCommonTypes()) +
1989
1990                 "%buf        = OpTypeStruct %f32arr\n"
1991                 "%bufptr     = OpTypePointer Uniform %buf\n"
1992                 "%indata1    = OpVariable %bufptr Uniform\n"
1993                 "%indata2    = OpVariable %bufptr Uniform\n"
1994                 "%outdata    = OpVariable %bufptr Uniform\n"
1995
1996                 "%id        = OpVariable %uvec3ptr Input\n"
1997                 "%zero      = OpConstant %i32 0\n"
1998
1999                 "%main      = OpFunction %void None %voidf\n"
2000                 "%label     = OpLabel\n"
2001                 "%idval     = OpLoad %uvec3 %id\n"
2002                 "%x         = OpCompositeExtract %u32 %idval 0\n"
2003                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2004                 "%inval1    = OpLoad %f32 %inloc1\n"
2005                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2006                 "%inval2    = OpLoad %f32 %inloc2\n"
2007                 "%rem       = OpFRem %f32 %inval1 %inval2\n"
2008                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2009                 "             OpStore %outloc %rem\n"
2010                 "             OpReturn\n"
2011                 "             OpFunctionEnd\n";
2012
2013         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2014         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2015         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2016         spec.numWorkGroups = IVec3(numElements, 1, 1);
2017         spec.verifyIO = &compareFRem;
2018
2019         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2020
2021         return group.release();
2022 }
2023
2024 bool compareNMin (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2025 {
2026         if (outputAllocs.size() != 1)
2027                 return false;
2028
2029         const BufferSp&                 expectedOutput                  (expectedOutputs[0].getBuffer());
2030         std::vector<deUint8>    data;
2031         expectedOutput->getBytes(data);
2032
2033         const float* const              expectedOutputAsFloat   = reinterpret_cast<const float*>(&data.front());
2034         const float* const              outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2035
2036         for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2037         {
2038                 const float f0 = expectedOutputAsFloat[idx];
2039                 const float f1 = outputAsFloat[idx];
2040
2041                 // For NMin, we accept NaN as output if both inputs were NaN.
2042                 // Otherwise the NaN is the wrong choise, as on architectures that
2043                 // do not handle NaN, those are huge values.
2044                 if (!(tcu::Float32(f1).isNaN() && tcu::Float32(f0).isNaN()) && deFloatAbs(f1 - f0) > 0.00001f)
2045                         return false;
2046         }
2047
2048         return true;
2049 }
2050
2051 tcu::TestCaseGroup* createOpNMinGroup (tcu::TestContext& testCtx)
2052 {
2053         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnmin", "Test the OpNMin instruction"));
2054         ComputeShaderSpec                               spec;
2055         de::Random                                              rnd                             (deStringHash(group->getName()));
2056         const int                                               numElements             = 200;
2057         vector<float>                                   inputFloats1    (numElements, 0);
2058         vector<float>                                   inputFloats2    (numElements, 0);
2059         vector<float>                                   outputFloats    (numElements, 0);
2060
2061         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2062         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2063
2064         // Make the first case a full-NAN case.
2065         inputFloats1[0] = TCU_NAN;
2066         inputFloats2[0] = TCU_NAN;
2067
2068         for (size_t ndx = 0; ndx < numElements; ++ndx)
2069         {
2070                 // By default, pick the smallest
2071                 outputFloats[ndx] = std::min(inputFloats1[ndx], inputFloats2[ndx]);
2072
2073                 // Make half of the cases NaN cases
2074                 if ((ndx & 1) == 0)
2075                 {
2076                         // Alternate between the NaN operand
2077                         if ((ndx & 2) == 0)
2078                         {
2079                                 outputFloats[ndx] = inputFloats2[ndx];
2080                                 inputFloats1[ndx] = TCU_NAN;
2081                         }
2082                         else
2083                         {
2084                                 outputFloats[ndx] = inputFloats1[ndx];
2085                                 inputFloats2[ndx] = TCU_NAN;
2086                         }
2087                 }
2088         }
2089
2090         spec.assembly =
2091                 "OpCapability Shader\n"
2092                 "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
2093                 "OpMemoryModel Logical GLSL450\n"
2094                 "OpEntryPoint GLCompute %main \"main\" %id\n"
2095                 "OpExecutionMode %main LocalSize 1 1 1\n"
2096
2097                 "OpName %main           \"main\"\n"
2098                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2099
2100                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2101
2102                 "OpDecorate %buf BufferBlock\n"
2103                 "OpDecorate %indata1 DescriptorSet 0\n"
2104                 "OpDecorate %indata1 Binding 0\n"
2105                 "OpDecorate %indata2 DescriptorSet 0\n"
2106                 "OpDecorate %indata2 Binding 1\n"
2107                 "OpDecorate %outdata DescriptorSet 0\n"
2108                 "OpDecorate %outdata Binding 2\n"
2109                 "OpDecorate %f32arr ArrayStride 4\n"
2110                 "OpMemberDecorate %buf 0 Offset 0\n"
2111
2112                 + string(getComputeAsmCommonTypes()) +
2113
2114                 "%buf        = OpTypeStruct %f32arr\n"
2115                 "%bufptr     = OpTypePointer Uniform %buf\n"
2116                 "%indata1    = OpVariable %bufptr Uniform\n"
2117                 "%indata2    = OpVariable %bufptr Uniform\n"
2118                 "%outdata    = OpVariable %bufptr Uniform\n"
2119
2120                 "%id        = OpVariable %uvec3ptr Input\n"
2121                 "%zero      = OpConstant %i32 0\n"
2122
2123                 "%main      = OpFunction %void None %voidf\n"
2124                 "%label     = OpLabel\n"
2125                 "%idval     = OpLoad %uvec3 %id\n"
2126                 "%x         = OpCompositeExtract %u32 %idval 0\n"
2127                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2128                 "%inval1    = OpLoad %f32 %inloc1\n"
2129                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2130                 "%inval2    = OpLoad %f32 %inloc2\n"
2131                 "%rem       = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
2132                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2133                 "             OpStore %outloc %rem\n"
2134                 "             OpReturn\n"
2135                 "             OpFunctionEnd\n";
2136
2137         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2138         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2139         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2140         spec.numWorkGroups = IVec3(numElements, 1, 1);
2141         spec.verifyIO = &compareNMin;
2142
2143         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2144
2145         return group.release();
2146 }
2147
2148 bool compareNMax (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2149 {
2150         if (outputAllocs.size() != 1)
2151                 return false;
2152
2153         const BufferSp&                 expectedOutput                  = expectedOutputs[0].getBuffer();
2154         std::vector<deUint8>    data;
2155         expectedOutput->getBytes(data);
2156
2157         const float* const              expectedOutputAsFloat   = reinterpret_cast<const float*>(&data.front());
2158         const float* const              outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2159
2160         for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2161         {
2162                 const float f0 = expectedOutputAsFloat[idx];
2163                 const float f1 = outputAsFloat[idx];
2164
2165                 // For NMax, NaN is considered acceptable result, since in
2166                 // architectures that do not handle NaNs, those are huge values.
2167                 if (!tcu::Float32(f1).isNaN() && deFloatAbs(f1 - f0) > 0.00001f)
2168                         return false;
2169         }
2170
2171         return true;
2172 }
2173
2174 tcu::TestCaseGroup* createOpNMaxGroup (tcu::TestContext& testCtx)
2175 {
2176         de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnmax", "Test the OpNMax instruction"));
2177         ComputeShaderSpec                               spec;
2178         de::Random                                              rnd                             (deStringHash(group->getName()));
2179         const int                                               numElements             = 200;
2180         vector<float>                                   inputFloats1    (numElements, 0);
2181         vector<float>                                   inputFloats2    (numElements, 0);
2182         vector<float>                                   outputFloats    (numElements, 0);
2183
2184         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2185         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2186
2187         // Make the first case a full-NAN case.
2188         inputFloats1[0] = TCU_NAN;
2189         inputFloats2[0] = TCU_NAN;
2190
2191         for (size_t ndx = 0; ndx < numElements; ++ndx)
2192         {
2193                 // By default, pick the biggest
2194                 outputFloats[ndx] = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2195
2196                 // Make half of the cases NaN cases
2197                 if ((ndx & 1) == 0)
2198                 {
2199                         // Alternate between the NaN operand
2200                         if ((ndx & 2) == 0)
2201                         {
2202                                 outputFloats[ndx] = inputFloats2[ndx];
2203                                 inputFloats1[ndx] = TCU_NAN;
2204                         }
2205                         else
2206                         {
2207                                 outputFloats[ndx] = inputFloats1[ndx];
2208                                 inputFloats2[ndx] = TCU_NAN;
2209                         }
2210                 }
2211         }
2212
2213         spec.assembly =
2214                 "OpCapability Shader\n"
2215                 "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
2216                 "OpMemoryModel Logical GLSL450\n"
2217                 "OpEntryPoint GLCompute %main \"main\" %id\n"
2218                 "OpExecutionMode %main LocalSize 1 1 1\n"
2219
2220                 "OpName %main           \"main\"\n"
2221                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2222
2223                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2224
2225                 "OpDecorate %buf BufferBlock\n"
2226                 "OpDecorate %indata1 DescriptorSet 0\n"
2227                 "OpDecorate %indata1 Binding 0\n"
2228                 "OpDecorate %indata2 DescriptorSet 0\n"
2229                 "OpDecorate %indata2 Binding 1\n"
2230                 "OpDecorate %outdata DescriptorSet 0\n"
2231                 "OpDecorate %outdata Binding 2\n"
2232                 "OpDecorate %f32arr ArrayStride 4\n"
2233                 "OpMemberDecorate %buf 0 Offset 0\n"
2234
2235                 + string(getComputeAsmCommonTypes()) +
2236
2237                 "%buf        = OpTypeStruct %f32arr\n"
2238                 "%bufptr     = OpTypePointer Uniform %buf\n"
2239                 "%indata1    = OpVariable %bufptr Uniform\n"
2240                 "%indata2    = OpVariable %bufptr Uniform\n"
2241                 "%outdata    = OpVariable %bufptr Uniform\n"
2242
2243                 "%id        = OpVariable %uvec3ptr Input\n"
2244                 "%zero      = OpConstant %i32 0\n"
2245
2246                 "%main      = OpFunction %void None %voidf\n"
2247                 "%label     = OpLabel\n"
2248                 "%idval     = OpLoad %uvec3 %id\n"
2249                 "%x         = OpCompositeExtract %u32 %idval 0\n"
2250                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2251                 "%inval1    = OpLoad %f32 %inloc1\n"
2252                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2253                 "%inval2    = OpLoad %f32 %inloc2\n"
2254                 "%rem       = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
2255                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2256                 "             OpStore %outloc %rem\n"
2257                 "             OpReturn\n"
2258                 "             OpFunctionEnd\n";
2259
2260         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2261         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2262         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2263         spec.numWorkGroups = IVec3(numElements, 1, 1);
2264         spec.verifyIO = &compareNMax;
2265
2266         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2267
2268         return group.release();
2269 }
2270
2271 bool compareNClamp (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2272 {
2273         if (outputAllocs.size() != 1)
2274                 return false;
2275
2276         const BufferSp&                 expectedOutput                  = expectedOutputs[0].getBuffer();
2277         std::vector<deUint8>    data;
2278         expectedOutput->getBytes(data);
2279
2280         const float* const              expectedOutputAsFloat   = reinterpret_cast<const float*>(&data.front());
2281         const float* const              outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2282
2283         for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float) / 2; ++idx)
2284         {
2285                 const float e0 = expectedOutputAsFloat[idx * 2];
2286                 const float e1 = expectedOutputAsFloat[idx * 2 + 1];
2287                 const float res = outputAsFloat[idx];
2288
2289                 // For NClamp, we have two possible outcomes based on
2290                 // whether NaNs are handled or not.
2291                 // If either min or max value is NaN, the result is undefined,
2292                 // so this test doesn't stress those. If the clamped value is
2293                 // NaN, and NaNs are handled, the result is min; if NaNs are not
2294                 // handled, they are big values that result in max.
2295                 // If all three parameters are NaN, the result should be NaN.
2296                 if (!((tcu::Float32(e0).isNaN() && tcu::Float32(res).isNaN()) ||
2297                          (deFloatAbs(e0 - res) < 0.00001f) ||
2298                          (deFloatAbs(e1 - res) < 0.00001f)))
2299                         return false;
2300         }
2301
2302         return true;
2303 }
2304
2305 tcu::TestCaseGroup* createOpNClampGroup (tcu::TestContext& testCtx)
2306 {
2307         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnclamp", "Test the OpNClamp instruction"));
2308         ComputeShaderSpec                               spec;
2309         de::Random                                              rnd                             (deStringHash(group->getName()));
2310         const int                                               numElements             = 200;
2311         vector<float>                                   inputFloats1    (numElements, 0);
2312         vector<float>                                   inputFloats2    (numElements, 0);
2313         vector<float>                                   inputFloats3    (numElements, 0);
2314         vector<float>                                   outputFloats    (numElements * 2, 0);
2315
2316         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2317         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2318         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats3[0], numElements);
2319
2320         for (size_t ndx = 0; ndx < numElements; ++ndx)
2321         {
2322                 // Results are only defined if max value is bigger than min value.
2323                 if (inputFloats2[ndx] > inputFloats3[ndx])
2324                 {
2325                         float t = inputFloats2[ndx];
2326                         inputFloats2[ndx] = inputFloats3[ndx];
2327                         inputFloats3[ndx] = t;
2328                 }
2329
2330                 // By default, do the clamp, setting both possible answers
2331                 float defaultRes = std::min(std::max(inputFloats1[ndx], inputFloats2[ndx]), inputFloats3[ndx]);
2332
2333                 float maxResA = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2334                 float maxResB = maxResA;
2335
2336                 // Alternate between the NaN cases
2337                 if (ndx & 1)
2338                 {
2339                         inputFloats1[ndx] = TCU_NAN;
2340                         // If NaN is handled, the result should be same as the clamp minimum.
2341                         // If NaN is not handled, the result should clamp to the clamp maximum.
2342                         maxResA = inputFloats2[ndx];
2343                         maxResB = inputFloats3[ndx];
2344                 }
2345                 else
2346                 {
2347                         // Not a NaN case - only one legal result.
2348                         maxResA = defaultRes;
2349                         maxResB = defaultRes;
2350                 }
2351
2352                 outputFloats[ndx * 2] = maxResA;
2353                 outputFloats[ndx * 2 + 1] = maxResB;
2354         }
2355
2356         // Make the first case a full-NAN case.
2357         inputFloats1[0] = TCU_NAN;
2358         inputFloats2[0] = TCU_NAN;
2359         inputFloats3[0] = TCU_NAN;
2360         outputFloats[0] = TCU_NAN;
2361         outputFloats[1] = TCU_NAN;
2362
2363         spec.assembly =
2364                 "OpCapability Shader\n"
2365                 "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
2366                 "OpMemoryModel Logical GLSL450\n"
2367                 "OpEntryPoint GLCompute %main \"main\" %id\n"
2368                 "OpExecutionMode %main LocalSize 1 1 1\n"
2369
2370                 "OpName %main           \"main\"\n"
2371                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2372
2373                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2374
2375                 "OpDecorate %buf BufferBlock\n"
2376                 "OpDecorate %indata1 DescriptorSet 0\n"
2377                 "OpDecorate %indata1 Binding 0\n"
2378                 "OpDecorate %indata2 DescriptorSet 0\n"
2379                 "OpDecorate %indata2 Binding 1\n"
2380                 "OpDecorate %indata3 DescriptorSet 0\n"
2381                 "OpDecorate %indata3 Binding 2\n"
2382                 "OpDecorate %outdata DescriptorSet 0\n"
2383                 "OpDecorate %outdata Binding 3\n"
2384                 "OpDecorate %f32arr ArrayStride 4\n"
2385                 "OpMemberDecorate %buf 0 Offset 0\n"
2386
2387                 + string(getComputeAsmCommonTypes()) +
2388
2389                 "%buf        = OpTypeStruct %f32arr\n"
2390                 "%bufptr     = OpTypePointer Uniform %buf\n"
2391                 "%indata1    = OpVariable %bufptr Uniform\n"
2392                 "%indata2    = OpVariable %bufptr Uniform\n"
2393                 "%indata3    = OpVariable %bufptr Uniform\n"
2394                 "%outdata    = OpVariable %bufptr Uniform\n"
2395
2396                 "%id        = OpVariable %uvec3ptr Input\n"
2397                 "%zero      = OpConstant %i32 0\n"
2398
2399                 "%main      = OpFunction %void None %voidf\n"
2400                 "%label     = OpLabel\n"
2401                 "%idval     = OpLoad %uvec3 %id\n"
2402                 "%x         = OpCompositeExtract %u32 %idval 0\n"
2403                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2404                 "%inval1    = OpLoad %f32 %inloc1\n"
2405                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2406                 "%inval2    = OpLoad %f32 %inloc2\n"
2407                 "%inloc3    = OpAccessChain %f32ptr %indata3 %zero %x\n"
2408                 "%inval3    = OpLoad %f32 %inloc3\n"
2409                 "%rem       = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
2410                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2411                 "             OpStore %outloc %rem\n"
2412                 "             OpReturn\n"
2413                 "             OpFunctionEnd\n";
2414
2415         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2416         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2417         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
2418         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2419         spec.numWorkGroups = IVec3(numElements, 1, 1);
2420         spec.verifyIO = &compareNClamp;
2421
2422         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2423
2424         return group.release();
2425 }
2426
2427 tcu::TestCaseGroup* createOpSRemComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2428 {
2429         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsrem", "Test the OpSRem instruction"));
2430         de::Random                                              rnd                             (deStringHash(group->getName()));
2431         const int                                               numElements             = 200;
2432
2433         const struct CaseParams
2434         {
2435                 const char*             name;
2436                 const char*             failMessage;            // customized status message
2437                 qpTestResult    failResult;                     // override status on failure
2438                 int                             op1Min, op1Max;         // operand ranges
2439                 int                             op2Min, op2Max;
2440         } cases[] =
2441         {
2442                 { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    0,              65536,  0,              100 },
2443                 { "all",                "Inconsistent results, but within specification",       negFailResult,                  -65536, 65536,  -100,   100 },  // see below
2444         };
2445         // If either operand is negative the result is undefined. Some implementations may still return correct values.
2446
2447         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2448         {
2449                 const CaseParams&       params          = cases[caseNdx];
2450                 ComputeShaderSpec       spec;
2451                 vector<deInt32>         inputInts1      (numElements, 0);
2452                 vector<deInt32>         inputInts2      (numElements, 0);
2453                 vector<deInt32>         outputInts      (numElements, 0);
2454
2455                 fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2456                 fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2457
2458                 for (int ndx = 0; ndx < numElements; ++ndx)
2459                 {
2460                         // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2461                         outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2462                 }
2463
2464                 spec.assembly =
2465                         string(getComputeAsmShaderPreamble()) +
2466
2467                         "OpName %main           \"main\"\n"
2468                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2469
2470                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2471
2472                         "OpDecorate %buf BufferBlock\n"
2473                         "OpDecorate %indata1 DescriptorSet 0\n"
2474                         "OpDecorate %indata1 Binding 0\n"
2475                         "OpDecorate %indata2 DescriptorSet 0\n"
2476                         "OpDecorate %indata2 Binding 1\n"
2477                         "OpDecorate %outdata DescriptorSet 0\n"
2478                         "OpDecorate %outdata Binding 2\n"
2479                         "OpDecorate %i32arr ArrayStride 4\n"
2480                         "OpMemberDecorate %buf 0 Offset 0\n"
2481
2482                         + string(getComputeAsmCommonTypes()) +
2483
2484                         "%buf        = OpTypeStruct %i32arr\n"
2485                         "%bufptr     = OpTypePointer Uniform %buf\n"
2486                         "%indata1    = OpVariable %bufptr Uniform\n"
2487                         "%indata2    = OpVariable %bufptr Uniform\n"
2488                         "%outdata    = OpVariable %bufptr Uniform\n"
2489
2490                         "%id        = OpVariable %uvec3ptr Input\n"
2491                         "%zero      = OpConstant %i32 0\n"
2492
2493                         "%main      = OpFunction %void None %voidf\n"
2494                         "%label     = OpLabel\n"
2495                         "%idval     = OpLoad %uvec3 %id\n"
2496                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2497                         "%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
2498                         "%inval1    = OpLoad %i32 %inloc1\n"
2499                         "%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
2500                         "%inval2    = OpLoad %i32 %inloc2\n"
2501                         "%rem       = OpSRem %i32 %inval1 %inval2\n"
2502                         "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
2503                         "             OpStore %outloc %rem\n"
2504                         "             OpReturn\n"
2505                         "             OpFunctionEnd\n";
2506
2507                 spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts1)));
2508                 spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts2)));
2509                 spec.outputs.push_back  (BufferSp(new Int32Buffer(outputInts)));
2510                 spec.numWorkGroups              = IVec3(numElements, 1, 1);
2511                 spec.failResult                 = params.failResult;
2512                 spec.failMessage                = params.failMessage;
2513
2514                 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2515         }
2516
2517         return group.release();
2518 }
2519
2520 tcu::TestCaseGroup* createOpSRemComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2521 {
2522         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsrem64", "Test the 64-bit OpSRem instruction"));
2523         de::Random                                              rnd                             (deStringHash(group->getName()));
2524         const int                                               numElements             = 200;
2525
2526         const struct CaseParams
2527         {
2528                 const char*             name;
2529                 const char*             failMessage;            // customized status message
2530                 qpTestResult    failResult;                     // override status on failure
2531                 bool                    positive;
2532         } cases[] =
2533         {
2534                 { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    true },
2535                 { "all",                "Inconsistent results, but within specification",       negFailResult,                  false },        // see below
2536         };
2537         // If either operand is negative the result is undefined. Some implementations may still return correct values.
2538
2539         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2540         {
2541                 const CaseParams&       params          = cases[caseNdx];
2542                 ComputeShaderSpec       spec;
2543                 vector<deInt64>         inputInts1      (numElements, 0);
2544                 vector<deInt64>         inputInts2      (numElements, 0);
2545                 vector<deInt64>         outputInts      (numElements, 0);
2546
2547                 if (params.positive)
2548                 {
2549                         fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2550                         fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2551                 }
2552                 else
2553                 {
2554                         fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2555                         fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2556                 }
2557
2558                 for (int ndx = 0; ndx < numElements; ++ndx)
2559                 {
2560                         // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2561                         outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2562                 }
2563
2564                 spec.assembly =
2565                         "OpCapability Int64\n"
2566
2567                         + string(getComputeAsmShaderPreamble()) +
2568
2569                         "OpName %main           \"main\"\n"
2570                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2571
2572                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2573
2574                         "OpDecorate %buf BufferBlock\n"
2575                         "OpDecorate %indata1 DescriptorSet 0\n"
2576                         "OpDecorate %indata1 Binding 0\n"
2577                         "OpDecorate %indata2 DescriptorSet 0\n"
2578                         "OpDecorate %indata2 Binding 1\n"
2579                         "OpDecorate %outdata DescriptorSet 0\n"
2580                         "OpDecorate %outdata Binding 2\n"
2581                         "OpDecorate %i64arr ArrayStride 8\n"
2582                         "OpMemberDecorate %buf 0 Offset 0\n"
2583
2584                         + string(getComputeAsmCommonTypes())
2585                         + string(getComputeAsmCommonInt64Types()) +
2586
2587                         "%buf        = OpTypeStruct %i64arr\n"
2588                         "%bufptr     = OpTypePointer Uniform %buf\n"
2589                         "%indata1    = OpVariable %bufptr Uniform\n"
2590                         "%indata2    = OpVariable %bufptr Uniform\n"
2591                         "%outdata    = OpVariable %bufptr Uniform\n"
2592
2593                         "%id        = OpVariable %uvec3ptr Input\n"
2594                         "%zero      = OpConstant %i64 0\n"
2595
2596                         "%main      = OpFunction %void None %voidf\n"
2597                         "%label     = OpLabel\n"
2598                         "%idval     = OpLoad %uvec3 %id\n"
2599                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2600                         "%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
2601                         "%inval1    = OpLoad %i64 %inloc1\n"
2602                         "%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
2603                         "%inval2    = OpLoad %i64 %inloc2\n"
2604                         "%rem       = OpSRem %i64 %inval1 %inval2\n"
2605                         "%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
2606                         "             OpStore %outloc %rem\n"
2607                         "             OpReturn\n"
2608                         "             OpFunctionEnd\n";
2609
2610                 spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts1)));
2611                 spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts2)));
2612                 spec.outputs.push_back  (BufferSp(new Int64Buffer(outputInts)));
2613                 spec.numWorkGroups              = IVec3(numElements, 1, 1);
2614                 spec.failResult                 = params.failResult;
2615                 spec.failMessage                = params.failMessage;
2616
2617                 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2618
2619                 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2620         }
2621
2622         return group.release();
2623 }
2624
2625 tcu::TestCaseGroup* createOpSModComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2626 {
2627         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsmod", "Test the OpSMod instruction"));
2628         de::Random                                              rnd                             (deStringHash(group->getName()));
2629         const int                                               numElements             = 200;
2630
2631         const struct CaseParams
2632         {
2633                 const char*             name;
2634                 const char*             failMessage;            // customized status message
2635                 qpTestResult    failResult;                     // override status on failure
2636                 int                             op1Min, op1Max;         // operand ranges
2637                 int                             op2Min, op2Max;
2638         } cases[] =
2639         {
2640                 { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    0,              65536,  0,              100 },
2641                 { "all",                "Inconsistent results, but within specification",       negFailResult,                  -65536, 65536,  -100,   100 },  // see below
2642         };
2643         // If either operand is negative the result is undefined. Some implementations may still return correct values.
2644
2645         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2646         {
2647                 const CaseParams&       params          = cases[caseNdx];
2648
2649                 ComputeShaderSpec       spec;
2650                 vector<deInt32>         inputInts1      (numElements, 0);
2651                 vector<deInt32>         inputInts2      (numElements, 0);
2652                 vector<deInt32>         outputInts      (numElements, 0);
2653
2654                 fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2655                 fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2656
2657                 for (int ndx = 0; ndx < numElements; ++ndx)
2658                 {
2659                         deInt32 rem = inputInts1[ndx] % inputInts2[ndx];
2660                         if (rem == 0)
2661                         {
2662                                 outputInts[ndx] = 0;
2663                         }
2664                         else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2665                         {
2666                                 // They have the same sign
2667                                 outputInts[ndx] = rem;
2668                         }
2669                         else
2670                         {
2671                                 // They have opposite sign.  The remainder operation takes the
2672                                 // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2673                                 // of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
2674                                 // the result has the correct sign and that it is still
2675                                 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2676                                 //
2677                                 // See also http://mathforum.org/library/drmath/view/52343.html
2678                                 outputInts[ndx] = rem + inputInts2[ndx];
2679                         }
2680                 }
2681
2682                 spec.assembly =
2683                         string(getComputeAsmShaderPreamble()) +
2684
2685                         "OpName %main           \"main\"\n"
2686                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2687
2688                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2689
2690                         "OpDecorate %buf BufferBlock\n"
2691                         "OpDecorate %indata1 DescriptorSet 0\n"
2692                         "OpDecorate %indata1 Binding 0\n"
2693                         "OpDecorate %indata2 DescriptorSet 0\n"
2694                         "OpDecorate %indata2 Binding 1\n"
2695                         "OpDecorate %outdata DescriptorSet 0\n"
2696                         "OpDecorate %outdata Binding 2\n"
2697                         "OpDecorate %i32arr ArrayStride 4\n"
2698                         "OpMemberDecorate %buf 0 Offset 0\n"
2699
2700                         + string(getComputeAsmCommonTypes()) +
2701
2702                         "%buf        = OpTypeStruct %i32arr\n"
2703                         "%bufptr     = OpTypePointer Uniform %buf\n"
2704                         "%indata1    = OpVariable %bufptr Uniform\n"
2705                         "%indata2    = OpVariable %bufptr Uniform\n"
2706                         "%outdata    = OpVariable %bufptr Uniform\n"
2707
2708                         "%id        = OpVariable %uvec3ptr Input\n"
2709                         "%zero      = OpConstant %i32 0\n"
2710
2711                         "%main      = OpFunction %void None %voidf\n"
2712                         "%label     = OpLabel\n"
2713                         "%idval     = OpLoad %uvec3 %id\n"
2714                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2715                         "%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
2716                         "%inval1    = OpLoad %i32 %inloc1\n"
2717                         "%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
2718                         "%inval2    = OpLoad %i32 %inloc2\n"
2719                         "%rem       = OpSMod %i32 %inval1 %inval2\n"
2720                         "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
2721                         "             OpStore %outloc %rem\n"
2722                         "             OpReturn\n"
2723                         "             OpFunctionEnd\n";
2724
2725                 spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts1)));
2726                 spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts2)));
2727                 spec.outputs.push_back  (BufferSp(new Int32Buffer(outputInts)));
2728                 spec.numWorkGroups              = IVec3(numElements, 1, 1);
2729                 spec.failResult                 = params.failResult;
2730                 spec.failMessage                = params.failMessage;
2731
2732                 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2733         }
2734
2735         return group.release();
2736 }
2737
2738 tcu::TestCaseGroup* createOpSModComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2739 {
2740         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsmod64", "Test the OpSMod instruction"));
2741         de::Random                                              rnd                             (deStringHash(group->getName()));
2742         const int                                               numElements             = 200;
2743
2744         const struct CaseParams
2745         {
2746                 const char*             name;
2747                 const char*             failMessage;            // customized status message
2748                 qpTestResult    failResult;                     // override status on failure
2749                 bool                    positive;
2750         } cases[] =
2751         {
2752                 { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    true },
2753                 { "all",                "Inconsistent results, but within specification",       negFailResult,                  false },        // see below
2754         };
2755         // If either operand is negative the result is undefined. Some implementations may still return correct values.
2756
2757         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2758         {
2759                 const CaseParams&       params          = cases[caseNdx];
2760
2761                 ComputeShaderSpec       spec;
2762                 vector<deInt64>         inputInts1      (numElements, 0);
2763                 vector<deInt64>         inputInts2      (numElements, 0);
2764                 vector<deInt64>         outputInts      (numElements, 0);
2765
2766
2767                 if (params.positive)
2768                 {
2769                         fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2770                         fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2771                 }
2772                 else
2773                 {
2774                         fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2775                         fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2776                 }
2777
2778                 for (int ndx = 0; ndx < numElements; ++ndx)
2779                 {
2780                         deInt64 rem = inputInts1[ndx] % inputInts2[ndx];
2781                         if (rem == 0)
2782                         {
2783                                 outputInts[ndx] = 0;
2784                         }
2785                         else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2786                         {
2787                                 // They have the same sign
2788                                 outputInts[ndx] = rem;
2789                         }
2790                         else
2791                         {
2792                                 // They have opposite sign.  The remainder operation takes the
2793                                 // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2794                                 // of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
2795                                 // the result has the correct sign and that it is still
2796                                 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2797                                 //
2798                                 // See also http://mathforum.org/library/drmath/view/52343.html
2799                                 outputInts[ndx] = rem + inputInts2[ndx];
2800                         }
2801                 }
2802
2803                 spec.assembly =
2804                         "OpCapability Int64\n"
2805
2806                         + string(getComputeAsmShaderPreamble()) +
2807
2808                         "OpName %main           \"main\"\n"
2809                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2810
2811                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2812
2813                         "OpDecorate %buf BufferBlock\n"
2814                         "OpDecorate %indata1 DescriptorSet 0\n"
2815                         "OpDecorate %indata1 Binding 0\n"
2816                         "OpDecorate %indata2 DescriptorSet 0\n"
2817                         "OpDecorate %indata2 Binding 1\n"
2818                         "OpDecorate %outdata DescriptorSet 0\n"
2819                         "OpDecorate %outdata Binding 2\n"
2820                         "OpDecorate %i64arr ArrayStride 8\n"
2821                         "OpMemberDecorate %buf 0 Offset 0\n"
2822
2823                         + string(getComputeAsmCommonTypes())
2824                         + string(getComputeAsmCommonInt64Types()) +
2825
2826                         "%buf        = OpTypeStruct %i64arr\n"
2827                         "%bufptr     = OpTypePointer Uniform %buf\n"
2828                         "%indata1    = OpVariable %bufptr Uniform\n"
2829                         "%indata2    = OpVariable %bufptr Uniform\n"
2830                         "%outdata    = OpVariable %bufptr Uniform\n"
2831
2832                         "%id        = OpVariable %uvec3ptr Input\n"
2833                         "%zero      = OpConstant %i64 0\n"
2834
2835                         "%main      = OpFunction %void None %voidf\n"
2836                         "%label     = OpLabel\n"
2837                         "%idval     = OpLoad %uvec3 %id\n"
2838                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2839                         "%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
2840                         "%inval1    = OpLoad %i64 %inloc1\n"
2841                         "%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
2842                         "%inval2    = OpLoad %i64 %inloc2\n"
2843                         "%rem       = OpSMod %i64 %inval1 %inval2\n"
2844                         "%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
2845                         "             OpStore %outloc %rem\n"
2846                         "             OpReturn\n"
2847                         "             OpFunctionEnd\n";
2848
2849                 spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts1)));
2850                 spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts2)));
2851                 spec.outputs.push_back  (BufferSp(new Int64Buffer(outputInts)));
2852                 spec.numWorkGroups              = IVec3(numElements, 1, 1);
2853                 spec.failResult                 = params.failResult;
2854                 spec.failMessage                = params.failMessage;
2855
2856                 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2857
2858                 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2859         }
2860
2861         return group.release();
2862 }
2863
2864 // Copy contents in the input buffer to the output buffer.
2865 tcu::TestCaseGroup* createOpCopyMemoryGroup (tcu::TestContext& testCtx)
2866 {
2867         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opcopymemory", "Test the OpCopyMemory instruction"));
2868         de::Random                                              rnd                             (deStringHash(group->getName()));
2869         const int                                               numElements             = 100;
2870
2871         // The following case adds vec4(0., 0.5, 1.5, 2.5) to each of the elements in the input buffer and writes output to the output buffer.
2872         ComputeShaderSpec                               spec1;
2873         vector<Vec4>                                    inputFloats1    (numElements);
2874         vector<Vec4>                                    outputFloats1   (numElements);
2875
2876         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats1[0], numElements * 4);
2877
2878         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
2879         floorAll(inputFloats1);
2880
2881         for (size_t ndx = 0; ndx < numElements; ++ndx)
2882                 outputFloats1[ndx] = inputFloats1[ndx] + Vec4(0.f, 0.5f, 1.5f, 2.5f);
2883
2884         spec1.assembly =
2885                 string(getComputeAsmShaderPreamble()) +
2886
2887                 "OpName %main           \"main\"\n"
2888                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2889
2890                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2891                 "OpDecorate %vec4arr ArrayStride 16\n"
2892
2893                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2894
2895                 "%vec4       = OpTypeVector %f32 4\n"
2896                 "%vec4ptr_u  = OpTypePointer Uniform %vec4\n"
2897                 "%vec4ptr_f  = OpTypePointer Function %vec4\n"
2898                 "%vec4arr    = OpTypeRuntimeArray %vec4\n"
2899                 "%buf        = OpTypeStruct %vec4arr\n"
2900                 "%bufptr     = OpTypePointer Uniform %buf\n"
2901                 "%indata     = OpVariable %bufptr Uniform\n"
2902                 "%outdata    = OpVariable %bufptr Uniform\n"
2903
2904                 "%id         = OpVariable %uvec3ptr Input\n"
2905                 "%zero       = OpConstant %i32 0\n"
2906                 "%c_f_0      = OpConstant %f32 0.\n"
2907                 "%c_f_0_5    = OpConstant %f32 0.5\n"
2908                 "%c_f_1_5    = OpConstant %f32 1.5\n"
2909                 "%c_f_2_5    = OpConstant %f32 2.5\n"
2910                 "%c_vec4     = OpConstantComposite %vec4 %c_f_0 %c_f_0_5 %c_f_1_5 %c_f_2_5\n"
2911
2912                 "%main       = OpFunction %void None %voidf\n"
2913                 "%label      = OpLabel\n"
2914                 "%v_vec4     = OpVariable %vec4ptr_f Function\n"
2915                 "%idval      = OpLoad %uvec3 %id\n"
2916                 "%x          = OpCompositeExtract %u32 %idval 0\n"
2917                 "%inloc      = OpAccessChain %vec4ptr_u %indata %zero %x\n"
2918                 "%outloc     = OpAccessChain %vec4ptr_u %outdata %zero %x\n"
2919                 "              OpCopyMemory %v_vec4 %inloc\n"
2920                 "%v_vec4_val = OpLoad %vec4 %v_vec4\n"
2921                 "%add        = OpFAdd %vec4 %v_vec4_val %c_vec4\n"
2922                 "              OpStore %outloc %add\n"
2923                 "              OpReturn\n"
2924                 "              OpFunctionEnd\n";
2925
2926         spec1.inputs.push_back(BufferSp(new Vec4Buffer(inputFloats1)));
2927         spec1.outputs.push_back(BufferSp(new Vec4Buffer(outputFloats1)));
2928         spec1.numWorkGroups = IVec3(numElements, 1, 1);
2929
2930         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector", "OpCopyMemory elements of vector type", spec1));
2931
2932         // The following case copies a float[100] variable from the input buffer to the output buffer.
2933         ComputeShaderSpec                               spec2;
2934         vector<float>                                   inputFloats2    (numElements);
2935         vector<float>                                   outputFloats2   (numElements);
2936
2937         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats2[0], numElements);
2938
2939         for (size_t ndx = 0; ndx < numElements; ++ndx)
2940                 outputFloats2[ndx] = inputFloats2[ndx];
2941
2942         spec2.assembly =
2943                 string(getComputeAsmShaderPreamble()) +
2944
2945                 "OpName %main           \"main\"\n"
2946                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2947
2948                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2949                 "OpDecorate %f32arr100 ArrayStride 4\n"
2950
2951                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2952
2953                 "%hundred        = OpConstant %u32 100\n"
2954                 "%f32arr100      = OpTypeArray %f32 %hundred\n"
2955                 "%f32arr100ptr_f = OpTypePointer Function %f32arr100\n"
2956                 "%f32arr100ptr_u = OpTypePointer Uniform %f32arr100\n"
2957                 "%buf            = OpTypeStruct %f32arr100\n"
2958                 "%bufptr         = OpTypePointer Uniform %buf\n"
2959                 "%indata         = OpVariable %bufptr Uniform\n"
2960                 "%outdata        = OpVariable %bufptr Uniform\n"
2961
2962                 "%id             = OpVariable %uvec3ptr Input\n"
2963                 "%zero           = OpConstant %i32 0\n"
2964
2965                 "%main           = OpFunction %void None %voidf\n"
2966                 "%label          = OpLabel\n"
2967                 "%var            = OpVariable %f32arr100ptr_f Function\n"
2968                 "%inarr          = OpAccessChain %f32arr100ptr_u %indata %zero\n"
2969                 "%outarr         = OpAccessChain %f32arr100ptr_u %outdata %zero\n"
2970                 "                  OpCopyMemory %var %inarr\n"
2971                 "                  OpCopyMemory %outarr %var\n"
2972                 "                  OpReturn\n"
2973                 "                  OpFunctionEnd\n";
2974
2975         spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2976         spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
2977         spec2.numWorkGroups = IVec3(1, 1, 1);
2978
2979         group->addChild(new SpvAsmComputeShaderCase(testCtx, "array", "OpCopyMemory elements of array type", spec2));
2980
2981         // The following case copies a struct{vec4, vec4, vec4, vec4} variable from the input buffer to the output buffer.
2982         ComputeShaderSpec                               spec3;
2983         vector<float>                                   inputFloats3    (16);
2984         vector<float>                                   outputFloats3   (16);
2985
2986         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats3[0], 16);
2987
2988         for (size_t ndx = 0; ndx < 16; ++ndx)
2989                 outputFloats3[ndx] = inputFloats3[ndx];
2990
2991         spec3.assembly =
2992                 string(getComputeAsmShaderPreamble()) +
2993
2994                 "OpName %main           \"main\"\n"
2995                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2996
2997                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2998                 //"OpMemberDecorate %buf 0 Offset 0\n"  - exists in getComputeAsmInputOutputBufferTraits
2999                 "OpMemberDecorate %buf 1 Offset 16\n"
3000                 "OpMemberDecorate %buf 2 Offset 32\n"
3001                 "OpMemberDecorate %buf 3 Offset 48\n"
3002
3003                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3004
3005                 "%vec4      = OpTypeVector %f32 4\n"
3006                 "%buf       = OpTypeStruct %vec4 %vec4 %vec4 %vec4\n"
3007                 "%bufptr    = OpTypePointer Uniform %buf\n"
3008                 "%indata    = OpVariable %bufptr Uniform\n"
3009                 "%outdata   = OpVariable %bufptr Uniform\n"
3010                 "%vec4stptr = OpTypePointer Function %buf\n"
3011
3012                 "%id        = OpVariable %uvec3ptr Input\n"
3013                 "%zero      = OpConstant %i32 0\n"
3014
3015                 "%main      = OpFunction %void None %voidf\n"
3016                 "%label     = OpLabel\n"
3017                 "%var       = OpVariable %vec4stptr Function\n"
3018                 "             OpCopyMemory %var %indata\n"
3019                 "             OpCopyMemory %outdata %var\n"
3020                 "             OpReturn\n"
3021                 "             OpFunctionEnd\n";
3022
3023         spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3024         spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
3025         spec3.numWorkGroups = IVec3(1, 1, 1);
3026
3027         group->addChild(new SpvAsmComputeShaderCase(testCtx, "struct", "OpCopyMemory elements of struct type", spec3));
3028
3029         // The following case negates multiple float variables from the input buffer and stores the results to the output buffer.
3030         ComputeShaderSpec                               spec4;
3031         vector<float>                                   inputFloats4    (numElements);
3032         vector<float>                                   outputFloats4   (numElements);
3033
3034         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats4[0], numElements);
3035
3036         for (size_t ndx = 0; ndx < numElements; ++ndx)
3037                 outputFloats4[ndx] = -inputFloats4[ndx];
3038
3039         spec4.assembly =
3040                 string(getComputeAsmShaderPreamble()) +
3041
3042                 "OpName %main           \"main\"\n"
3043                 "OpName %id             \"gl_GlobalInvocationID\"\n"
3044
3045                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3046
3047                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3048
3049                 "%f32ptr_f  = OpTypePointer Function %f32\n"
3050                 "%id        = OpVariable %uvec3ptr Input\n"
3051                 "%zero      = OpConstant %i32 0\n"
3052
3053                 "%main      = OpFunction %void None %voidf\n"
3054                 "%label     = OpLabel\n"
3055                 "%var       = OpVariable %f32ptr_f Function\n"
3056                 "%idval     = OpLoad %uvec3 %id\n"
3057                 "%x         = OpCompositeExtract %u32 %idval 0\n"
3058                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
3059                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
3060                 "             OpCopyMemory %var %inloc\n"
3061                 "%val       = OpLoad %f32 %var\n"
3062                 "%neg       = OpFNegate %f32 %val\n"
3063                 "             OpStore %outloc %neg\n"
3064                 "             OpReturn\n"
3065                 "             OpFunctionEnd\n";
3066
3067         spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3068         spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
3069         spec4.numWorkGroups = IVec3(numElements, 1, 1);
3070
3071         group->addChild(new SpvAsmComputeShaderCase(testCtx, "float", "OpCopyMemory elements of float type", spec4));
3072
3073         return group.release();
3074 }
3075
3076 tcu::TestCaseGroup* createOpCopyObjectGroup (tcu::TestContext& testCtx)
3077 {
3078         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opcopyobject", "Test the OpCopyObject instruction"));
3079         ComputeShaderSpec                               spec;
3080         de::Random                                              rnd                             (deStringHash(group->getName()));
3081         const int                                               numElements             = 100;
3082         vector<float>                                   inputFloats             (numElements, 0);
3083         vector<float>                                   outputFloats    (numElements, 0);
3084
3085         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
3086
3087         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3088         floorAll(inputFloats);
3089
3090         for (size_t ndx = 0; ndx < numElements; ++ndx)
3091                 outputFloats[ndx] = inputFloats[ndx] + 7.5f;
3092
3093         spec.assembly =
3094                 string(getComputeAsmShaderPreamble()) +
3095
3096                 "OpName %main           \"main\"\n"
3097                 "OpName %id             \"gl_GlobalInvocationID\"\n"
3098
3099                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3100
3101                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3102
3103                 "%fmat     = OpTypeMatrix %fvec3 3\n"
3104                 "%three    = OpConstant %u32 3\n"
3105                 "%farr     = OpTypeArray %f32 %three\n"
3106                 "%fst      = OpTypeStruct %f32 %f32\n"
3107
3108                 + string(getComputeAsmInputOutputBuffer()) +
3109
3110                 "%id            = OpVariable %uvec3ptr Input\n"
3111                 "%zero          = OpConstant %i32 0\n"
3112                 "%c_f           = OpConstant %f32 1.5\n"
3113                 "%c_fvec3       = OpConstantComposite %fvec3 %c_f %c_f %c_f\n"
3114                 "%c_fmat        = OpConstantComposite %fmat %c_fvec3 %c_fvec3 %c_fvec3\n"
3115                 "%c_farr        = OpConstantComposite %farr %c_f %c_f %c_f\n"
3116                 "%c_fst         = OpConstantComposite %fst %c_f %c_f\n"
3117
3118                 "%main          = OpFunction %void None %voidf\n"
3119                 "%label         = OpLabel\n"
3120                 "%c_f_copy      = OpCopyObject %f32   %c_f\n"
3121                 "%c_fvec3_copy  = OpCopyObject %fvec3 %c_fvec3\n"
3122                 "%c_fmat_copy   = OpCopyObject %fmat  %c_fmat\n"
3123                 "%c_farr_copy   = OpCopyObject %farr  %c_farr\n"
3124                 "%c_fst_copy    = OpCopyObject %fst   %c_fst\n"
3125                 "%fvec3_elem    = OpCompositeExtract %f32 %c_fvec3_copy 0\n"
3126                 "%fmat_elem     = OpCompositeExtract %f32 %c_fmat_copy 1 2\n"
3127                 "%farr_elem     = OpCompositeExtract %f32 %c_farr_copy 2\n"
3128                 "%fst_elem      = OpCompositeExtract %f32 %c_fst_copy 1\n"
3129                 // Add up. 1.5 * 5 = 7.5.
3130                 "%add1          = OpFAdd %f32 %c_f_copy %fvec3_elem\n"
3131                 "%add2          = OpFAdd %f32 %add1     %fmat_elem\n"
3132                 "%add3          = OpFAdd %f32 %add2     %farr_elem\n"
3133                 "%add4          = OpFAdd %f32 %add3     %fst_elem\n"
3134
3135                 "%idval         = OpLoad %uvec3 %id\n"
3136                 "%x             = OpCompositeExtract %u32 %idval 0\n"
3137                 "%inloc         = OpAccessChain %f32ptr %indata %zero %x\n"
3138                 "%outloc        = OpAccessChain %f32ptr %outdata %zero %x\n"
3139                 "%inval         = OpLoad %f32 %inloc\n"
3140                 "%add           = OpFAdd %f32 %add4 %inval\n"
3141                 "                 OpStore %outloc %add\n"
3142                 "                 OpReturn\n"
3143                 "                 OpFunctionEnd\n";
3144         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3145         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3146         spec.numWorkGroups = IVec3(numElements, 1, 1);
3147
3148         group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "OpCopyObject on different types", spec));
3149
3150         return group.release();
3151 }
// Assembly code used for testing OpUnreachable (see createOpUnreachableGroup() further below) is based on GLSL source code:
3153 //
3154 // #version 430
3155 //
3156 // layout(std140, set = 0, binding = 0) readonly buffer Input {
3157 //   float elements[];
3158 // } input_data;
3159 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
3160 //   float elements[];
3161 // } output_data;
3162 //
3163 // void not_called_func() {
3164 //   // place OpUnreachable here
3165 // }
3166 //
3167 // uint modulo4(uint val) {
3168 //   switch (val % uint(4)) {
3169 //     case 0:  return 3;
3170 //     case 1:  return 2;
3171 //     case 2:  return 1;
3172 //     case 3:  return 0;
3173 //     default: return 100; // place OpUnreachable here
3174 //   }
3175 // }
3176 //
3177 // uint const5() {
3178 //   return 5;
3179 //   // place OpUnreachable here
3180 // }
3181 //
3182 // void main() {
3183 //   uint x = gl_GlobalInvocationID.x;
3184 //   if (const5() > modulo4(1000)) {
3185 //     output_data.elements[x] = -input_data.elements[x];
3186 //   } else {
3187 //     // place OpUnreachable here
3188 //     output_data.elements[x] = input_data.elements[x];
3189 //   }
3190 // }
3191
3192 void addOpUnreachableAmberTests(tcu::TestCaseGroup& group, tcu::TestContext& testCtx)
3193 {
3194 #ifndef CTS_USES_VULKANSC
3195         static const char dataDir[] = "spirv_assembly/instruction/compute/unreachable";
3196
3197         struct Case
3198         {
3199                 string  name;
3200                 string  desc;
3201         };
3202
3203         static const Case cases[] =
3204         {
3205                 { "unreachable-switch-merge-in-loop",   "Test containing an unreachable switch merge block inside an infinite loop"     },
3206         };
3207
3208         for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3209         {
3210                 const string fileName = cases[i].name + ".amber";
3211                 group.addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3212         }
3213 #else
3214         DE_UNREF(group);
3215         DE_UNREF(testCtx);
3216 #endif
3217 }
3218
3219 void addOpSwitchAmberTests(tcu::TestCaseGroup& group, tcu::TestContext& testCtx)
3220 {
3221 #ifndef CTS_USES_VULKANSC
3222         static const char dataDir[] = "spirv_assembly/instruction/compute/switch";
3223
3224         struct Case
3225         {
3226                 string  name;
3227                 string  desc;
3228         };
3229
3230         static const Case cases[] =
3231         {
3232                 { "switch-case-to-merge-block", "Test switch containing a case that jumps directly to the merge block"  },
3233         };
3234
3235         for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3236         {
3237                 const string fileName = cases[i].name + ".amber";
3238                 group.addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3239         }
3240 #else
3241         DE_UNREF(group);
3242         DE_UNREF(testCtx);
3243 #endif
3244 }
3245
3246 #ifndef CTS_USES_VULKANSC
3247 tcu::TestCaseGroup* createOpArrayLengthComputeGroup (tcu::TestContext& testCtx)
3248 {
3249         de::MovePtr<tcu::TestCaseGroup> group           (new tcu::TestCaseGroup(testCtx, "oparraylength", "Test the OpArrayLength instruction"));
3250         static const char                               dataDir[]       = "spirv_assembly/instruction/compute/arraylength";
3251
3252         struct Case
3253         {
3254                 string  name;
3255                 string  desc;
3256         };
3257
3258         static const Case cases[] =
3259         {
3260                 { "array-stride-larger-than-element-size",      "Test using an unsized array with stride larger than the element size"  }
3261         };
3262
3263         for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3264         {
3265                 const string fileName = cases[i].name + ".amber";
3266                 group->addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3267         }
3268
3269         return group.release();
3270 }
3271 #endif
3272
3273 tcu::TestCaseGroup* createOpUnreachableGroup (tcu::TestContext& testCtx)
3274 {
3275         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opunreachable", "Test the OpUnreachable instruction"));
3276         ComputeShaderSpec                               spec;
3277         de::Random                                              rnd                             (deStringHash(group->getName()));
3278         const int                                               numElements             = 100;
3279         vector<float>                                   positiveFloats  (numElements, 0);
3280         vector<float>                                   negativeFloats  (numElements, 0);
3281
3282         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
3283
3284         for (size_t ndx = 0; ndx < numElements; ++ndx)
3285                 negativeFloats[ndx] = -positiveFloats[ndx];
3286
3287         spec.assembly =
3288                 string(getComputeAsmShaderPreamble()) +
3289
3290                 "OpSource GLSL 430\n"
3291                 "OpName %main            \"main\"\n"
3292                 "OpName %func_not_called_func \"not_called_func(\"\n"
3293                 "OpName %func_modulo4         \"modulo4(u1;\"\n"
3294                 "OpName %func_const5          \"const5(\"\n"
3295                 "OpName %id                   \"gl_GlobalInvocationID\"\n"
3296
3297                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3298
3299                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3300
3301                 "%u32ptr    = OpTypePointer Function %u32\n"
3302                 "%uintfuint = OpTypeFunction %u32 %u32ptr\n"
3303                 "%unitf     = OpTypeFunction %u32\n"
3304
3305                 "%id        = OpVariable %uvec3ptr Input\n"
3306                 "%zero      = OpConstant %u32 0\n"
3307                 "%one       = OpConstant %u32 1\n"
3308                 "%two       = OpConstant %u32 2\n"
3309                 "%three     = OpConstant %u32 3\n"
3310                 "%four      = OpConstant %u32 4\n"
3311                 "%five      = OpConstant %u32 5\n"
3312                 "%hundred   = OpConstant %u32 100\n"
3313                 "%thousand  = OpConstant %u32 1000\n"
3314
3315                 + string(getComputeAsmInputOutputBuffer()) +
3316
3317                 // Main()
3318                 "%main   = OpFunction %void None %voidf\n"
3319                 "%main_entry  = OpLabel\n"
3320                 "%v_thousand  = OpVariable %u32ptr Function %thousand\n"
3321                 "%idval       = OpLoad %uvec3 %id\n"
3322                 "%x           = OpCompositeExtract %u32 %idval 0\n"
3323                 "%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
3324                 "%inval       = OpLoad %f32 %inloc\n"
3325                 "%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
3326                 "%ret_const5  = OpFunctionCall %u32 %func_const5\n"
3327                 "%ret_modulo4 = OpFunctionCall %u32 %func_modulo4 %v_thousand\n"
3328                 "%cmp_gt      = OpUGreaterThan %bool %ret_const5 %ret_modulo4\n"
3329                 "               OpSelectionMerge %if_end None\n"
3330                 "               OpBranchConditional %cmp_gt %if_true %if_false\n"
3331                 "%if_true     = OpLabel\n"
3332                 "%negate      = OpFNegate %f32 %inval\n"
3333                 "               OpStore %outloc %negate\n"
3334                 "               OpBranch %if_end\n"
3335                 "%if_false    = OpLabel\n"
3336                 "               OpUnreachable\n" // Unreachable else branch for if statement
3337                 "%if_end      = OpLabel\n"
3338                 "               OpReturn\n"
3339                 "               OpFunctionEnd\n"
3340
3341                 // not_called_function()
3342                 "%func_not_called_func  = OpFunction %void None %voidf\n"
3343                 "%not_called_func_entry = OpLabel\n"
3344                 "                         OpUnreachable\n" // Unreachable entry block in not called static function
3345                 "                         OpFunctionEnd\n"
3346
3347                 // modulo4()
3348                 "%func_modulo4  = OpFunction %u32 None %uintfuint\n"
3349                 "%valptr        = OpFunctionParameter %u32ptr\n"
3350                 "%modulo4_entry = OpLabel\n"
3351                 "%val           = OpLoad %u32 %valptr\n"
3352                 "%modulo        = OpUMod %u32 %val %four\n"
3353                 "                 OpSelectionMerge %switch_merge None\n"
3354                 "                 OpSwitch %modulo %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
3355                 "%case0         = OpLabel\n"
3356                 "                 OpReturnValue %three\n"
3357                 "%case1         = OpLabel\n"
3358                 "                 OpReturnValue %two\n"
3359                 "%case2         = OpLabel\n"
3360                 "                 OpReturnValue %one\n"
3361                 "%case3         = OpLabel\n"
3362                 "                 OpReturnValue %zero\n"
3363                 "%default       = OpLabel\n"
3364                 "                 OpUnreachable\n" // Unreachable default case for switch statement
3365                 "%switch_merge  = OpLabel\n"
3366                 "                 OpUnreachable\n" // Unreachable merge block for switch statement
3367                 "                 OpFunctionEnd\n"
3368
3369                 // const5()
3370                 "%func_const5  = OpFunction %u32 None %unitf\n"
3371                 "%const5_entry = OpLabel\n"
3372                 "                OpReturnValue %five\n"
3373                 "%unreachable  = OpLabel\n"
3374                 "                OpUnreachable\n" // Unreachable block in function
3375                 "                OpFunctionEnd\n";
3376         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
3377         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
3378         spec.numWorkGroups = IVec3(numElements, 1, 1);
3379
3380         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpUnreachable appearing at different places", spec));
3381
3382         addOpUnreachableAmberTests(*group, testCtx);
3383
3384         return group.release();
3385 }
3386
3387 // Assembly code used for testing decoration group is based on GLSL source code:
3388 //
3389 // #version 430
3390 //
3391 // layout(std140, set = 0, binding = 0) readonly buffer Input0 {
3392 //   float elements[];
3393 // } input_data0;
3394 // layout(std140, set = 0, binding = 1) readonly buffer Input1 {
3395 //   float elements[];
3396 // } input_data1;
3397 // layout(std140, set = 0, binding = 2) readonly buffer Input2 {
3398 //   float elements[];
3399 // } input_data2;
3400 // layout(std140, set = 0, binding = 3) readonly buffer Input3 {
3401 //   float elements[];
3402 // } input_data3;
3403 // layout(std140, set = 0, binding = 4) readonly buffer Input4 {
3404 //   float elements[];
3405 // } input_data4;
3406 // layout(std140, set = 0, binding = 5) writeonly buffer Output {
3407 //   float elements[];
3408 // } output_data;
3409 //
3410 // void main() {
3411 //   uint x = gl_GlobalInvocationID.x;
3412 //   output_data.elements[x] = input_data0.elements[x] + input_data1.elements[x] + input_data2.elements[x] + input_data3.elements[x] + input_data4.elements[x];
3413 // }
3414 tcu::TestCaseGroup* createDecorationGroupGroup (tcu::TestContext& testCtx)
3415 {
3416         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "decoration_group", "Test the OpDecorationGroup & OpGroupDecorate instruction"));
3417         ComputeShaderSpec                               spec;
3418         de::Random                                              rnd                             (deStringHash(group->getName()));
3419         const int                                               numElements             = 100;
3420         vector<float>                                   inputFloats0    (numElements, 0);
3421         vector<float>                                   inputFloats1    (numElements, 0);
3422         vector<float>                                   inputFloats2    (numElements, 0);
3423         vector<float>                                   inputFloats3    (numElements, 0);
3424         vector<float>                                   inputFloats4    (numElements, 0);
3425         vector<float>                                   outputFloats    (numElements, 0);
3426
3427         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats0[0], numElements);
3428         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats1[0], numElements);
3429         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats2[0], numElements);
3430         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats3[0], numElements);
3431         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats4[0], numElements);
3432
3433         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3434         floorAll(inputFloats0);
3435         floorAll(inputFloats1);
3436         floorAll(inputFloats2);
3437         floorAll(inputFloats3);
3438         floorAll(inputFloats4);
3439
3440         for (size_t ndx = 0; ndx < numElements; ++ndx)
3441                 outputFloats[ndx] = inputFloats0[ndx] + inputFloats1[ndx] + inputFloats2[ndx] + inputFloats3[ndx] + inputFloats4[ndx];
3442
3443         spec.assembly =
3444                 string(getComputeAsmShaderPreamble()) +
3445
3446                 "OpSource GLSL 430\n"
3447                 "OpName %main \"main\"\n"
3448                 "OpName %id \"gl_GlobalInvocationID\"\n"
3449
3450                 // Not using group decoration on variable.
3451                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3452                 // Not using group decoration on type.
3453                 "OpDecorate %f32arr ArrayStride 4\n"
3454
3455                 "OpDecorate %groups BufferBlock\n"
3456                 "OpDecorate %groupm Offset 0\n"
3457                 "%groups = OpDecorationGroup\n"
3458                 "%groupm = OpDecorationGroup\n"
3459
3460                 // Group decoration on multiple structs.
3461                 "OpGroupDecorate %groups %outbuf %inbuf0 %inbuf1 %inbuf2 %inbuf3 %inbuf4\n"
3462                 // Group decoration on multiple struct members.
3463                 "OpGroupMemberDecorate %groupm %outbuf 0 %inbuf0 0 %inbuf1 0 %inbuf2 0 %inbuf3 0 %inbuf4 0\n"
3464
3465                 "OpDecorate %group1 DescriptorSet 0\n"
3466                 "OpDecorate %group3 DescriptorSet 0\n"
3467                 "OpDecorate %group3 NonWritable\n"
3468                 "OpDecorate %group3 Restrict\n"
3469                 "%group0 = OpDecorationGroup\n"
3470                 "%group1 = OpDecorationGroup\n"
3471                 "%group3 = OpDecorationGroup\n"
3472
3473                 // Applying the same decoration group multiple times.
3474                 "OpGroupDecorate %group1 %outdata\n"
3475                 "OpGroupDecorate %group1 %outdata\n"
3476                 "OpGroupDecorate %group1 %outdata\n"
3477                 "OpDecorate %outdata DescriptorSet 0\n"
3478                 "OpDecorate %outdata Binding 5\n"
3479                 // Applying decoration group containing nothing.
3480                 "OpGroupDecorate %group0 %indata0\n"
3481                 "OpDecorate %indata0 DescriptorSet 0\n"
3482                 "OpDecorate %indata0 Binding 0\n"
3483                 // Applying decoration group containing one decoration.
3484                 "OpGroupDecorate %group1 %indata1\n"
3485                 "OpDecorate %indata1 Binding 1\n"
3486                 // Applying decoration group containing multiple decorations.
3487                 "OpGroupDecorate %group3 %indata2 %indata3\n"
3488                 "OpDecorate %indata2 Binding 2\n"
3489                 "OpDecorate %indata3 Binding 3\n"
3490                 // Applying multiple decoration groups (with overlapping).
3491                 "OpGroupDecorate %group0 %indata4\n"
3492                 "OpGroupDecorate %group1 %indata4\n"
3493                 "OpGroupDecorate %group3 %indata4\n"
3494                 "OpDecorate %indata4 Binding 4\n"
3495
3496                 + string(getComputeAsmCommonTypes()) +
3497
3498                 "%id   = OpVariable %uvec3ptr Input\n"
3499                 "%zero = OpConstant %i32 0\n"
3500
3501                 "%outbuf    = OpTypeStruct %f32arr\n"
3502                 "%outbufptr = OpTypePointer Uniform %outbuf\n"
3503                 "%outdata   = OpVariable %outbufptr Uniform\n"
3504                 "%inbuf0    = OpTypeStruct %f32arr\n"
3505                 "%inbuf0ptr = OpTypePointer Uniform %inbuf0\n"
3506                 "%indata0   = OpVariable %inbuf0ptr Uniform\n"
3507                 "%inbuf1    = OpTypeStruct %f32arr\n"
3508                 "%inbuf1ptr = OpTypePointer Uniform %inbuf1\n"
3509                 "%indata1   = OpVariable %inbuf1ptr Uniform\n"
3510                 "%inbuf2    = OpTypeStruct %f32arr\n"
3511                 "%inbuf2ptr = OpTypePointer Uniform %inbuf2\n"
3512                 "%indata2   = OpVariable %inbuf2ptr Uniform\n"
3513                 "%inbuf3    = OpTypeStruct %f32arr\n"
3514                 "%inbuf3ptr = OpTypePointer Uniform %inbuf3\n"
3515                 "%indata3   = OpVariable %inbuf3ptr Uniform\n"
3516                 "%inbuf4    = OpTypeStruct %f32arr\n"
3517                 "%inbufptr  = OpTypePointer Uniform %inbuf4\n"
3518                 "%indata4   = OpVariable %inbufptr Uniform\n"
3519
3520                 "%main   = OpFunction %void None %voidf\n"
3521                 "%label  = OpLabel\n"
3522                 "%idval  = OpLoad %uvec3 %id\n"
3523                 "%x      = OpCompositeExtract %u32 %idval 0\n"
3524                 "%inloc0 = OpAccessChain %f32ptr %indata0 %zero %x\n"
3525                 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
3526                 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
3527                 "%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
3528                 "%inloc4 = OpAccessChain %f32ptr %indata4 %zero %x\n"
3529                 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3530                 "%inval0 = OpLoad %f32 %inloc0\n"
3531                 "%inval1 = OpLoad %f32 %inloc1\n"
3532                 "%inval2 = OpLoad %f32 %inloc2\n"
3533                 "%inval3 = OpLoad %f32 %inloc3\n"
3534                 "%inval4 = OpLoad %f32 %inloc4\n"
3535                 "%add0   = OpFAdd %f32 %inval0 %inval1\n"
3536                 "%add1   = OpFAdd %f32 %add0 %inval2\n"
3537                 "%add2   = OpFAdd %f32 %add1 %inval3\n"
3538                 "%add    = OpFAdd %f32 %add2 %inval4\n"
3539                 "          OpStore %outloc %add\n"
3540                 "          OpReturn\n"
3541                 "          OpFunctionEnd\n";
3542         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats0)));
3543         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
3544         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
3545         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3546         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3547         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3548         spec.numWorkGroups = IVec3(numElements, 1, 1);
3549
3550         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "decoration group cases", spec));
3551
3552         return group.release();
3553 }
3554
// Tag identifying the scalar type held by a SpecConstantValue.
enum SpecConstantType
{
	// Integer types, signed then unsigned for each width.
	SC_INT8 = 0,
	SC_UINT8,
	SC_INT16,
	SC_UINT16,
	SC_INT32,
	SC_UINT32,
	SC_INT64,
	SC_UINT64,

	// Floating point types.
	SC_FLOAT16,
	SC_FLOAT32,
	SC_FLOAT64,
};
3569
3570 struct SpecConstantValue
3571 {
3572         SpecConstantType type;
3573         union ValueUnion {
3574                 deInt8                  i8;
3575                 deUint8                 u8;
3576                 deInt16                 i16;
3577                 deUint16                u16;
3578                 deInt32                 i32;
3579                 deUint32                u32;
3580                 deInt64                 i64;
3581                 deUint64                u64;
3582                 tcu::Float16    f16;
3583                 tcu::Float32    f32;
3584                 tcu::Float64    f64;
3585
3586                 ValueUnion (deInt8                      v) : i8(v)      {}
3587                 ValueUnion (deUint8                     v) : u8(v)      {}
3588                 ValueUnion (deInt16                     v) : i16(v)     {}
3589                 ValueUnion (deUint16            v) : u16(v)     {}
3590                 ValueUnion (deInt32                     v) : i32(v)     {}
3591                 ValueUnion (deUint32            v) : u32(v)     {}
3592                 ValueUnion (deInt64                     v) : i64(v)     {}
3593                 ValueUnion (deUint64            v) : u64(v)     {}
3594                 ValueUnion (tcu::Float16        v) : f16(v)     {}
3595                 ValueUnion (tcu::Float32        v) : f32(v)     {}
3596                 ValueUnion (tcu::Float64        v) : f64(v)     {}
3597         } value;
3598
3599         SpecConstantValue (deInt8                       v) : type(SC_INT8)              , value(v) {}
3600         SpecConstantValue (deUint8                      v) : type(SC_UINT8)             , value(v) {}
3601         SpecConstantValue (deInt16                      v) : type(SC_INT16)             , value(v) {}
3602         SpecConstantValue (deUint16                     v) : type(SC_UINT16)    , value(v) {}
3603         SpecConstantValue (deInt32                      v) : type(SC_INT32)             , value(v) {}
3604         SpecConstantValue (deUint32                     v) : type(SC_UINT32)    , value(v) {}
3605         SpecConstantValue (deInt64                      v) : type(SC_INT64)             , value(v) {}
3606         SpecConstantValue (deUint64                     v) : type(SC_UINT64)    , value(v) {}
3607         SpecConstantValue (tcu::Float16         v) : type(SC_FLOAT16)   , value(v) {}
3608         SpecConstantValue (tcu::Float32         v) : type(SC_FLOAT32)   , value(v) {}
3609         SpecConstantValue (tcu::Float64         v) : type(SC_FLOAT64)   , value(v) {}
3610
3611         void appendTo(vkt::SpirVAssembly::SpecConstants& specConstants)
3612         {
3613                 switch (type)
3614                 {
3615                 case SC_INT8:           specConstants.append(value.i8);         break;
3616                 case SC_UINT8:          specConstants.append(value.u8);         break;
3617                 case SC_INT16:          specConstants.append(value.i16);        break;
3618                 case SC_UINT16:         specConstants.append(value.u16);        break;
3619                 case SC_INT32:          specConstants.append(value.i32);        break;
3620                 case SC_UINT32:         specConstants.append(value.u32);        break;
3621                 case SC_INT64:          specConstants.append(value.i64);        break;
3622                 case SC_UINT64:         specConstants.append(value.u64);        break;
3623                 case SC_FLOAT16:        specConstants.append(value.f16);        break;
3624                 case SC_FLOAT32:        specConstants.append(value.f32);        break;
3625                 case SC_FLOAT64:        specConstants.append(value.f64);        break;
3626                 default:
3627                         DE_ASSERT(false);
3628                 }
3629         }
3630 };
3631
3632 enum CaseFlagBits
3633 {
3634         FLAG_NONE               = 0,
3635         FLAG_CONVERT    = 1,
3636         FLAG_I8                 = (1<<1),
3637         FLAG_I16                = (1<<2),
3638         FLAG_I64                = (1<<3),
3639         FLAG_F16                = (1<<4),
3640         FLAG_F64                = (1<<5),
3641 };
3642 using CaseFlags = deUint32;
3643
3644 struct SpecConstantTwoValCase
3645 {
3646         const std::string       caseName;
3647         const std::string       scDefinition0;
3648         const std::string       scDefinition1;
3649         const std::string       scResultType;
3650         const std::string       scOperation;
3651         SpecConstantValue       scActualValue0;
3652         SpecConstantValue       scActualValue1;
3653         const std::string       resultOperation;
3654         vector<deInt32>         expectedOutput;
3655         CaseFlags                       caseFlags;
3656
3657                                                 SpecConstantTwoValCase (const std::string& name,
3658                                                                                                 const std::string& definition0,
3659                                                                                                 const std::string& definition1,
3660                                                                                                 const std::string& resultType,
3661                                                                                                 const std::string& operation,
3662                                                                                                 SpecConstantValue value0,
3663                                                                                                 SpecConstantValue value1,
3664                                                                                                 const std::string& resultOp,
3665                                                                                                 const vector<deInt32>& output,
3666                                                                                                 CaseFlags flags = FLAG_NONE)
3667                                                         : caseName                              (name)
3668                                                         , scDefinition0                 (definition0)
3669                                                         , scDefinition1                 (definition1)
3670                                                         , scResultType                  (resultType)
3671                                                         , scOperation                   (operation)
3672                                                         , scActualValue0                (value0)
3673                                                         , scActualValue1                (value1)
3674                                                         , resultOperation               (resultOp)
3675                                                         , expectedOutput                (output)
3676                                                         , caseFlags                             (flags)
3677                                                         {}
3678 };
3679
// Returns SPIR-V assembly declaring the i32 constants 0..3 followed by the composite
// types built on them: a 3-element i32 array, a 3-element array of those arrays and a
// struct wrapping the latter.
std::string getSpecConstantOpStructConstantsAndTypes ()
{
	const std::string constants =
		"%zero        = OpConstant %i32 0\n"
		"%one         = OpConstant %i32 1\n"
		"%two         = OpConstant %i32 2\n"
		"%three       = OpConstant %i32 3\n";

	// %three doubles as the length of both array types.
	const std::string types =
		"%iarr3       = OpTypeArray %i32 %three\n"
		"%imat3       = OpTypeArray %iarr3 %three\n"
		"%struct      = OpTypeStruct %imat3\n";

	return constants + types;
}
3692
// Returns SPIR-V assembly defining all-zero constant composites of the array, nested
// array and struct types: %iarr3_0, %imat3_0 and %struct_0.
std::string getSpecConstantOpStructComposites ()
{
	static const char zeroComposites[] =
		"%iarr3_0     = OpConstantComposite %iarr3 %zero %zero %zero\n"
		"%imat3_0     = OpConstantComposite %imat3 %iarr3_0 %iarr3_0 %iarr3_0\n"
		"%struct_0    = OpConstantComposite %struct %imat3_0\n";

	return std::string(zeroComposites);
}
3701
// Returns SPIR-V assembly that uses OpSpecConstantOp CompositeInsert/CompositeExtract
// to build a struct holding a 3x3 arrangement of %sc_0, %sc_1 and %sc_2, then compares
// selected components and turns the comparison results into %mustbe_0, %mustbe_1 and
// %mustbe_2.
std::string getSpecConstantOpStructConstBlock ()
{
	std::string block;

	// Row 0: (sc_0, sc_1, sc_2), filled one element at a time starting from the zero array.
	block +=
		"%iarr3_a     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_0        %iarr3_0     0\n"
		"%iarr3_b     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_1        %iarr3_a     1\n"
		"%iarr3_c     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_2        %iarr3_b     2\n";

	// Row 1: (sc_1, sc_2, sc_0).
	block +=
		"%iarr3_d     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_1        %iarr3_0     0\n"
		"%iarr3_e     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_2        %iarr3_d     1\n"
		"%iarr3_f     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_0        %iarr3_e     2\n";

	// Row 2: (sc_2, sc_0, sc_1).
	block +=
		"%iarr3_g     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_2        %iarr3_0     0\n"
		"%iarr3_h     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_0        %iarr3_g     1\n"
		"%iarr3_i     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_1        %iarr3_h     2\n";

	// Assemble the three rows into the nested array.
	block +=
		"%imat3_a     = OpSpecConstantOp %imat3  CompositeInsert  %iarr3_c     %imat3_0     0\n"
		"%imat3_b     = OpSpecConstantOp %imat3  CompositeInsert  %iarr3_f     %imat3_a     1\n"
		"%imat3_c     = OpSpecConstantOp %imat3  CompositeInsert  %iarr3_i     %imat3_b     2\n";

	// Store the nested array as member 0 of the struct.
	block +=
		"%struct_a    = OpSpecConstantOp %struct CompositeInsert  %imat3_c     %struct_0    0\n";

	// Pull component pairs back out of the struct for comparison.
	block +=
		"%comp_0_0    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 0 0\n"
		"%comp_1_0    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 1 0\n"
		"%comp_0_1    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 0 1\n"
		"%comp_2_2    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 2 2\n"
		"%comp_2_0    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 2 0\n"
		"%comp_1_1    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 1 1\n";

	// Expected outcomes: sc_0 vs sc_1 must be false, sc_1 vs sc_1 true, sc_2 vs sc_2 true.
	block +=
		"%cmpres_0    = OpSpecConstantOp %bool   IEqual %comp_0_0 %comp_1_0\n"
		"%cmpres_1    = OpSpecConstantOp %bool   IEqual %comp_0_1 %comp_2_2\n"
		"%cmpres_2    = OpSpecConstantOp %bool   IEqual %comp_2_0 %comp_1_1\n";

	// With those outcomes the selects must yield 0, 1 and 2 respectively.
	block +=
		"%mustbe_0    = OpSpecConstantOp %i32    Select %cmpres_0 %one %zero\n"
		"%mustbe_1    = OpSpecConstantOp %i32    Select %cmpres_1 %one %zero\n"
		"%mustbe_2    = OpSpecConstantOp %i32    Select %cmpres_2 %two %one\n";

	return block;
}
3741
// Returns SPIR-V instructions that scale %sc_factor by
// (1 - mustbe_0) * mustbe_1 * (mustbe_2 - 1) and name the product %sc_final.
// When the three "mustbe" values are 0, 1 and 2 the factor equals 1, so the final
// result is left unaltered.
std::string getSpecConstantOpStructInstructions ()
{
	static const char* const instructions[] =
	{
		"%subf_a      = OpISub %i32 %one %mustbe_0\n",
		"%subf_b      = OpIMul %i32 %subf_a %mustbe_1\n",
		"%subf_c      = OpISub %i32 %mustbe_2 %one\n",
		"%factor      = OpIMul %i32 %subf_b %subf_c\n",
		"%sc_final    = OpIMul %i32 %factor %sc_factor\n",
	};

	std::string code;

	for (const char* instruction : instructions)
		code += instruction;

	return code;
}
3754
3755 tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
3756 {
3757         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
3758         vector<SpecConstantTwoValCase>  cases;
3759         de::Random                                              rnd                             (deStringHash(group->getName()));
3760         const int                                               numElements             = 100;
3761         vector<deInt32>                                 inputInts               (numElements, 0);
3762         vector<deInt32>                                 outputInts1             (numElements, 0);
3763         vector<deInt32>                                 outputInts2             (numElements, 0);
3764         vector<deInt32>                                 outputInts3             (numElements, 0);
3765         vector<deInt32>                                 outputInts4             (numElements, 0);
3766         vector<deInt32>                                 outputInts5             (numElements, 0);
3767         const StringTemplate                    shaderTemplate  (
3768                 "${CAPABILITIES:opt}"
3769                 + string(getComputeAsmShaderPreamble()) +
3770
3771                 "OpName %main           \"main\"\n"
3772                 "OpName %id             \"gl_GlobalInvocationID\"\n"
3773
3774                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3775                 "OpDecorate %sc_0  SpecId 0\n"
3776                 "OpDecorate %sc_1  SpecId 1\n"
3777                 "OpDecorate %i32arr ArrayStride 4\n"
3778
3779                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3780
3781                 "${OPTYPE_DEFINITIONS:opt}"
3782                 "%buf     = OpTypeStruct %i32arr\n"
3783                 "%bufptr  = OpTypePointer Uniform %buf\n"
3784                 "%indata    = OpVariable %bufptr Uniform\n"
3785                 "%outdata   = OpVariable %bufptr Uniform\n"
3786
3787                 "%id        = OpVariable %uvec3ptr Input\n"
3788                 "%zero      = OpConstant %i32 0\n"
3789
3790                 "%sc_0      = OpSpecConstant${SC_DEF0}\n"
3791                 "%sc_1      = OpSpecConstant${SC_DEF1}\n"
3792                 "%sc_final  = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n"
3793
3794                 "%main      = OpFunction %void None %voidf\n"
3795                 "%label     = OpLabel\n"
3796                 "${TYPE_CONVERT:opt}"
3797                 "%idval     = OpLoad %uvec3 %id\n"
3798                 "%x         = OpCompositeExtract %u32 %idval 0\n"
3799                 "%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
3800                 "%inval     = OpLoad %i32 %inloc\n"
3801                 "%final     = ${GEN_RESULT}\n"
3802                 "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
3803                 "             OpStore %outloc %final\n"
3804                 "             OpReturn\n"
3805                 "             OpFunctionEnd\n");
3806
3807         fillRandomScalars(rnd, -65536, 65536, &inputInts[0], numElements);
3808
3809         for (size_t ndx = 0; ndx < numElements; ++ndx)
3810         {
3811                 outputInts1[ndx] = inputInts[ndx] + 42;
3812                 outputInts2[ndx] = inputInts[ndx];
3813                 outputInts3[ndx] = inputInts[ndx] - 11200;
3814                 outputInts4[ndx] = inputInts[ndx] + 1;
3815                 outputInts5[ndx] = inputInts[ndx] - 42;
3816         }
3817
3818         const char addScToInput[]               = "OpIAdd %i32 %inval %sc_final";
3819         const char addSc32ToInput[]             = "OpIAdd %i32 %inval %sc_final32";
3820         const char selectTrueUsingSc[]  = "OpSelect %i32 %sc_final %inval %zero";
3821         const char selectFalseUsingSc[] = "OpSelect %i32 %sc_final %zero %inval";
3822
3823         cases.push_back(SpecConstantTwoValCase("iadd",                                          " %i32 0",              " %i32 0",              "%i32",         "IAdd                 %sc_0 %sc_1",                     62,                                             -20,                            addScToInput,           outputInts1));
3824         cases.push_back(SpecConstantTwoValCase("isub",                                          " %i32 0",              " %i32 0",              "%i32",         "ISub                 %sc_0 %sc_1",                     100,                                    58,                                     addScToInput,           outputInts1));
3825         cases.push_back(SpecConstantTwoValCase("imul",                                          " %i32 0",              " %i32 0",              "%i32",         "IMul                 %sc_0 %sc_1",                     -2,                                             -21,                            addScToInput,           outputInts1));
3826         cases.push_back(SpecConstantTwoValCase("sdiv",                                          " %i32 0",              " %i32 0",              "%i32",         "SDiv                 %sc_0 %sc_1",                     -126,                                   -3,                                     addScToInput,           outputInts1));
3827         cases.push_back(SpecConstantTwoValCase("udiv",                                          " %i32 0",              " %i32 0",              "%i32",         "UDiv                 %sc_0 %sc_1",                     126,                                    3,                                      addScToInput,           outputInts1));
3828         cases.push_back(SpecConstantTwoValCase("srem",                                          " %i32 0",              " %i32 0",              "%i32",         "SRem                 %sc_0 %sc_1",                     7,                                              3,                                      addScToInput,           outputInts4));
3829         cases.push_back(SpecConstantTwoValCase("smod",                                          " %i32 0",              " %i32 0",              "%i32",         "SMod                 %sc_0 %sc_1",                     7,                                              3,                                      addScToInput,           outputInts4));
3830         cases.push_back(SpecConstantTwoValCase("umod",                                          " %i32 0",              " %i32 0",              "%i32",         "UMod                 %sc_0 %sc_1",                     342,                                    50,                                     addScToInput,           outputInts1));
3831         cases.push_back(SpecConstantTwoValCase("bitwiseand",                            " %i32 0",              " %i32 0",              "%i32",         "BitwiseAnd           %sc_0 %sc_1",                     42,                                             63,                                     addScToInput,           outputInts1));
3832         cases.push_back(SpecConstantTwoValCase("bitwiseor",                                     " %i32 0",              " %i32 0",              "%i32",         "BitwiseOr            %sc_0 %sc_1",                     34,                                             8,                                      addScToInput,           outputInts1));
3833         cases.push_back(SpecConstantTwoValCase("bitwisexor",                            " %i32 0",              " %i32 0",              "%i32",         "BitwiseXor           %sc_0 %sc_1",                     18,                                             56,                                     addScToInput,           outputInts1));
3834         cases.push_back(SpecConstantTwoValCase("shiftrightlogical",                     " %i32 0",              " %i32 0",              "%i32",         "ShiftRightLogical    %sc_0 %sc_1",                     168,                                    2,                                      addScToInput,           outputInts1));
3835         cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic",          " %i32 0",              " %i32 0",              "%i32",         "ShiftRightArithmetic %sc_0 %sc_1",                     -168,                                   2,                                      addScToInput,           outputInts5));
3836         cases.push_back(SpecConstantTwoValCase("shiftleftlogical",                      " %i32 0",              " %i32 0",              "%i32",         "ShiftLeftLogical     %sc_0 %sc_1",                     21,                                             1,                                      addScToInput,           outputInts1));
3837
3838         // Shifts for other integer sizes.
3839         cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i64",         " %i64 0",              " %i64 0",              "%i64",         "ShiftRightLogical    %sc_0 %sc_1",                     deInt64{168},                   deInt64{2},                     addSc32ToInput,         outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3840         cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i64",      " %i64 0",              " %i64 0",              "%i64",         "ShiftRightArithmetic %sc_0 %sc_1",                     deInt64{-168},                  deInt64{2},                     addSc32ToInput,         outputInts5, (FLAG_I64 | FLAG_CONVERT)));
3841         cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i64",          " %i64 0",              " %i64 0",              "%i64",         "ShiftLeftLogical     %sc_0 %sc_1",                     deInt64{21},                    deInt64{1},                     addSc32ToInput,         outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3842         cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i16",         " %i16 0",              " %i16 0",              "%i16",         "ShiftRightLogical    %sc_0 %sc_1",                     deInt16{168},                   deInt16{2},                     addSc32ToInput,         outputInts1, (FLAG_I16 | FLAG_CONVERT)));
3843         cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i16",      " %i16 0",              " %i16 0",              "%i16",         "ShiftRightArithmetic %sc_0 %sc_1",                     deInt16{-168},                  deInt16{2},                     addSc32ToInput,         outputInts5, (FLAG_I16 | FLAG_CONVERT)));
3844         cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i16",          " %i16 0",              " %i16 0",              "%i16",         "ShiftLeftLogical     %sc_0 %sc_1",                     deInt16{21},                    deInt16{1},                     addSc32ToInput,         outputInts1, (FLAG_I16 | FLAG_CONVERT)));
3845         cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i8",          " %i8 0",               " %i8 0",               "%i8",          "ShiftRightLogical    %sc_0 %sc_1",                     deInt8{84},                             deInt8{1},                      addSc32ToInput,         outputInts1, (FLAG_I8 | FLAG_CONVERT)));
3846         cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i8",       " %i8 0",               " %i8 0",               "%i8",          "ShiftRightArithmetic %sc_0 %sc_1",                     deInt8{-84},                    deInt8{1},                      addSc32ToInput,         outputInts5, (FLAG_I8 | FLAG_CONVERT)));
3847         cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i8",           " %i8 0",               " %i8 0",               "%i8",          "ShiftLeftLogical     %sc_0 %sc_1",                     deInt8{21},                             deInt8{1},                      addSc32ToInput,         outputInts1, (FLAG_I8 | FLAG_CONVERT)));
3848
3849         // Shifts for other integer sizes but only in the shift amount.
3850         cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i64",       " %i32 0",              " %i64 0",              "%i32",         "ShiftRightLogical    %sc_0 %sc_1",                     168,                                    deInt64{2},                     addScToInput,           outputInts1, (FLAG_I64)));
3851         cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i64"," %i32 0",          " %i64 0",              "%i32",         "ShiftRightArithmetic %sc_0 %sc_1",                     -168,                                   deInt64{2},                     addScToInput,           outputInts5, (FLAG_I64)));
3852         cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i64",        " %i32 0",              " %i64 0",              "%i32",         "ShiftLeftLogical     %sc_0 %sc_1",                     21,                                             deInt64{1},                     addScToInput,           outputInts1, (FLAG_I64)));
3853         cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i16",       " %i32 0",              " %i16 0",              "%i32",         "ShiftRightLogical    %sc_0 %sc_1",                     168,                                    deInt16{2},                     addScToInput,           outputInts1, (FLAG_I16)));
3854         cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i16"," %i32 0",          " %i16 0",              "%i32",         "ShiftRightArithmetic %sc_0 %sc_1",                     -168,                                   deInt16{2},                     addScToInput,           outputInts5, (FLAG_I16)));
3855         cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i16",        " %i32 0",              " %i16 0",              "%i32",         "ShiftLeftLogical     %sc_0 %sc_1",                     21,                                             deInt16{1},                     addScToInput,           outputInts1, (FLAG_I16)));
3856         cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i8",        " %i32 0",              " %i8 0",               "%i32",         "ShiftRightLogical    %sc_0 %sc_1",                     84,                                             deInt8{1},                      addScToInput,           outputInts1, (FLAG_I8)));
3857         cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i8",     " %i32 0",              " %i8 0",               "%i32",         "ShiftRightArithmetic %sc_0 %sc_1",                     -84,                                    deInt8{1},                      addScToInput,           outputInts5, (FLAG_I8)));
3858         cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i8",         " %i32 0",              " %i8 0",               "%i32",         "ShiftLeftLogical     %sc_0 %sc_1",                     21,                                             deInt8{1},                      addScToInput,           outputInts1, (FLAG_I8)));
3859
3860         cases.push_back(SpecConstantTwoValCase("slessthan",                                     " %i32 0",              " %i32 0",              "%bool",        "SLessThan            %sc_0 %sc_1",                     -20,                                    -10,                            selectTrueUsingSc,      outputInts2));
3861         cases.push_back(SpecConstantTwoValCase("ulessthan",                                     " %i32 0",              " %i32 0",              "%bool",        "ULessThan            %sc_0 %sc_1",                     10,                                             20,                                     selectTrueUsingSc,      outputInts2));
3862         cases.push_back(SpecConstantTwoValCase("sgreaterthan",                          " %i32 0",              " %i32 0",              "%bool",        "SGreaterThan         %sc_0 %sc_1",                     -1000,                                  50,                                     selectFalseUsingSc,     outputInts2));
3863         cases.push_back(SpecConstantTwoValCase("ugreaterthan",                          " %i32 0",              " %i32 0",              "%bool",        "UGreaterThan         %sc_0 %sc_1",                     10,                                             5,                                      selectTrueUsingSc,      outputInts2));
3864         cases.push_back(SpecConstantTwoValCase("slessthanequal",                        " %i32 0",              " %i32 0",              "%bool",        "SLessThanEqual       %sc_0 %sc_1",                     -10,                                    -10,                            selectTrueUsingSc,      outputInts2));
3865         cases.push_back(SpecConstantTwoValCase("ulessthanequal",                        " %i32 0",              " %i32 0",              "%bool",        "ULessThanEqual       %sc_0 %sc_1",                     50,                                             100,                            selectTrueUsingSc,      outputInts2));
3866         cases.push_back(SpecConstantTwoValCase("sgreaterthanequal",                     " %i32 0",              " %i32 0",              "%bool",        "SGreaterThanEqual    %sc_0 %sc_1",                     -1000,                                  50,                                     selectFalseUsingSc,     outputInts2));
3867         cases.push_back(SpecConstantTwoValCase("ugreaterthanequal",                     " %i32 0",              " %i32 0",              "%bool",        "UGreaterThanEqual    %sc_0 %sc_1",                     10,                                             10,                                     selectTrueUsingSc,      outputInts2));
3868         cases.push_back(SpecConstantTwoValCase("iequal",                                        " %i32 0",              " %i32 0",              "%bool",        "IEqual               %sc_0 %sc_1",                     42,                                             24,                                     selectFalseUsingSc,     outputInts2));
3869         cases.push_back(SpecConstantTwoValCase("inotequal",                                     " %i32 0",              " %i32 0",              "%bool",        "INotEqual            %sc_0 %sc_1",                     42,                                             24,                                     selectTrueUsingSc,      outputInts2));
3870         cases.push_back(SpecConstantTwoValCase("logicaland",                            "True %bool",   "True %bool",   "%bool",        "LogicalAnd           %sc_0 %sc_1",                     0,                                              1,                                      selectFalseUsingSc,     outputInts2));
3871         cases.push_back(SpecConstantTwoValCase("logicalor",                                     "False %bool",  "False %bool",  "%bool",        "LogicalOr            %sc_0 %sc_1",                     1,                                              0,                                      selectTrueUsingSc,      outputInts2));
3872         cases.push_back(SpecConstantTwoValCase("logicalequal",                          "True %bool",   "True %bool",   "%bool",        "LogicalEqual         %sc_0 %sc_1",                     0,                                              1,                                      selectFalseUsingSc,     outputInts2));
3873         cases.push_back(SpecConstantTwoValCase("logicalnotequal",                       "False %bool",  "False %bool",  "%bool",        "LogicalNotEqual      %sc_0 %sc_1",                     1,                                              0,                                      selectTrueUsingSc,      outputInts2));
3874         cases.push_back(SpecConstantTwoValCase("snegate",                                       " %i32 0",              " %i32 0",              "%i32",         "SNegate              %sc_0",                           -42,                                    0,                                      addScToInput,           outputInts1));
3875         cases.push_back(SpecConstantTwoValCase("not",                                           " %i32 0",              " %i32 0",              "%i32",         "Not                  %sc_0",                           -43,                                    0,                                      addScToInput,           outputInts1));
3876         cases.push_back(SpecConstantTwoValCase("logicalnot",                            "False %bool",  "False %bool",  "%bool",        "LogicalNot           %sc_0",                           1,                                              0,                                      selectFalseUsingSc,     outputInts2));
3877         cases.push_back(SpecConstantTwoValCase("select",                                        "False %bool",  " %i32 0",              "%i32",         "Select               %sc_0 %sc_1 %zero",       1,                                              42,                                     addScToInput,           outputInts1));
3878         cases.push_back(SpecConstantTwoValCase("sconvert",                                      " %i32 0",              " %i32 0",              "%i16",         "SConvert             %sc_0",                           -11200,                                 0,                                      addSc32ToInput,         outputInts3, (FLAG_I16 | FLAG_CONVERT)));
3879         cases.push_back(SpecConstantTwoValCase("fconvert",                                      " %f32 0",              " %f32 0",              "%f64",         "FConvert             %sc_0",                           tcu::Float32{-11200.0}, tcu::Float32{0.0},      addSc32ToInput,         outputInts3, (FLAG_F64 | FLAG_CONVERT)));
3880         cases.push_back(SpecConstantTwoValCase("fconvert16",                            " %f16 0",              " %f16 0",              "%f32",         "FConvert             %sc_0",                           tcu::Float16{1.0},              tcu::Float16{0.0},      addSc32ToInput,         outputInts4, (FLAG_F16 | FLAG_CONVERT)));
3881
3882         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
3883         {
3884                 map<string, string>             specializations;
3885                 ComputeShaderSpec               spec;
3886
3887                 specializations["SC_DEF0"]                      = cases[caseNdx].scDefinition0;
3888                 specializations["SC_DEF1"]                      = cases[caseNdx].scDefinition1;
3889                 specializations["SC_RESULT_TYPE"]       = cases[caseNdx].scResultType;
3890                 specializations["SC_OP"]                        = cases[caseNdx].scOperation;
3891                 specializations["GEN_RESULT"]           = cases[caseNdx].resultOperation;
3892
3893                 // Special SPIR-V code when using 16-bit integers.
3894                 if (cases[caseNdx].caseFlags & FLAG_I16)
3895                 {
3896                         spec.requestedVulkanFeatures.coreFeatures.shaderInt16   = VK_TRUE;
3897                         specializations["CAPABILITIES"]                                                 += "OpCapability Int16\n";                                                      // Adds 16-bit integer capability
3898                         specializations["OPTYPE_DEFINITIONS"]                                   += "%i16 = OpTypeInt 16 1\n";                                           // Adds 16-bit integer type
3899                         if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3900                                 specializations["TYPE_CONVERT"]                                         += "%sc_final32 = OpSConvert %i32 %sc_final\n";         // Converts 16-bit integer to 32-bit integer
3901                 }
3902
3903                 // Special SPIR-V code when using 64-bit integers.
3904                 if (cases[caseNdx].caseFlags & FLAG_I64)
3905                 {
3906                         spec.requestedVulkanFeatures.coreFeatures.shaderInt64   = VK_TRUE;
3907                         specializations["CAPABILITIES"]                                                 += "OpCapability Int64\n";                                                      // Adds 64-bit integer capability
3908                         specializations["OPTYPE_DEFINITIONS"]                                   += "%i64 = OpTypeInt 64 1\n";                                           // Adds 64-bit integer type
3909                         if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3910                                 specializations["TYPE_CONVERT"]                                         += "%sc_final32 = OpSConvert %i32 %sc_final\n";         // Converts 64-bit integer to 32-bit integer
3911                 }
3912
3913                 // Special SPIR-V code when using 64-bit floats.
3914                 if (cases[caseNdx].caseFlags & FLAG_F64)
3915                 {
3916                         spec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3917                         specializations["CAPABILITIES"]                                                 += "OpCapability Float64\n";                                            // Adds 64-bit float capability
3918                         specializations["OPTYPE_DEFINITIONS"]                                   += "%f64 = OpTypeFloat 64\n";                                           // Adds 64-bit float type
3919                         if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3920                                 specializations["TYPE_CONVERT"]                                         += "%sc_final32 = OpConvertFToS %i32 %sc_final\n";      // Converts 64-bit float to 32-bit integer
3921                 }
3922
3923                 // Extension needed for float16 and int8.
3924                 if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
3925                         spec.extensions.push_back("VK_KHR_shader_float16_int8");
3926
3927                 // Special SPIR-V code when using 16-bit floats.
3928                 if (cases[caseNdx].caseFlags & FLAG_F16)
3929                 {
3930                         spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
3931                         specializations["CAPABILITIES"]                         += "OpCapability Float16\n";                                            // Adds 16-bit float capability
3932                         specializations["OPTYPE_DEFINITIONS"]           += "%f16 = OpTypeFloat 16\n";                                           // Adds 16-bit float type
3933                         if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3934                                 specializations["TYPE_CONVERT"]                 += "%sc_final32 = OpConvertFToS %i32 %sc_final\n";      // Converts 16-bit float to 32-bit integer
3935                 }
3936
3937                 // Special SPIR-V code when using 8-bit integers.
3938                 if (cases[caseNdx].caseFlags & FLAG_I8)
3939                 {
3940                         spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
3941                         specializations["CAPABILITIES"]                         += "OpCapability Int8\n";                                               // Adds 8-bit integer capability
3942                         specializations["OPTYPE_DEFINITIONS"]           += "%i8 = OpTypeInt 8 1\n";                                             // Adds 8-bit integer type
3943                         if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3944                                 specializations["TYPE_CONVERT"]                 += "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 8-bit integer to 32-bit integer
3945                 }
3946
3947                 spec.assembly = shaderTemplate.specialize(specializations);
3948                 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
3949                 spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
3950                 spec.numWorkGroups = IVec3(numElements, 1, 1);
3951                 cases[caseNdx].scActualValue0.appendTo(spec.specConstants);
3952                 cases[caseNdx].scActualValue1.appendTo(spec.specConstants);
3953
3954                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName.c_str(), cases[caseNdx].caseName.c_str(), spec));
3955         }
3956
3957         ComputeShaderSpec                               spec;
3958
3959         spec.assembly =
3960                 string(getComputeAsmShaderPreamble()) +
3961
3962                 "OpName %main           \"main\"\n"
3963                 "OpName %id             \"gl_GlobalInvocationID\"\n"
3964
3965                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3966                 "OpDecorate %sc_0  SpecId 0\n"
3967                 "OpDecorate %sc_1  SpecId 1\n"
3968                 "OpDecorate %sc_2  SpecId 2\n"
3969                 "OpDecorate %i32arr ArrayStride 4\n"
3970
3971                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3972
3973                 "%ivec3       = OpTypeVector %i32 3\n"
3974
3975                 + getSpecConstantOpStructConstantsAndTypes() +
3976
3977                 "%buf         = OpTypeStruct %i32arr\n"
3978                 "%bufptr      = OpTypePointer Uniform %buf\n"
3979                 "%indata      = OpVariable %bufptr Uniform\n"
3980                 "%outdata     = OpVariable %bufptr Uniform\n"
3981
3982                 "%id          = OpVariable %uvec3ptr Input\n"
3983                 "%ivec3_0     = OpConstantComposite %ivec3 %zero %zero %zero\n"
3984                 "%vec3_undef  = OpUndef %ivec3\n"
3985
3986                 + getSpecConstantOpStructComposites () +
3987
3988                 "%sc_0        = OpSpecConstant %i32 0\n"
3989                 "%sc_1        = OpSpecConstant %i32 0\n"
3990                 "%sc_2        = OpSpecConstant %i32 0\n"
3991
3992                 + getSpecConstantOpStructConstBlock () +
3993
3994                 "%sc_vec3_0   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_0        %ivec3_0     0\n"                                                 // (sc_0, 0, 0)
3995                 "%sc_vec3_1   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_1        %ivec3_0     1\n"                                                 // (0, sc_1, 0)
3996                 "%sc_vec3_2   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_2        %ivec3_0     2\n"                                                 // (0, 0, sc_2)
3997                 "%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0   %vec3_undef  0          0xFFFFFFFF 2\n"   // (sc_0, ???,  0)
3998                 "%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1          0\n"   // (???,  sc_1, 0)
3999                 "%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle    %vec3_undef  %sc_vec3_2   5          0xFFFFFFFF 5\n"   // (sc_2, ???,  sc_2)
4000                 "%sc_vec3_01  = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n"                                             // (???,  sc_0, sc_1)
4001                 "%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n"                                             // (sc_2, sc_0, sc_1)
4002                 "%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"                                                 // sc_2
4003                 "%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"                                                 // sc_0
4004                 "%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"                                                 // sc_1
4005                 "%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"                                                              // (sc_2 - sc_0)
4006                 "%sc_factor   = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n"                                                              // (sc_2 - sc_0) * sc_1
4007
4008                 "%main      = OpFunction %void None %voidf\n"
4009                 "%label     = OpLabel\n"
4010
4011                 + getSpecConstantOpStructInstructions() +
4012
4013                 "%idval     = OpLoad %uvec3 %id\n"
4014                 "%x         = OpCompositeExtract %u32 %idval 0\n"
4015                 "%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
4016                 "%inval     = OpLoad %i32 %inloc\n"
4017                 "%final     = OpIAdd %i32 %inval %sc_final\n"
4018                 "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
4019                 "             OpStore %outloc %final\n"
4020                 "             OpReturn\n"
4021                 "             OpFunctionEnd\n";
4022         spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
4023         spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts3)));
4024         spec.numWorkGroups = IVec3(numElements, 1, 1);
4025         spec.specConstants.append<deInt32>(123);
4026         spec.specConstants.append<deInt32>(56);
4027         spec.specConstants.append<deInt32>(-77);
4028
4029         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", "VectorShuffle, CompositeExtract, & CompositeInsert", spec));
4030
4031         return group.release();
4032 }
4033
4034 void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestContext& testCtx)
4035 {
4036         ComputeShaderSpec       specInt;
4037         ComputeShaderSpec       specFloat;
4038         ComputeShaderSpec       specFloat16;
4039         ComputeShaderSpec       specVec3;
4040         ComputeShaderSpec       specMat4;
4041         ComputeShaderSpec       specArray;
4042         ComputeShaderSpec       specStruct;
4043         de::Random                      rnd                             (deStringHash(group->getName()));
4044         const int                       numElements             = 100;
4045         vector<float>           inputFloats             (numElements, 0);
4046         vector<float>           outputFloats    (numElements, 0);
4047         vector<deUint32>        inputUints              (numElements, 0);
4048         vector<deUint32>        outputUints             (numElements, 0);
4049
4050         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4051
4052         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4053         floorAll(inputFloats);
4054
4055         for (size_t ndx = 0; ndx < numElements; ++ndx)
4056         {
4057                 // Just check if the value is positive or not
4058                 outputFloats[ndx] = (inputFloats[ndx] > 0) ? 1.0f : -1.0f;
4059         }
4060
4061         for (size_t ndx = 0; ndx < numElements; ++ndx)
4062         {
4063                 inputUints[ndx] = tcu::Float16(inputFloats[ndx]).bits();
4064                 outputUints[ndx] = tcu::Float16(outputFloats[ndx]).bits();
4065         }
4066
4067         // All of the tests are of the form:
4068         //
4069         // testtype r
4070         //
4071         // if (inputdata > 0)
4072         //   r = 1
4073         // else
4074         //   r = -1
4075         //
4076         // return (float)r
4077
4078         specFloat.assembly =
4079                 string(getComputeAsmShaderPreamble()) +
4080
4081                 "OpSource GLSL 430\n"
4082                 "OpName %main \"main\"\n"
4083                 "OpName %id \"gl_GlobalInvocationID\"\n"
4084
4085                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4086
4087                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4088
4089                 "%id = OpVariable %uvec3ptr Input\n"
4090                 "%zero       = OpConstant %i32 0\n"
4091                 "%float_0    = OpConstant %f32 0.0\n"
4092                 "%float_1    = OpConstant %f32 1.0\n"
4093                 "%float_n1   = OpConstant %f32 -1.0\n"
4094
4095                 "%main     = OpFunction %void None %voidf\n"
4096                 "%entry    = OpLabel\n"
4097                 "%idval    = OpLoad %uvec3 %id\n"
4098                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4099                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4100                 "%inval    = OpLoad %f32 %inloc\n"
4101
4102                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4103                 "            OpSelectionMerge %cm None\n"
4104                 "            OpBranchConditional %comp %tb %fb\n"
4105                 "%tb       = OpLabel\n"
4106                 "            OpBranch %cm\n"
4107                 "%fb       = OpLabel\n"
4108                 "            OpBranch %cm\n"
4109                 "%cm       = OpLabel\n"
4110                 "%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4111
4112                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4113                 "            OpStore %outloc %res\n"
4114                 "            OpReturn\n"
4115
4116                 "            OpFunctionEnd\n";
4117         specFloat.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4118         specFloat.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4119         specFloat.numWorkGroups = IVec3(numElements, 1, 1);
4120
4121         specFloat16.assembly =
4122                 "OpCapability Shader\n"
4123                 "OpCapability Float16\n"
4124                 "OpMemoryModel Logical GLSL450\n"
4125                 "OpEntryPoint GLCompute %main \"main\" %id\n"
4126                 "OpExecutionMode %main LocalSize 1 1 1\n"
4127
4128                 "OpSource GLSL 430\n"
4129                 "OpName %main \"main\"\n"
4130                 "OpName %id \"gl_GlobalInvocationID\"\n"
4131
4132                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4133
4134                 "OpDecorate %buf BufferBlock\n"
4135                 "OpDecorate %indata DescriptorSet 0\n"
4136                 "OpDecorate %indata Binding 0\n"
4137                 "OpDecorate %outdata DescriptorSet 0\n"
4138                 "OpDecorate %outdata Binding 1\n"
4139                 "OpDecorate %u32arr ArrayStride 4\n"
4140                 "OpMemberDecorate %buf 0 Offset 0\n"
4141
4142                 + string(getComputeAsmCommonTypes()) +
4143
4144                 "%f16      = OpTypeFloat 16\n"
4145                 "%f16vec2  = OpTypeVector %f16 2\n"
4146                 "%fvec2    = OpTypeVector %f32 2\n"
4147                 "%u32ptr   = OpTypePointer Uniform %u32\n"
4148                 "%u32arr   = OpTypeRuntimeArray %u32\n"
4149                 "%f16_0    = OpConstant %f16 0.0\n"
4150
4151
4152                 "%buf      = OpTypeStruct %u32arr\n"
4153                 "%bufptr   = OpTypePointer Uniform %buf\n"
4154                 "%indata   = OpVariable %bufptr Uniform\n"
4155                 "%outdata  = OpVariable %bufptr Uniform\n"
4156
4157                 "%id       = OpVariable %uvec3ptr Input\n"
4158                 "%zero     = OpConstant %i32 0\n"
4159                 "%float_0  = OpConstant %f32 0.0\n"
4160                 "%float_1  = OpConstant %f32 1.0\n"
4161                 "%float_n1 = OpConstant %f32 -1.0\n"
4162
4163                 "%main     = OpFunction %void None %voidf\n"
4164                 "%entry    = OpLabel\n"
4165                 "%idval    = OpLoad %uvec3 %id\n"
4166                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4167                 "%inloc    = OpAccessChain %u32ptr %indata %zero %x\n"
4168                 "%inval    = OpLoad %u32 %inloc\n"
4169                 "%f16_vec2_inval = OpBitcast %f16vec2 %inval\n"
4170                 "%f16_inval = OpCompositeExtract %f16 %f16_vec2_inval 0\n"
4171                 "%f32_inval = OpFConvert %f32 %f16_inval\n"
4172
4173                 "%comp     = OpFOrdGreaterThan %bool %f32_inval %float_0\n"
4174                 "            OpSelectionMerge %cm None\n"
4175                 "            OpBranchConditional %comp %tb %fb\n"
4176                 "%tb       = OpLabel\n"
4177                 "            OpBranch %cm\n"
4178                 "%fb       = OpLabel\n"
4179                 "            OpBranch %cm\n"
4180                 "%cm       = OpLabel\n"
4181                 "%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4182                 "%f16_res  = OpFConvert %f16 %res\n"
4183
4184                 "%f16vec2_res = OpCompositeConstruct %f16vec2 %f16_res %f16_0\n"
4185                 "%u32_res  = OpBitcast %u32 %f16vec2_res\n"
4186
4187                 "%outloc   = OpAccessChain %u32ptr %outdata %zero %x\n"
4188                 "            OpStore %outloc %u32_res\n"
4189                 "            OpReturn\n"
4190
4191                 "            OpFunctionEnd\n";
4192
4193         specFloat16.inputs.push_back(BufferSp(new Uint32Buffer(inputUints)));
4194         specFloat16.outputs.push_back(BufferSp(new Uint32Buffer(outputUints)));
4195         specFloat16.numWorkGroups = IVec3(numElements, 1, 1);
4196         specFloat16.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
4197
4198         specMat4.assembly =
4199                 string(getComputeAsmShaderPreamble()) +
4200
4201                 "OpSource GLSL 430\n"
4202                 "OpName %main \"main\"\n"
4203                 "OpName %id \"gl_GlobalInvocationID\"\n"
4204
4205                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4206
4207                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4208
4209                 "%id = OpVariable %uvec3ptr Input\n"
4210                 "%v4f32      = OpTypeVector %f32 4\n"
4211                 "%mat4v4f32  = OpTypeMatrix %v4f32 4\n"
4212                 "%zero       = OpConstant %i32 0\n"
4213                 "%float_0    = OpConstant %f32 0.0\n"
4214                 "%float_1    = OpConstant %f32 1.0\n"
4215                 "%float_n1   = OpConstant %f32 -1.0\n"
4216                 "%m11        = OpConstantComposite %v4f32 %float_1 %float_0 %float_0 %float_0\n"
4217                 "%m12        = OpConstantComposite %v4f32 %float_0 %float_1 %float_0 %float_0\n"
4218                 "%m13        = OpConstantComposite %v4f32 %float_0 %float_0 %float_1 %float_0\n"
4219                 "%m14        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_1\n"
4220                 "%m1         = OpConstantComposite %mat4v4f32 %m11 %m12 %m13 %m14\n"
4221                 "%m21        = OpConstantComposite %v4f32 %float_n1 %float_0 %float_0 %float_0\n"
4222                 "%m22        = OpConstantComposite %v4f32 %float_0 %float_n1 %float_0 %float_0\n"
4223                 "%m23        = OpConstantComposite %v4f32 %float_0 %float_0 %float_n1 %float_0\n"
4224                 "%m24        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_n1\n"
4225                 "%m2         = OpConstantComposite %mat4v4f32 %m21 %m22 %m23 %m24\n"
4226
4227                 "%main     = OpFunction %void None %voidf\n"
4228                 "%entry    = OpLabel\n"
4229                 "%idval    = OpLoad %uvec3 %id\n"
4230                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4231                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4232                 "%inval    = OpLoad %f32 %inloc\n"
4233
4234                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4235                 "            OpSelectionMerge %cm None\n"
4236                 "            OpBranchConditional %comp %tb %fb\n"
4237                 "%tb       = OpLabel\n"
4238                 "            OpBranch %cm\n"
4239                 "%fb       = OpLabel\n"
4240                 "            OpBranch %cm\n"
4241                 "%cm       = OpLabel\n"
4242                 "%mres     = OpPhi %mat4v4f32 %m1 %tb %m2 %fb\n"
4243                 "%res      = OpCompositeExtract %f32 %mres 2 2\n"
4244
4245                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4246                 "            OpStore %outloc %res\n"
4247                 "            OpReturn\n"
4248
4249                 "            OpFunctionEnd\n";
4250         specMat4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4251         specMat4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4252         specMat4.numWorkGroups = IVec3(numElements, 1, 1);
4253
4254         specVec3.assembly =
4255                 string(getComputeAsmShaderPreamble()) +
4256
4257                 "OpSource GLSL 430\n"
4258                 "OpName %main \"main\"\n"
4259                 "OpName %id \"gl_GlobalInvocationID\"\n"
4260
4261                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4262
4263                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4264
4265                 "%id = OpVariable %uvec3ptr Input\n"
4266                 "%zero       = OpConstant %i32 0\n"
4267                 "%float_0    = OpConstant %f32 0.0\n"
4268                 "%float_1    = OpConstant %f32 1.0\n"
4269                 "%float_n1   = OpConstant %f32 -1.0\n"
4270                 "%v1         = OpConstantComposite %fvec3 %float_1 %float_1 %float_1\n"
4271                 "%v2         = OpConstantComposite %fvec3 %float_n1 %float_n1 %float_n1\n"
4272
4273                 "%main     = OpFunction %void None %voidf\n"
4274                 "%entry    = OpLabel\n"
4275                 "%idval    = OpLoad %uvec3 %id\n"
4276                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4277                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4278                 "%inval    = OpLoad %f32 %inloc\n"
4279
4280                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4281                 "            OpSelectionMerge %cm None\n"
4282                 "            OpBranchConditional %comp %tb %fb\n"
4283                 "%tb       = OpLabel\n"
4284                 "            OpBranch %cm\n"
4285                 "%fb       = OpLabel\n"
4286                 "            OpBranch %cm\n"
4287                 "%cm       = OpLabel\n"
4288                 "%vres     = OpPhi %fvec3 %v1 %tb %v2 %fb\n"
4289                 "%res      = OpCompositeExtract %f32 %vres 2\n"
4290
4291                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4292                 "            OpStore %outloc %res\n"
4293                 "            OpReturn\n"
4294
4295                 "            OpFunctionEnd\n";
4296         specVec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4297         specVec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4298         specVec3.numWorkGroups = IVec3(numElements, 1, 1);
4299
4300         specInt.assembly =
4301                 string(getComputeAsmShaderPreamble()) +
4302
4303                 "OpSource GLSL 430\n"
4304                 "OpName %main \"main\"\n"
4305                 "OpName %id \"gl_GlobalInvocationID\"\n"
4306
4307                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4308
4309                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4310
4311                 "%id = OpVariable %uvec3ptr Input\n"
4312                 "%zero       = OpConstant %i32 0\n"
4313                 "%float_0    = OpConstant %f32 0.0\n"
4314                 "%i1         = OpConstant %i32 1\n"
4315                 "%i2         = OpConstant %i32 -1\n"
4316
4317                 "%main     = OpFunction %void None %voidf\n"
4318                 "%entry    = OpLabel\n"
4319                 "%idval    = OpLoad %uvec3 %id\n"
4320                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4321                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4322                 "%inval    = OpLoad %f32 %inloc\n"
4323
4324                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4325                 "            OpSelectionMerge %cm None\n"
4326                 "            OpBranchConditional %comp %tb %fb\n"
4327                 "%tb       = OpLabel\n"
4328                 "            OpBranch %cm\n"
4329                 "%fb       = OpLabel\n"
4330                 "            OpBranch %cm\n"
4331                 "%cm       = OpLabel\n"
4332                 "%ires     = OpPhi %i32 %i1 %tb %i2 %fb\n"
4333                 "%res      = OpConvertSToF %f32 %ires\n"
4334
4335                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4336                 "            OpStore %outloc %res\n"
4337                 "            OpReturn\n"
4338
4339                 "            OpFunctionEnd\n";
4340         specInt.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4341         specInt.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4342         specInt.numWorkGroups = IVec3(numElements, 1, 1);
4343
4344         specArray.assembly =
4345                 string(getComputeAsmShaderPreamble()) +
4346
4347                 "OpSource GLSL 430\n"
4348                 "OpName %main \"main\"\n"
4349                 "OpName %id \"gl_GlobalInvocationID\"\n"
4350
4351                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4352
4353                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4354
4355                 "%id = OpVariable %uvec3ptr Input\n"
4356                 "%zero       = OpConstant %i32 0\n"
4357                 "%u7         = OpConstant %u32 7\n"
4358                 "%float_0    = OpConstant %f32 0.0\n"
4359                 "%float_1    = OpConstant %f32 1.0\n"
4360                 "%float_n1   = OpConstant %f32 -1.0\n"
4361                 "%f32a7      = OpTypeArray %f32 %u7\n"
4362                 "%a1         = OpConstantComposite %f32a7 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1\n"
4363                 "%a2         = OpConstantComposite %f32a7 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1\n"
4364                 "%main     = OpFunction %void None %voidf\n"
4365                 "%entry    = OpLabel\n"
4366                 "%idval    = OpLoad %uvec3 %id\n"
4367                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4368                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4369                 "%inval    = OpLoad %f32 %inloc\n"
4370
4371                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4372                 "            OpSelectionMerge %cm None\n"
4373                 "            OpBranchConditional %comp %tb %fb\n"
4374                 "%tb       = OpLabel\n"
4375                 "            OpBranch %cm\n"
4376                 "%fb       = OpLabel\n"
4377                 "            OpBranch %cm\n"
4378                 "%cm       = OpLabel\n"
4379                 "%ares     = OpPhi %f32a7 %a1 %tb %a2 %fb\n"
4380                 "%res      = OpCompositeExtract %f32 %ares 5\n"
4381
4382                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4383                 "            OpStore %outloc %res\n"
4384                 "            OpReturn\n"
4385
4386                 "            OpFunctionEnd\n";
4387         specArray.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4388         specArray.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4389         specArray.numWorkGroups = IVec3(numElements, 1, 1);
4390
4391         specStruct.assembly =
4392                 string(getComputeAsmShaderPreamble()) +
4393
4394                 "OpSource GLSL 430\n"
4395                 "OpName %main \"main\"\n"
4396                 "OpName %id \"gl_GlobalInvocationID\"\n"
4397
4398                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4399
4400                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4401
4402                 "%id = OpVariable %uvec3ptr Input\n"
4403                 "%zero       = OpConstant %i32 0\n"
4404                 "%float_0    = OpConstant %f32 0.0\n"
4405                 "%float_1    = OpConstant %f32 1.0\n"
4406                 "%float_n1   = OpConstant %f32 -1.0\n"
4407
4408                 "%v2f32      = OpTypeVector %f32 2\n"
4409                 "%Data2      = OpTypeStruct %f32 %v2f32\n"
4410                 "%Data       = OpTypeStruct %Data2 %f32\n"
4411
4412                 "%in1a       = OpConstantComposite %v2f32 %float_1 %float_1\n"
4413                 "%in1b       = OpConstantComposite %Data2 %float_1 %in1a\n"
4414                 "%s1         = OpConstantComposite %Data %in1b %float_1\n"
4415                 "%in2a       = OpConstantComposite %v2f32 %float_n1 %float_n1\n"
4416                 "%in2b       = OpConstantComposite %Data2 %float_n1 %in2a\n"
4417                 "%s2         = OpConstantComposite %Data %in2b %float_n1\n"
4418
4419                 "%main     = OpFunction %void None %voidf\n"
4420                 "%entry    = OpLabel\n"
4421                 "%idval    = OpLoad %uvec3 %id\n"
4422                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4423                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4424                 "%inval    = OpLoad %f32 %inloc\n"
4425
4426                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4427                 "            OpSelectionMerge %cm None\n"
4428                 "            OpBranchConditional %comp %tb %fb\n"
4429                 "%tb       = OpLabel\n"
4430                 "            OpBranch %cm\n"
4431                 "%fb       = OpLabel\n"
4432                 "            OpBranch %cm\n"
4433                 "%cm       = OpLabel\n"
4434                 "%sres     = OpPhi %Data %s1 %tb %s2 %fb\n"
4435                 "%res      = OpCompositeExtract %f32 %sres 0 0\n"
4436
4437                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4438                 "            OpStore %outloc %res\n"
4439                 "            OpReturn\n"
4440
4441                 "            OpFunctionEnd\n";
4442         specStruct.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4443         specStruct.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4444         specStruct.numWorkGroups = IVec3(numElements, 1, 1);
4445
4446         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_int", "OpPhi with int variables", specInt));
4447         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float", "OpPhi with float variables", specFloat));
4448         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float16", "OpPhi with 16bit float variables", specFloat16));
4449         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_vec3", "OpPhi with vec3 variables", specVec3));
4450         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_mat4", "OpPhi with mat4 variables", specMat4));
4451         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_array", "OpPhi with array variables", specArray));
4452         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_struct", "OpPhi with struct variables", specStruct));
4453 }
4454
// Produces `count` float constant definitions, one per line, of the form
// "%cf<N> = OpConstant %f32 <N>.0" with N = 5, 15, 25, ... The list is
// terminated by an empty line.
string generateConstantDefinitions (int count)
{
	std::ostringstream	defs;
	int					ndx		= 0;

	while (ndx < count)
	{
		// The same number is used for the result id suffix and for the literal.
		const int value = ndx * 10 + 5;

		defs << "%cf" << value << " = OpConstant %f32 " << value << ".0\n";
		++ndx;
	}

	defs << "\n";
	return defs.str();
}
4463
// Produces the "<literal> <label>" operand pairs for an OpSwitch: the text
// " <i> %case<i>" for each i in [0, count), followed by a newline.
string generateSwitchCases (int count)
{
	std::ostringstream	operands;
	int					caseNdx	= 0;

	while (caseNdx < count)
	{
		operands << " " << caseNdx << " %case" << caseNdx;
		++caseNdx;
	}

	operands << "\n";
	return operands.str();
}
4472
// Produces the target blocks for the generated switch: for each i in
// [0, count) a "%case<i>" label whose only instruction branches to %phi.
// The block list is terminated by an empty line.
string generateSwitchTargets (int count)
{
	std::ostringstream	blocks;
	int					caseNdx	= 0;

	while (caseNdx < count)
	{
		blocks << "%case" << caseNdx << " = OpLabel\n            OpBranch %phi\n";
		++caseNdx;
	}

	blocks << "\n";
	return blocks.str();
}
4481
// Produces the "<value> <parent block>" operand pairs for an OpPhi: the text
// " %cf<10*i+5> %case<i>" for each i in [0, count), followed by a newline.
// The ids use the same numbering as generateConstantDefinitions() and
// generateSwitchTargets().
string generateOpPhiParams (int count)
{
	std::ostringstream	operands;
	int					caseNdx	= 0;

	while (caseNdx < count)
	{
		const int constantId = caseNdx * 10 + 5;

		operands << " %cf" << constantId << " %case" << caseNdx;
		++caseNdx;
	}

	operands << "\n";
	return operands.str();
}
4490
// Formats an integer as decimal text.
string generateIntWidth (int value)
{
	std::ostringstream text;

	text << value;

	return text.str();
}
4497
// Returns a copy of s with "ABC" inserted after some of its characters.
//
// For every character copied, acc is increased by add. Whenever acc then
// exceeds treshold, treshold is subtracted from acc and "ABC" is appended
// right after that character. Skipping part of the injections this way keeps
// the result less uniform (and a lot shorter) than injecting everywhere.
//
// acc is updated in place, so the accumulator carries over to the caller's
// next invocation.
string expandOpPhiCase5 (const string& s, int &acc, int add, int treshold)
{
	std::ostringstream expanded;

	// Walk the NUL-terminated character buffer of the input.
	for (const char* cursor = s.c_str(); *cursor != '\0'; ++cursor)
	{
		expanded << *cursor;
		acc += add;

		// Threshold not exceeded yet: no injection after this character.
		if (acc <= treshold)
			continue;

		acc -= treshold;
		expanded << "ABC";
	}

	return expanded.str();
}
4520
4521 // Calculate expected result based on the code string
4522 float calcOpPhiCase5 (float val, const string& s)
4523 {
4524         const char*             p               = s.c_str();
4525         float                   x[8];
4526         bool                    b[8];
4527         const float             tv[8]   = { 0.5f, 1.5f, 3.5f, 7.5f, 15.5f, 31.5f, 63.5f, 127.5f };
4528         const float             v               = deFloatAbs(val);
4529         float                   res             = 0;
4530         int                             depth   = -1;
4531         int                             skip    = 0;
4532
4533         for (int i = 7; i >= 0; --i)
4534                 x[i] = std::fmod((float)v, (float)(2 << i));
4535         for (int i = 7; i >= 0; --i)
4536                 b[i] = x[i] > tv[i];
4537
4538         while (*p)
4539         {
4540                 if (*p == 'A')
4541                 {
4542                         depth++;
4543                         if (skip == 0 && b[depth])
4544                         {
4545                                 res++;
4546                         }
4547                         else
4548                                 skip++;
4549                 }
4550                 if (*p == 'B')
4551                 {
4552                         if (skip)
4553                                 skip--;
4554                         if (b[depth] || skip)
4555                                 skip++;
4556                 }
4557                 if (*p == 'C')
4558                 {
4559                         depth--;
4560                         if (skip)
4561                                 skip--;
4562                 }
4563                 p++;
4564         }
4565         return res;
4566 }
4567
// Generates the SPIR-V assembly for the nested selections described by the
// code string s. The letters of the code string mean:
//
//   A:  if (certain bit is set)
//       {
//         result++;
//
//   B:  } else {
//
//   C:  }
//
// Examples:
//   AABCBC leads to if(){r++;if(){r++;}else{}}else{}
//   ABABCC leads to if(){r++;}else{if(){r++;}else{}}
//   ABCABC leads to if(){r++;}else{}if(){r++;}else{}
//
// Every selection is identified by the character position of its 'A'; that
// number is the suffix of its %t/%f/%m labels and %rt/%rm results. The
// condition of a selection is %b<nesting depth>. The OpPhi emitted for the
// last character of the string is named %res.
//
// Else-branches do not generate new values, so the generator has to remember
// which value (and from which block) is live when an else-branch reaches the
// merge block. That is what the value / operand stacks below are for.
string generateOpPhiCase5 (const string& s)
{
	std::stack<int>				openIds;		// ids of the selections currently open, -1 at the bottom
	std::stack<std::string>		values;			// result id holding the current value per level
	std::stack<std::string>		phiOperands;	// "<value> <block>" pair that would reach a merge block
	std::stack<std::string>		thenOperands;	// operand recorded when a then-branch ends
	std::ostringstream			code;
	const char* const			text			= s.c_str();
	int							nesting			= -1;
	int							id				= 0;

	openIds.push(-1);
	values.push("%f32_0");
	phiOperands.push("%f32_0 %entry");

	for (int pos = 0; text[pos] != '\0'; ++pos)
	{
		switch (text[pos])
		{
			case 'A':
			{
				// Open a new selection and start its then-branch with the increment.
				nesting++;
				id = pos;
				openIds.push(id);

				code << "\tOpSelectionMerge %m" << id << " None\n"
					 << "\tOpBranchConditional %b" << nesting << " %t" << id << " %f" << id << "\n"
					 << "%t" << id << " = OpLabel\n"
					 << "%rt" << id << " = OpFAdd %f32 " << values.top() << " %f32_1\n";

				std::ostringstream operand;
				operand << "%rt" << id;
				values.push(operand.str());
				operand << " %t" << id;
				phiOperands.push(operand.str());
				break;
			}

			case 'B':
			{
				// Close the then-branch; its last value/block pair is the first
				// OpPhi operand of this selection.
				thenOperands.push(phiOperands.top());
				values.pop();
				phiOperands.pop();

				code << "\tOpBranch %m" << id << "\n"
					 << "%f" << id << " = OpLabel\n";

				// The else-branch starts with the value seen before the selection,
				// now arriving from the %f block.
				std::ostringstream operand;
				operand << values.top() << " %f" << id;
				phiOperands.top() = operand.str();
				break;
			}

			case 'C':
			{
				// Close the else-branch and merge both sides with an OpPhi.
				const std::string elseOperand = phiOperands.top();

				code << "\tOpBranch %m" << id << "\n"
					 << "%m" << id << " = OpLabel\n";

				if (text[pos + 1] == '\0')
					code << "%res"; // last result goes to %res
				else
					code << "%rm" << id;

				code << " = OpPhi %f32  " << thenOperands.top() << "  " << elseOperand << "\n";

				// The merged result becomes the current value of the enclosing level.
				std::ostringstream operand;
				operand << "%rm" << id;
				values.top() = operand.str();
				operand << " %m" << id;
				phiOperands.top() = operand.str();

				thenOperands.pop();
				nesting--;
				openIds.pop();
				id = openIds.top();
				break;
			}

			default:
				break;
		}
	}

	return code.str();
}
4666
4667 tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
4668 {
4669         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
4670         ComputeShaderSpec                               spec1;
4671         ComputeShaderSpec                               spec2;
4672         ComputeShaderSpec                               spec3;
4673         ComputeShaderSpec                               spec4;
4674         ComputeShaderSpec                               spec5;
4675         de::Random                                              rnd                             (deStringHash(group->getName()));
4676         const int                                               numElements             = 100;
4677         vector<float>                                   inputFloats             (numElements, 0);
4678         vector<float>                                   outputFloats1   (numElements, 0);
4679         vector<float>                                   outputFloats2   (numElements, 0);
4680         vector<float>                                   outputFloats3   (numElements, 0);
4681         vector<float>                                   outputFloats4   (numElements, 0);
4682         vector<float>                                   outputFloats5   (numElements, 0);
4683         std::string                                             codestring              = "ABC";
4684         const int                                               test4Width              = 512;
4685
4686         // Build case 5 code string. Each iteration makes the hierarchy more complicated.
4687         // 9 iterations with (7, 24) parameters makes the hierarchy 8 deep with about 1500 lines of
4688         // shader code.
4689         for (int i = 0, acc = 0; i < 9; i++)
4690                 codestring = expandOpPhiCase5(codestring, acc, 7, 24);
4691
4692         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4693
4694         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4695         floorAll(inputFloats);
4696
4697         for (size_t ndx = 0; ndx < numElements; ++ndx)
4698         {
4699                 switch (ndx % 3)
4700                 {
4701                         case 0:         outputFloats1[ndx] = inputFloats[ndx] + 5.5f;   break;
4702                         case 1:         outputFloats1[ndx] = inputFloats[ndx] + 20.5f;  break;
4703                         case 2:         outputFloats1[ndx] = inputFloats[ndx] + 1.75f;  break;
4704                         default:        break;
4705                 }
4706                 outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
4707                 outputFloats3[ndx] = 8.5f - inputFloats[ndx];
4708
4709                 int index4 = (int)deFloor(deAbs((float)ndx * inputFloats[ndx]));
4710                 outputFloats4[ndx] = (float)(index4 % test4Width) * 10.0f + 5.0f;
4711
4712                 outputFloats5[ndx] = calcOpPhiCase5(inputFloats[ndx], codestring);
4713         }
4714
4715         spec1.assembly =
4716                 string(getComputeAsmShaderPreamble()) +
4717
4718                 "OpSource GLSL 430\n"
4719                 "OpName %main \"main\"\n"
4720                 "OpName %id \"gl_GlobalInvocationID\"\n"
4721
4722                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4723
4724                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4725
4726                 "%id = OpVariable %uvec3ptr Input\n"
4727                 "%zero       = OpConstant %i32 0\n"
4728                 "%three      = OpConstant %u32 3\n"
4729                 "%constf5p5  = OpConstant %f32 5.5\n"
4730                 "%constf20p5 = OpConstant %f32 20.5\n"
4731                 "%constf1p75 = OpConstant %f32 1.75\n"
4732                 "%constf8p5  = OpConstant %f32 8.5\n"
4733                 "%constf6p5  = OpConstant %f32 6.5\n"
4734
4735                 "%main     = OpFunction %void None %voidf\n"
4736                 "%entry    = OpLabel\n"
4737                 "%idval    = OpLoad %uvec3 %id\n"
4738                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4739                 "%selector = OpUMod %u32 %x %three\n"
4740                 "            OpSelectionMerge %phi None\n"
4741                 "            OpSwitch %selector %default 0 %case0 1 %case1 2 %case2\n"
4742
4743                 // Case 1 before OpPhi.
4744                 "%case1    = OpLabel\n"
4745                 "            OpBranch %phi\n"
4746
4747                 "%default  = OpLabel\n"
4748                 "            OpUnreachable\n"
4749
4750                 "%phi      = OpLabel\n"
4751                 "%operand  = OpPhi %f32   %constf1p75 %case2   %constf20p5 %case1   %constf5p5 %case0\n" // not in the order of blocks
4752                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4753                 "%inval    = OpLoad %f32 %inloc\n"
4754                 "%add      = OpFAdd %f32 %inval %operand\n"
4755                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4756                 "            OpStore %outloc %add\n"
4757                 "            OpReturn\n"
4758
4759                 // Case 0 after OpPhi.
4760                 "%case0    = OpLabel\n"
4761                 "            OpBranch %phi\n"
4762
4763
4764                 // Case 2 after OpPhi.
4765                 "%case2    = OpLabel\n"
4766                 "            OpBranch %phi\n"
4767
4768                 "            OpFunctionEnd\n";
4769         spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4770         spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
4771         spec1.numWorkGroups = IVec3(numElements, 1, 1);
4772
4773         group->addChild(new SpvAsmComputeShaderCase(testCtx, "block", "out-of-order and unreachable blocks for OpPhi", spec1));
4774
4775         spec2.assembly =
4776                 string(getComputeAsmShaderPreamble()) +
4777
4778                 "OpName %main \"main\"\n"
4779                 "OpName %id \"gl_GlobalInvocationID\"\n"
4780
4781                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4782
4783                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4784
4785                 "%id         = OpVariable %uvec3ptr Input\n"
4786                 "%zero       = OpConstant %i32 0\n"
4787                 "%one        = OpConstant %i32 1\n"
4788                 "%three      = OpConstant %i32 3\n"
4789                 "%constf6p5  = OpConstant %f32 6.5\n"
4790
4791                 "%main       = OpFunction %void None %voidf\n"
4792                 "%entry      = OpLabel\n"
4793                 "%idval      = OpLoad %uvec3 %id\n"
4794                 "%x          = OpCompositeExtract %u32 %idval 0\n"
4795                 "%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
4796                 "%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
4797                 "%inval      = OpLoad %f32 %inloc\n"
4798                 "              OpBranch %phi\n"
4799
4800                 "%phi        = OpLabel\n"
4801                 "%step       = OpPhi %i32 %zero  %entry %step_next  %phi\n"
4802                 "%accum      = OpPhi %f32 %inval %entry %accum_next %phi\n"
4803                 "%step_next  = OpIAdd %i32 %step %one\n"
4804                 "%accum_next = OpFAdd %f32 %accum %constf6p5\n"
4805                 "%still_loop = OpSLessThan %bool %step %three\n"
4806                 "              OpLoopMerge %exit %phi None\n"
4807                 "              OpBranchConditional %still_loop %phi %exit\n"
4808
4809                 "%exit       = OpLabel\n"
4810                 "              OpStore %outloc %accum\n"
4811                 "              OpReturn\n"
4812                 "              OpFunctionEnd\n";
4813         spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4814         spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
4815         spec2.numWorkGroups = IVec3(numElements, 1, 1);
4816
4817         group->addChild(new SpvAsmComputeShaderCase(testCtx, "induction", "The usual way induction variables are handled in LLVM IR", spec2));
4818
4819         spec3.assembly =
4820                 string(getComputeAsmShaderPreamble()) +
4821
4822                 "OpName %main \"main\"\n"
4823                 "OpName %id \"gl_GlobalInvocationID\"\n"
4824
4825                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4826
4827                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4828
4829                 "%f32ptr_f   = OpTypePointer Function %f32\n"
4830                 "%id         = OpVariable %uvec3ptr Input\n"
4831                 "%true       = OpConstantTrue %bool\n"
4832                 "%false      = OpConstantFalse %bool\n"
4833                 "%zero       = OpConstant %i32 0\n"
4834                 "%constf8p5  = OpConstant %f32 8.5\n"
4835
4836                 "%main       = OpFunction %void None %voidf\n"
4837                 "%entry      = OpLabel\n"
4838                 "%b          = OpVariable %f32ptr_f Function %constf8p5\n"
4839                 "%idval      = OpLoad %uvec3 %id\n"
4840                 "%x          = OpCompositeExtract %u32 %idval 0\n"
4841                 "%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
4842                 "%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
4843                 "%a_init     = OpLoad %f32 %inloc\n"
4844                 "%b_init     = OpLoad %f32 %b\n"
4845                 "              OpBranch %phi\n"
4846
4847                 "%phi        = OpLabel\n"
4848                 "%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
4849                 "%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
4850                 "%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
4851                 "              OpLoopMerge %exit %phi None\n"
4852                 "              OpBranchConditional %still_loop %phi %exit\n"
4853
4854                 "%exit       = OpLabel\n"
4855                 "%sub        = OpFSub %f32 %a_next %b_next\n"
4856                 "              OpStore %outloc %sub\n"
4857                 "              OpReturn\n"
4858                 "              OpFunctionEnd\n";
4859         spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4860         spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
4861         spec3.numWorkGroups = IVec3(numElements, 1, 1);
4862
4863         group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", "Swap the values of two variables using OpPhi", spec3));
4864
4865         spec4.assembly =
4866                 "OpCapability Shader\n"
4867                 "%ext = OpExtInstImport \"GLSL.std.450\"\n"
4868                 "OpMemoryModel Logical GLSL450\n"
4869                 "OpEntryPoint GLCompute %main \"main\" %id\n"
4870                 "OpExecutionMode %main LocalSize 1 1 1\n"
4871
4872                 "OpSource GLSL 430\n"
4873                 "OpName %main \"main\"\n"
4874                 "OpName %id \"gl_GlobalInvocationID\"\n"
4875
4876                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4877
4878                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4879
4880                 "%id       = OpVariable %uvec3ptr Input\n"
4881                 "%zero     = OpConstant %i32 0\n"
4882                 "%cimod    = OpConstant %u32 " + generateIntWidth(test4Width) + "\n"
4883
4884                 + generateConstantDefinitions(test4Width) +
4885
4886                 "%main     = OpFunction %void None %voidf\n"
4887                 "%entry    = OpLabel\n"
4888                 "%idval    = OpLoad %uvec3 %id\n"
4889                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4890                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4891                 "%inval    = OpLoad %f32 %inloc\n"
4892                 "%xf       = OpConvertUToF %f32 %x\n"
4893                 "%xm       = OpFMul %f32 %xf %inval\n"
4894                 "%xa       = OpExtInst %f32 %ext FAbs %xm\n"
4895                 "%xi       = OpConvertFToU %u32 %xa\n"
4896                 "%selector = OpUMod %u32 %xi %cimod\n"
4897                 "            OpSelectionMerge %phi None\n"
4898                 "            OpSwitch %selector %default "
4899
4900                 + generateSwitchCases(test4Width) +
4901
4902                 "%default  = OpLabel\n"
4903                 "            OpUnreachable\n"
4904
4905                 + generateSwitchTargets(test4Width) +
4906
4907                 "%phi      = OpLabel\n"
4908                 "%result   = OpPhi %f32"
4909
4910                 + generateOpPhiParams(test4Width) +
4911
4912                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4913                 "            OpStore %outloc %result\n"
4914                 "            OpReturn\n"
4915
4916                 "            OpFunctionEnd\n";
4917         spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4918         spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
4919         spec4.numWorkGroups = IVec3(numElements, 1, 1);
4920
4921         group->addChild(new SpvAsmComputeShaderCase(testCtx, "wide", "OpPhi with a lot of parameters", spec4));
4922
4923         spec5.assembly =
4924                 "OpCapability Shader\n"
4925                 "%ext      = OpExtInstImport \"GLSL.std.450\"\n"
4926                 "OpMemoryModel Logical GLSL450\n"
4927                 "OpEntryPoint GLCompute %main \"main\" %id\n"
4928                 "OpExecutionMode %main LocalSize 1 1 1\n"
4929                 "%code     = OpString \"" + codestring + "\"\n"
4930
4931                 "OpSource GLSL 430\n"
4932                 "OpName %main \"main\"\n"
4933                 "OpName %id \"gl_GlobalInvocationID\"\n"
4934
4935                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4936
4937                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4938
4939                 "%id       = OpVariable %uvec3ptr Input\n"
4940                 "%zero     = OpConstant %i32 0\n"
4941                 "%f32_0    = OpConstant %f32 0.0\n"
4942                 "%f32_0_5  = OpConstant %f32 0.5\n"
4943                 "%f32_1    = OpConstant %f32 1.0\n"
4944                 "%f32_1_5  = OpConstant %f32 1.5\n"
4945                 "%f32_2    = OpConstant %f32 2.0\n"
4946                 "%f32_3_5  = OpConstant %f32 3.5\n"
4947                 "%f32_4    = OpConstant %f32 4.0\n"
4948                 "%f32_7_5  = OpConstant %f32 7.5\n"
4949                 "%f32_8    = OpConstant %f32 8.0\n"
4950                 "%f32_15_5 = OpConstant %f32 15.5\n"
4951                 "%f32_16   = OpConstant %f32 16.0\n"
4952                 "%f32_31_5 = OpConstant %f32 31.5\n"
4953                 "%f32_32   = OpConstant %f32 32.0\n"
4954                 "%f32_63_5 = OpConstant %f32 63.5\n"
4955                 "%f32_64   = OpConstant %f32 64.0\n"
4956                 "%f32_127_5 = OpConstant %f32 127.5\n"
4957                 "%f32_128  = OpConstant %f32 128.0\n"
4958                 "%f32_256  = OpConstant %f32 256.0\n"
4959
4960                 "%main     = OpFunction %void None %voidf\n"
4961                 "%entry    = OpLabel\n"
4962                 "%idval    = OpLoad %uvec3 %id\n"
4963                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4964                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4965                 "%inval    = OpLoad %f32 %inloc\n"
4966
4967                 "%xabs     = OpExtInst %f32 %ext FAbs %inval\n"
4968                 "%x8       = OpFMod %f32 %xabs %f32_256\n"
4969                 "%x7       = OpFMod %f32 %xabs %f32_128\n"
4970                 "%x6       = OpFMod %f32 %xabs %f32_64\n"
4971                 "%x5       = OpFMod %f32 %xabs %f32_32\n"
4972                 "%x4       = OpFMod %f32 %xabs %f32_16\n"
4973                 "%x3       = OpFMod %f32 %xabs %f32_8\n"
4974                 "%x2       = OpFMod %f32 %xabs %f32_4\n"
4975                 "%x1       = OpFMod %f32 %xabs %f32_2\n"
4976
4977                 "%b7       = OpFOrdGreaterThanEqual %bool %x8 %f32_127_5\n"
4978                 "%b6       = OpFOrdGreaterThanEqual %bool %x7 %f32_63_5\n"
4979                 "%b5       = OpFOrdGreaterThanEqual %bool %x6 %f32_31_5\n"
4980                 "%b4       = OpFOrdGreaterThanEqual %bool %x5 %f32_15_5\n"
4981                 "%b3       = OpFOrdGreaterThanEqual %bool %x4 %f32_7_5\n"
4982                 "%b2       = OpFOrdGreaterThanEqual %bool %x3 %f32_3_5\n"
4983                 "%b1       = OpFOrdGreaterThanEqual %bool %x2 %f32_1_5\n"
4984                 "%b0       = OpFOrdGreaterThanEqual %bool %x1 %f32_0_5\n"
4985
4986                 + generateOpPhiCase5(codestring) +
4987
4988                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4989                 "            OpStore %outloc %res\n"
4990                 "            OpReturn\n"
4991
4992                 "            OpFunctionEnd\n";
4993         spec5.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4994         spec5.outputs.push_back(BufferSp(new Float32Buffer(outputFloats5)));
4995         spec5.numWorkGroups = IVec3(numElements, 1, 1);
4996
4997         group->addChild(new SpvAsmComputeShaderCase(testCtx, "nested", "Stress OpPhi with a lot of nesting", spec5));
4998
4999         createOpPhiVartypeTests(group, testCtx);
5000
5001         return group.release();
5002 }
5003
5004 // Assembly code used for testing block order is based on GLSL source code:
5005 //
5006 // #version 430
5007 //
5008 // layout(std140, set = 0, binding = 0) readonly buffer Input {
5009 //   float elements[];
5010 // } input_data;
5011 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
5012 //   float elements[];
5013 // } output_data;
5014 //
5015 // void main() {
5016 //   uint x = gl_GlobalInvocationID.x;
5017 //   output_data.elements[x] = input_data.elements[x];
5018 //   if (x > uint(50)) {
5019 //     switch (x % uint(3)) {
5020 //       case 0: output_data.elements[x] += 1.5f; break;
5021 //       case 1: output_data.elements[x] += 42.f; break;
5022 //       case 2: output_data.elements[x] -= 27.f; break;
5023 //       default: break;
5024 //     }
5025 //   } else {
5026 //     output_data.elements[x] = -input_data.elements[x];
5027 //   }
5028 // }
5029 tcu::TestCaseGroup* createBlockOrderGroup (tcu::TestContext& testCtx)
5030 {
5031         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "block_order", "Test block orders"));
5032         ComputeShaderSpec                               spec;
5033         de::Random                                              rnd                             (deStringHash(group->getName()));
5034         const int                                               numElements             = 100;
5035         vector<float>                                   inputFloats             (numElements, 0);
5036         vector<float>                                   outputFloats    (numElements, 0);
5037
5038         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
5039
5040         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
5041         floorAll(inputFloats);
5042
5043         for (size_t ndx = 0; ndx <= 50; ++ndx)
5044                 outputFloats[ndx] = -inputFloats[ndx];
5045
5046         for (size_t ndx = 51; ndx < numElements; ++ndx)
5047         {
5048                 switch (ndx % 3)
5049                 {
5050                         case 0:         outputFloats[ndx] = inputFloats[ndx] + 1.5f; break;
5051                         case 1:         outputFloats[ndx] = inputFloats[ndx] + 42.f; break;
5052                         case 2:         outputFloats[ndx] = inputFloats[ndx] - 27.f; break;
5053                         default:        break;
5054                 }
5055         }
5056
5057         spec.assembly =
5058                 string(getComputeAsmShaderPreamble()) +
5059
5060                 "OpSource GLSL 430\n"
5061                 "OpName %main \"main\"\n"
5062                 "OpName %id \"gl_GlobalInvocationID\"\n"
5063
5064                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5065
5066                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5067
5068                 "%u32ptr       = OpTypePointer Function %u32\n"
5069                 "%u32ptr_input = OpTypePointer Input %u32\n"
5070
5071                 + string(getComputeAsmInputOutputBuffer()) +
5072
5073                 "%id        = OpVariable %uvec3ptr Input\n"
5074                 "%zero      = OpConstant %i32 0\n"
5075                 "%const3    = OpConstant %u32 3\n"
5076                 "%const50   = OpConstant %u32 50\n"
5077                 "%constf1p5 = OpConstant %f32 1.5\n"
5078                 "%constf27  = OpConstant %f32 27.0\n"
5079                 "%constf42  = OpConstant %f32 42.0\n"
5080
5081                 "%main = OpFunction %void None %voidf\n"
5082
5083                 // entry block.
5084                 "%entry    = OpLabel\n"
5085
5086                 // Create a temporary variable to hold the value of gl_GlobalInvocationID.x.
5087                 "%xvar     = OpVariable %u32ptr Function\n"
5088                 "%xptr     = OpAccessChain %u32ptr_input %id %zero\n"
5089                 "%x        = OpLoad %u32 %xptr\n"
5090                 "            OpStore %xvar %x\n"
5091
5092                 "%cmp      = OpUGreaterThan %bool %x %const50\n"
5093                 "            OpSelectionMerge %if_merge None\n"
5094                 "            OpBranchConditional %cmp %if_true %if_false\n"
5095
5096                 // False branch for if-statement: placed in the middle of switch cases and before true branch.
5097                 "%if_false = OpLabel\n"
5098                 "%x_f      = OpLoad %u32 %xvar\n"
5099                 "%inloc_f  = OpAccessChain %f32ptr %indata %zero %x_f\n"
5100                 "%inval_f  = OpLoad %f32 %inloc_f\n"
5101                 "%negate   = OpFNegate %f32 %inval_f\n"
5102                 "%outloc_f = OpAccessChain %f32ptr %outdata %zero %x_f\n"
5103                 "            OpStore %outloc_f %negate\n"
5104                 "            OpBranch %if_merge\n"
5105
5106                 // Merge block for if-statement: placed in the middle of true and false branch.
5107                 "%if_merge = OpLabel\n"
5108                 "            OpReturn\n"
5109
5110                 // True branch for if-statement: placed in the middle of swtich cases and after the false branch.
5111                 "%if_true  = OpLabel\n"
5112                 "%xval_t   = OpLoad %u32 %xvar\n"
5113                 "%mod      = OpUMod %u32 %xval_t %const3\n"
5114                 "            OpSelectionMerge %switch_merge None\n"
5115                 "            OpSwitch %mod %default 0 %case0 1 %case1 2 %case2\n"
5116
5117                 // Merge block for switch-statement: placed before the case
5118                 // bodies.  But it must follow OpSwitch which dominates it.
5119                 "%switch_merge = OpLabel\n"
5120                 "                OpBranch %if_merge\n"
5121
5122                 // Case 1 for switch-statement: placed before case 0.
5123                 // It must follow the OpSwitch that dominates it.
5124                 "%case1    = OpLabel\n"
5125                 "%x_1      = OpLoad %u32 %xvar\n"
5126                 "%inloc_1  = OpAccessChain %f32ptr %indata %zero %x_1\n"
5127                 "%inval_1  = OpLoad %f32 %inloc_1\n"
5128                 "%addf42   = OpFAdd %f32 %inval_1 %constf42\n"
5129                 "%outloc_1 = OpAccessChain %f32ptr %outdata %zero %x_1\n"
5130                 "            OpStore %outloc_1 %addf42\n"
5131                 "            OpBranch %switch_merge\n"
5132
5133                 // Case 2 for switch-statement.
5134                 "%case2    = OpLabel\n"
5135                 "%x_2      = OpLoad %u32 %xvar\n"
5136                 "%inloc_2  = OpAccessChain %f32ptr %indata %zero %x_2\n"
5137                 "%inval_2  = OpLoad %f32 %inloc_2\n"
5138                 "%subf27   = OpFSub %f32 %inval_2 %constf27\n"
5139                 "%outloc_2 = OpAccessChain %f32ptr %outdata %zero %x_2\n"
5140                 "            OpStore %outloc_2 %subf27\n"
5141                 "            OpBranch %switch_merge\n"
5142
5143                 // Default case for switch-statement: placed in the middle of normal cases.
5144                 "%default = OpLabel\n"
5145                 "           OpBranch %switch_merge\n"
5146
5147                 // Case 0 for switch-statement: out of order.
5148                 "%case0    = OpLabel\n"
5149                 "%x_0      = OpLoad %u32 %xvar\n"
5150                 "%inloc_0  = OpAccessChain %f32ptr %indata %zero %x_0\n"
5151                 "%inval_0  = OpLoad %f32 %inloc_0\n"
5152                 "%addf1p5  = OpFAdd %f32 %inval_0 %constf1p5\n"
5153                 "%outloc_0 = OpAccessChain %f32ptr %outdata %zero %x_0\n"
5154                 "            OpStore %outloc_0 %addf1p5\n"
5155                 "            OpBranch %switch_merge\n"
5156
5157                 "            OpFunctionEnd\n";
5158         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5159         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5160         spec.numWorkGroups = IVec3(numElements, 1, 1);
5161
5162         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "various out-of-order blocks", spec));
5163
5164         return group.release();
5165 }
5166
5167 tcu::TestCaseGroup* createMultipleShaderGroup (tcu::TestContext& testCtx)
5168 {
5169         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "multiple_shaders", "Test multiple shaders in the same module"));
5170         ComputeShaderSpec                               spec1;
5171         ComputeShaderSpec                               spec2;
5172         de::Random                                              rnd                             (deStringHash(group->getName()));
5173         const int                                               numElements             = 100;
5174         vector<float>                                   inputFloats             (numElements, 0);
5175         vector<float>                                   outputFloats1   (numElements, 0);
5176         vector<float>                                   outputFloats2   (numElements, 0);
5177         fillRandomScalars(rnd, -500.f, 500.f, &inputFloats[0], numElements);
5178
5179         for (size_t ndx = 0; ndx < numElements; ++ndx)
5180         {
5181                 outputFloats1[ndx] = inputFloats[ndx] + inputFloats[ndx];
5182                 outputFloats2[ndx] = -inputFloats[ndx];
5183         }
5184
5185         const string assembly(
5186                 "OpCapability Shader\n"
5187                 "OpMemoryModel Logical GLSL450\n"
5188                 "OpEntryPoint GLCompute %comp_main1 \"entrypoint1\" %id\n"
5189                 "OpEntryPoint GLCompute %comp_main2 \"entrypoint2\" %id\n"
5190                 // A module cannot have two OpEntryPoint instructions with the same Execution Model and the same Name string.
5191                 "OpEntryPoint Vertex    %vert_main  \"entrypoint2\" %vert_builtins %vertexIndex %instanceIndex\n"
5192                 "OpExecutionMode %comp_main1 LocalSize 1 1 1\n"
5193                 "OpExecutionMode %comp_main2 LocalSize 1 1 1\n"
5194
5195                 "OpName %comp_main1              \"entrypoint1\"\n"
5196                 "OpName %comp_main2              \"entrypoint2\"\n"
5197                 "OpName %vert_main               \"entrypoint2\"\n"
5198                 "OpName %id                      \"gl_GlobalInvocationID\"\n"
5199                 "OpName %vert_builtin_st         \"gl_PerVertex\"\n"
5200                 "OpName %vertexIndex             \"gl_VertexIndex\"\n"
5201                 "OpName %instanceIndex           \"gl_InstanceIndex\"\n"
5202                 "OpMemberName %vert_builtin_st 0 \"gl_Position\"\n"
5203                 "OpMemberName %vert_builtin_st 1 \"gl_PointSize\"\n"
5204                 "OpMemberName %vert_builtin_st 2 \"gl_ClipDistance\"\n"
5205
5206                 "OpDecorate %id                      BuiltIn GlobalInvocationId\n"
5207                 "OpDecorate %vertexIndex             BuiltIn VertexIndex\n"
5208                 "OpDecorate %instanceIndex           BuiltIn InstanceIndex\n"
5209                 "OpDecorate %vert_builtin_st         Block\n"
5210                 "OpMemberDecorate %vert_builtin_st 0 BuiltIn Position\n"
5211                 "OpMemberDecorate %vert_builtin_st 1 BuiltIn PointSize\n"
5212                 "OpMemberDecorate %vert_builtin_st 2 BuiltIn ClipDistance\n"
5213
5214                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5215
5216                 "%zero       = OpConstant %i32 0\n"
5217                 "%one        = OpConstant %u32 1\n"
5218                 "%c_f32_1    = OpConstant %f32 1\n"
5219
5220                 "%i32inputptr         = OpTypePointer Input %i32\n"
5221                 "%vec4                = OpTypeVector %f32 4\n"
5222                 "%vec4ptr             = OpTypePointer Output %vec4\n"
5223                 "%f32arr1             = OpTypeArray %f32 %one\n"
5224                 "%vert_builtin_st     = OpTypeStruct %vec4 %f32 %f32arr1\n"
5225                 "%vert_builtin_st_ptr = OpTypePointer Output %vert_builtin_st\n"
5226                 "%vert_builtins       = OpVariable %vert_builtin_st_ptr Output\n"
5227
5228                 "%id         = OpVariable %uvec3ptr Input\n"
5229                 "%vertexIndex = OpVariable %i32inputptr Input\n"
5230                 "%instanceIndex = OpVariable %i32inputptr Input\n"
5231                 "%c_vec4_1   = OpConstantComposite %vec4 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
5232
5233                 // gl_Position = vec4(1.);
5234                 "%vert_main  = OpFunction %void None %voidf\n"
5235                 "%vert_entry = OpLabel\n"
5236                 "%position   = OpAccessChain %vec4ptr %vert_builtins %zero\n"
5237                 "              OpStore %position %c_vec4_1\n"
5238                 "              OpReturn\n"
5239                 "              OpFunctionEnd\n"
5240
5241                 // Double inputs.
5242                 "%comp_main1  = OpFunction %void None %voidf\n"
5243                 "%comp1_entry = OpLabel\n"
5244                 "%idval1      = OpLoad %uvec3 %id\n"
5245                 "%x1          = OpCompositeExtract %u32 %idval1 0\n"
5246                 "%inloc1      = OpAccessChain %f32ptr %indata %zero %x1\n"
5247                 "%inval1      = OpLoad %f32 %inloc1\n"
5248                 "%add         = OpFAdd %f32 %inval1 %inval1\n"
5249                 "%outloc1     = OpAccessChain %f32ptr %outdata %zero %x1\n"
5250                 "               OpStore %outloc1 %add\n"
5251                 "               OpReturn\n"
5252                 "               OpFunctionEnd\n"
5253
5254                 // Negate inputs.
5255                 "%comp_main2  = OpFunction %void None %voidf\n"
5256                 "%comp2_entry = OpLabel\n"
5257                 "%idval2      = OpLoad %uvec3 %id\n"
5258                 "%x2          = OpCompositeExtract %u32 %idval2 0\n"
5259                 "%inloc2      = OpAccessChain %f32ptr %indata %zero %x2\n"
5260                 "%inval2      = OpLoad %f32 %inloc2\n"
5261                 "%neg         = OpFNegate %f32 %inval2\n"
5262                 "%outloc2     = OpAccessChain %f32ptr %outdata %zero %x2\n"
5263                 "               OpStore %outloc2 %neg\n"
5264                 "               OpReturn\n"
5265                 "               OpFunctionEnd\n");
5266
5267         spec1.assembly = assembly;
5268         spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5269         spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
5270         spec1.numWorkGroups = IVec3(numElements, 1, 1);
5271         spec1.entryPoint = "entrypoint1";
5272
5273         spec2.assembly = assembly;
5274         spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5275         spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
5276         spec2.numWorkGroups = IVec3(numElements, 1, 1);
5277         spec2.entryPoint = "entrypoint2";
5278
5279         group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader1", "multiple shaders in the same module", spec1));
5280         group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader2", "multiple shaders in the same module", spec2));
5281
5282         return group.release();
5283 }
5284
// Returns a string consisting of num4ByteChars repetitions of one 4-byte
// UTF-8 encoded character, i.e. num4ByteChars * 4 bytes in total.
inline std::string makeLongUTF8String (size_t num4ByteChars)
{
        // U+1F30D, encoded in the longest valid UTF-8 form (four bytes). The
        // literal is held in an explicitly narrow char array: Microsoft
        // compilers can otherwise treat it as a wide (16-bit) string, and a
        // C++11 UTF-8 literal is avoided to keep older ones working.
        const char      earthAfrica[]   = "\xF0\x9F\x8C\x8D";
        const size_t    bytesPerChar    = sizeof(earthAfrica) - 1; // drop the terminating NUL

        std::string repeated;
        repeated.reserve(num4ByteChars * bytesPerChar);

        for (size_t remaining = num4ByteChars; remaining > 0; --remaining)
                repeated.append(earthAfrica, bytesPerChar);

        return repeated;
}
5301
5302 tcu::TestCaseGroup* createOpSourceGroup (tcu::TestContext& testCtx)
5303 {
5304         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsource", "Tests the OpSource & OpSourceContinued instruction"));
5305         vector<CaseParameter>                   cases;
5306         de::Random                                              rnd                             (deStringHash(group->getName()));
5307         const int                                               numElements             = 100;
5308         vector<float>                                   positiveFloats  (numElements, 0);
5309         vector<float>                                   negativeFloats  (numElements, 0);
5310         const StringTemplate                    shaderTemplate  (
5311                 "OpCapability Shader\n"
5312                 "OpMemoryModel Logical GLSL450\n"
5313
5314                 "OpEntryPoint GLCompute %main \"main\" %id\n"
5315                 "OpExecutionMode %main LocalSize 1 1 1\n"
5316
5317                 "${SOURCE}\n"
5318
5319                 "OpName %main           \"main\"\n"
5320                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5321
5322                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5323
5324                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5325
5326                 "%id        = OpVariable %uvec3ptr Input\n"
5327                 "%zero      = OpConstant %i32 0\n"
5328
5329                 "%main      = OpFunction %void None %voidf\n"
5330                 "%label     = OpLabel\n"
5331                 "%idval     = OpLoad %uvec3 %id\n"
5332                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5333                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5334                 "%inval     = OpLoad %f32 %inloc\n"
5335                 "%neg       = OpFNegate %f32 %inval\n"
5336                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5337                 "             OpStore %outloc %neg\n"
5338                 "             OpReturn\n"
5339                 "             OpFunctionEnd\n");
5340
5341         cases.push_back(CaseParameter("unknown_source",                                                 "OpSource Unknown 0"));
5342         cases.push_back(CaseParameter("wrong_source",                                                   "OpSource OpenCL_C 210"));
5343         cases.push_back(CaseParameter("normal_filename",                                                "%fname = OpString \"filename\"\n"
5344                                                                                                                                                         "OpSource GLSL 430 %fname"));
5345         cases.push_back(CaseParameter("empty_filename",                                                 "%fname = OpString \"\"\n"
5346                                                                                                                                                         "OpSource GLSL 430 %fname"));
5347         cases.push_back(CaseParameter("normal_source_code",                                             "%fname = OpString \"filename\"\n"
5348                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\""));
5349         cases.push_back(CaseParameter("empty_source_code",                                              "%fname = OpString \"filename\"\n"
5350                                                                                                                                                         "OpSource GLSL 430 %fname \"\""));
5351         cases.push_back(CaseParameter("long_source_code",                                               "%fname = OpString \"filename\"\n"
5352                                                                                                                                                         "OpSource GLSL 430 %fname \"" + makeLongUTF8String(65530) + "ccc\"")); // word count: 65535
5353         cases.push_back(CaseParameter("utf8_source_code",                                               "%fname = OpString \"filename\"\n"
5354                                                                                                                                                         "OpSource GLSL 430 %fname \"\xE2\x98\x82\xE2\x98\x85\"")); // umbrella & black star symbol
5355         cases.push_back(CaseParameter("normal_sourcecontinued",                                 "%fname = OpString \"filename\"\n"
5356                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvo\"\n"
5357                                                                                                                                                         "OpSourceContinued \"id main() {}\""));
5358         cases.push_back(CaseParameter("empty_sourcecontinued",                                  "%fname = OpString \"filename\"\n"
5359                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5360                                                                                                                                                         "OpSourceContinued \"\""));
5361         cases.push_back(CaseParameter("long_sourcecontinued",                                   "%fname = OpString \"filename\"\n"
5362                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5363                                                                                                                                                         "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\"")); // word count: 65535
5364         cases.push_back(CaseParameter("utf8_sourcecontinued",                                   "%fname = OpString \"filename\"\n"
5365                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5366                                                                                                                                                         "OpSourceContinued \"\xE2\x98\x8E\xE2\x9A\x91\"")); // white telephone & black flag symbol
5367         cases.push_back(CaseParameter("multi_sourcecontinued",                                  "%fname = OpString \"filename\"\n"
5368                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\n\"\n"
5369                                                                                                                                                         "OpSourceContinued \"void\"\n"
5370                                                                                                                                                         "OpSourceContinued \"main()\"\n"
5371                                                                                                                                                         "OpSourceContinued \"{}\""));
5372         cases.push_back(CaseParameter("empty_source_before_sourcecontinued",    "%fname = OpString \"filename\"\n"
5373                                                                                                                                                         "OpSource GLSL 430 %fname \"\"\n"
5374                                                                                                                                                         "OpSourceContinued \"#version 430\nvoid main() {}\""));
5375
5376         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5377
5378         for (size_t ndx = 0; ndx < numElements; ++ndx)
5379                 negativeFloats[ndx] = -positiveFloats[ndx];
5380
5381         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5382         {
5383                 map<string, string>             specializations;
5384                 ComputeShaderSpec               spec;
5385
5386                 specializations["SOURCE"] = cases[caseNdx].param;
5387                 spec.assembly = shaderTemplate.specialize(specializations);
5388                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5389                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5390                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5391
5392                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5393         }
5394
5395         return group.release();
5396 }
5397
5398 tcu::TestCaseGroup* createOpSourceExtensionGroup (tcu::TestContext& testCtx)
5399 {
5400         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsourceextension", "Tests the OpSource instruction"));
5401         vector<CaseParameter>                   cases;
5402         de::Random                                              rnd                             (deStringHash(group->getName()));
5403         const int                                               numElements             = 100;
5404         vector<float>                                   inputFloats             (numElements, 0);
5405         vector<float>                                   outputFloats    (numElements, 0);
5406         const StringTemplate                    shaderTemplate  (
5407                 string(getComputeAsmShaderPreamble()) +
5408
5409                 "OpSourceExtension \"${EXTENSION}\"\n"
5410
5411                 "OpName %main           \"main\"\n"
5412                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5413
5414                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5415
5416                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5417
5418                 "%id        = OpVariable %uvec3ptr Input\n"
5419                 "%zero      = OpConstant %i32 0\n"
5420
5421                 "%main      = OpFunction %void None %voidf\n"
5422                 "%label     = OpLabel\n"
5423                 "%idval     = OpLoad %uvec3 %id\n"
5424                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5425                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5426                 "%inval     = OpLoad %f32 %inloc\n"
5427                 "%neg       = OpFNegate %f32 %inval\n"
5428                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5429                 "             OpStore %outloc %neg\n"
5430                 "             OpReturn\n"
5431                 "             OpFunctionEnd\n");
5432
5433         cases.push_back(CaseParameter("empty_extension",        ""));
5434         cases.push_back(CaseParameter("real_extension",         "GL_ARB_texture_rectangle"));
5435         cases.push_back(CaseParameter("fake_extension",         "GL_ARB_im_the_ultimate_extension"));
5436         cases.push_back(CaseParameter("utf8_extension",         "GL_ARB_\xE2\x98\x82\xE2\x98\x85"));
5437         cases.push_back(CaseParameter("long_extension",         makeLongUTF8String(65533) + "ccc")); // word count: 65535
5438
5439         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
5440
5441         for (size_t ndx = 0; ndx < numElements; ++ndx)
5442                 outputFloats[ndx] = -inputFloats[ndx];
5443
5444         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5445         {
5446                 map<string, string>             specializations;
5447                 ComputeShaderSpec               spec;
5448
5449                 specializations["EXTENSION"] = cases[caseNdx].param;
5450                 spec.assembly = shaderTemplate.specialize(specializations);
5451                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5452                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5453                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5454
5455                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5456         }
5457
5458         return group.release();
5459 }
5460
5461 // Checks that a compute shader can generate a constant null value of various types, without exercising a computation on it.
5462 tcu::TestCaseGroup* createOpConstantNullGroup (tcu::TestContext& testCtx)
5463 {
5464         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opconstantnull", "Tests the OpConstantNull instruction"));
5465         vector<CaseParameter>                   cases;
5466         de::Random                                              rnd                             (deStringHash(group->getName()));
5467         const int                                               numElements             = 100;
5468         vector<float>                                   positiveFloats  (numElements, 0);
5469         vector<float>                                   negativeFloats  (numElements, 0);
5470         const StringTemplate                    shaderTemplate  (
5471                 string(getComputeAsmShaderPreamble()) +
5472
5473                 "OpSource GLSL 430\n"
5474                 "OpName %main           \"main\"\n"
5475                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5476
5477                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5478
5479                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5480                 "%uvec2     = OpTypeVector %u32 2\n"
5481                 "%bvec3     = OpTypeVector %bool 3\n"
5482                 "%fvec4     = OpTypeVector %f32 4\n"
5483                 "%fmat33    = OpTypeMatrix %fvec3 3\n"
5484                 "%const100  = OpConstant %u32 100\n"
5485                 "%uarr100   = OpTypeArray %i32 %const100\n"
5486                 "%struct    = OpTypeStruct %f32 %i32 %u32\n"
5487                 "%pointer   = OpTypePointer Function %i32\n"
5488                 + string(getComputeAsmInputOutputBuffer()) +
5489
5490                 "%null      = OpConstantNull ${TYPE}\n"
5491
5492                 "%id        = OpVariable %uvec3ptr Input\n"
5493                 "%zero      = OpConstant %i32 0\n"
5494
5495                 "%main      = OpFunction %void None %voidf\n"
5496                 "%label     = OpLabel\n"
5497                 "%idval     = OpLoad %uvec3 %id\n"
5498                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5499                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5500                 "%inval     = OpLoad %f32 %inloc\n"
5501                 "%neg       = OpFNegate %f32 %inval\n"
5502                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5503                 "             OpStore %outloc %neg\n"
5504                 "             OpReturn\n"
5505                 "             OpFunctionEnd\n");
5506
5507         cases.push_back(CaseParameter("bool",                   "%bool"));
5508         cases.push_back(CaseParameter("sint32",                 "%i32"));
5509         cases.push_back(CaseParameter("uint32",                 "%u32"));
5510         cases.push_back(CaseParameter("float32",                "%f32"));
5511         cases.push_back(CaseParameter("vec4float32",    "%fvec4"));
5512         cases.push_back(CaseParameter("vec3bool",               "%bvec3"));
5513         cases.push_back(CaseParameter("vec2uint32",             "%uvec2"));
5514         cases.push_back(CaseParameter("matrix",                 "%fmat33"));
5515         cases.push_back(CaseParameter("array",                  "%uarr100"));
5516         cases.push_back(CaseParameter("struct",                 "%struct"));
5517         cases.push_back(CaseParameter("pointer",                "%pointer"));
5518
5519         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5520
5521         for (size_t ndx = 0; ndx < numElements; ++ndx)
5522                 negativeFloats[ndx] = -positiveFloats[ndx];
5523
5524         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5525         {
5526                 map<string, string>             specializations;
5527                 ComputeShaderSpec               spec;
5528
5529                 specializations["TYPE"] = cases[caseNdx].param;
5530                 spec.assembly = shaderTemplate.specialize(specializations);
5531                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5532                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5533                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5534
5535                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5536         }
5537
5538         return group.release();
5539 }
5540
5541 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
5542 tcu::TestCaseGroup* createOpConstantCompositeGroup (tcu::TestContext& testCtx)
5543 {
5544         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
5545         vector<CaseParameter>                   cases;
5546         de::Random                                              rnd                             (deStringHash(group->getName()));
5547         const int                                               numElements             = 100;
5548         vector<float>                                   positiveFloats  (numElements, 0);
5549         vector<float>                                   negativeFloats  (numElements, 0);
5550         const StringTemplate                    shaderTemplate  (
5551                 string(getComputeAsmShaderPreamble()) +
5552
5553                 "OpSource GLSL 430\n"
5554                 "OpName %main           \"main\"\n"
5555                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5556
5557                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5558
5559                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5560
5561                 "%id        = OpVariable %uvec3ptr Input\n"
5562                 "%zero      = OpConstant %i32 0\n"
5563
5564                 "${CONSTANT}\n"
5565
5566                 "%main      = OpFunction %void None %voidf\n"
5567                 "%label     = OpLabel\n"
5568                 "%idval     = OpLoad %uvec3 %id\n"
5569                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5570                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5571                 "%inval     = OpLoad %f32 %inloc\n"
5572                 "%neg       = OpFNegate %f32 %inval\n"
5573                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5574                 "             OpStore %outloc %neg\n"
5575                 "             OpReturn\n"
5576                 "             OpFunctionEnd\n");
5577
5578         cases.push_back(CaseParameter("vector",                 "%five = OpConstant %u32 5\n"
5579                                                                                                         "%const = OpConstantComposite %uvec3 %five %zero %five"));
5580         cases.push_back(CaseParameter("matrix",                 "%m3fvec3 = OpTypeMatrix %fvec3 3\n"
5581                                                                                                         "%ten = OpConstant %f32 10.\n"
5582                                                                                                         "%fzero = OpConstant %f32 0.\n"
5583                                                                                                         "%vec = OpConstantComposite %fvec3 %ten %fzero %ten\n"
5584                                                                                                         "%mat = OpConstantComposite %m3fvec3 %vec %vec %vec"));
5585         cases.push_back(CaseParameter("struct",                 "%m2vec3 = OpTypeMatrix %fvec3 2\n"
5586                                                                                                         "%struct = OpTypeStruct %i32 %f32 %fvec3 %m2vec3\n"
5587                                                                                                         "%fzero = OpConstant %f32 0.\n"
5588                                                                                                         "%one = OpConstant %f32 1.\n"
5589                                                                                                         "%point5 = OpConstant %f32 0.5\n"
5590                                                                                                         "%vec = OpConstantComposite %fvec3 %one %one %fzero\n"
5591                                                                                                         "%mat = OpConstantComposite %m2vec3 %vec %vec\n"
5592                                                                                                         "%const = OpConstantComposite %struct %zero %point5 %vec %mat"));
5593         cases.push_back(CaseParameter("nested_struct",  "%st1 = OpTypeStruct %u32 %f32\n"
5594                                                                                                         "%st2 = OpTypeStruct %i32 %i32\n"
5595                                                                                                         "%struct = OpTypeStruct %st1 %st2\n"
5596                                                                                                         "%point5 = OpConstant %f32 0.5\n"
5597                                                                                                         "%one = OpConstant %u32 1\n"
5598                                                                                                         "%ten = OpConstant %i32 10\n"
5599                                                                                                         "%st1val = OpConstantComposite %st1 %one %point5\n"
5600                                                                                                         "%st2val = OpConstantComposite %st2 %ten %ten\n"
5601                                                                                                         "%const = OpConstantComposite %struct %st1val %st2val"));
5602
5603         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5604
5605         for (size_t ndx = 0; ndx < numElements; ++ndx)
5606                 negativeFloats[ndx] = -positiveFloats[ndx];
5607
5608         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5609         {
5610                 map<string, string>             specializations;
5611                 ComputeShaderSpec               spec;
5612
5613                 specializations["CONSTANT"] = cases[caseNdx].param;
5614                 spec.assembly = shaderTemplate.specialize(specializations);
5615                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5616                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5617                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5618
5619                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5620         }
5621
5622         return group.release();
5623 }
5624
// Builds a normalized float equal to 1.<fraction> * 2^exponent.
// The fraction is taken from bits 23..1 of 'significand' (bit 23 is the most
// significant fraction bit); bit 0 and any bits above bit 23 are ignored.
// This layout lets callers write the fraction the way it appears in a C99
// hex-float literal, e.g. 0x1.7f34p-12 is
//     constructNormalizedFloat(-12, 0x7f3400)
// Only normalized values can be produced (the leading 1 is implicit).
float constructNormalizedFloat (deInt32 exponent, deUint32 significand)
{
        const int       fractionBits    = 23;
        float           mantissa        = 1.0f;         // implicit leading one
        float           bitWeight       = 0.5f;         // value of the fraction bit under the probe
        deUint32        probe           = 0x800000;     // starts at the top fraction bit

        for (int bitsLeft = fractionBits; bitsLeft > 0; --bitsLeft)
        {
                if ((significand & probe) != 0)
                        mantissa += bitWeight;

                bitWeight       *= 0.5f;
                probe           >>= 1;
        }

        return std::ldexp(mantissa, exponent);
}
5644
5645 // Compare instruction for the OpQuantizeF16 compute exact case.
5646 // Returns true if the output is what is expected from the test case.
5647 bool compareOpQuantizeF16ComputeExactCase (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5648 {
5649         if (outputAllocs.size() != 1)
5650                 return false;
5651
5652         // Only size is needed because we cannot compare Nans.
5653         size_t byteSize = expectedOutputs[0].getByteSize();
5654
5655         const float*    outputAsFloat   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
5656
5657         if (byteSize != 4*sizeof(float)) {
5658                 return false;
5659         }
5660
5661         if (*outputAsFloat != constructNormalizedFloat(8, 0x304000) &&
5662                 *outputAsFloat != constructNormalizedFloat(8, 0x300000)) {
5663                 return false;
5664         }
5665         outputAsFloat++;
5666
5667         if (*outputAsFloat != -constructNormalizedFloat(-7, 0x600000) &&
5668                 *outputAsFloat != -constructNormalizedFloat(-7, 0x604000)) {
5669                 return false;
5670         }
5671         outputAsFloat++;
5672
5673         if (*outputAsFloat != constructNormalizedFloat(2, 0x01C000) &&
5674                 *outputAsFloat != constructNormalizedFloat(2, 0x020000)) {
5675                 return false;
5676         }
5677         outputAsFloat++;
5678
5679         if (*outputAsFloat != constructNormalizedFloat(1, 0xFFC000) &&
5680                 *outputAsFloat != constructNormalizedFloat(2, 0x000000)) {
5681                 return false;
5682         }
5683
5684         return true;
5685 }
5686
5687 // Checks that every output from a test-case is a float NaN.
5688 bool compareNan (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5689 {
5690         if (outputAllocs.size() != 1)
5691                 return false;
5692
5693         // Only size is needed because we cannot compare Nans.
5694         size_t byteSize = expectedOutputs[0].getByteSize();
5695
5696         const float* const      output_as_float = static_cast<const float*>(outputAllocs[0]->getHostPtr());
5697
5698         for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
5699         {
5700                 if (!deFloatIsNaN(output_as_float[idx]))
5701                 {
5702                         return false;
5703                 }
5704         }
5705
5706         return true;
5707 }
5708
5709 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
5710 tcu::TestCaseGroup* createOpQuantizeToF16Group (tcu::TestContext& testCtx)
5711 {
5712         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opquantize", "Tests the OpQuantizeToF16 instruction"));
5713
5714         const std::string shader (
5715                 string(getComputeAsmShaderPreamble()) +
5716
5717                 "OpSource GLSL 430\n"
5718                 "OpName %main           \"main\"\n"
5719                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5720
5721                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5722
5723                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5724
5725                 "%id        = OpVariable %uvec3ptr Input\n"
5726                 "%zero      = OpConstant %i32 0\n"
5727
5728                 "%main      = OpFunction %void None %voidf\n"
5729                 "%label     = OpLabel\n"
5730                 "%idval     = OpLoad %uvec3 %id\n"
5731                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5732                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5733                 "%inval     = OpLoad %f32 %inloc\n"
5734                 "%quant     = OpQuantizeToF16 %f32 %inval\n"
5735                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5736                 "             OpStore %outloc %quant\n"
5737                 "             OpReturn\n"
5738                 "             OpFunctionEnd\n");
5739
5740         {
5741                 ComputeShaderSpec       spec;
5742                 const deUint32          numElements             = 100;
5743                 vector<float>           infinities;
5744                 vector<float>           results;
5745
5746                 infinities.reserve(numElements);
5747                 results.reserve(numElements);
5748
5749                 for (size_t idx = 0; idx < numElements; ++idx)
5750                 {
5751                         switch(idx % 4)
5752                         {
5753                                 case 0:
5754                                         infinities.push_back(std::numeric_limits<float>::infinity());
5755                                         results.push_back(std::numeric_limits<float>::infinity());
5756                                         break;
5757                                 case 1:
5758                                         infinities.push_back(-std::numeric_limits<float>::infinity());
5759                                         results.push_back(-std::numeric_limits<float>::infinity());
5760                                         break;
5761                                 case 2:
5762                                         infinities.push_back(std::ldexp(1.0f, 16));
5763                                         results.push_back(std::numeric_limits<float>::infinity());
5764                                         break;
5765                                 case 3:
5766                                         infinities.push_back(std::ldexp(-1.0f, 32));
5767                                         results.push_back(-std::numeric_limits<float>::infinity());
5768                                         break;
5769                         }
5770                 }
5771
5772                 spec.assembly = shader;
5773                 spec.inputs.push_back(BufferSp(new Float32Buffer(infinities)));
5774                 spec.outputs.push_back(BufferSp(new Float32Buffer(results)));
5775                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5776
5777                 group->addChild(new SpvAsmComputeShaderCase(
5778                         testCtx, "infinities", "Check that infinities propagated and created", spec));
5779         }
5780
5781         {
5782                 ComputeShaderSpec       spec;
5783                 vector<float>           nans;
5784                 const deUint32          numElements             = 100;
5785
5786                 nans.reserve(numElements);
5787
5788                 for (size_t idx = 0; idx < numElements; ++idx)
5789                 {
5790                         if (idx % 2 == 0)
5791                         {
5792                                 nans.push_back(std::numeric_limits<float>::quiet_NaN());
5793                         }
5794                         else
5795                         {
5796                                 nans.push_back(-std::numeric_limits<float>::quiet_NaN());
5797                         }
5798                 }
5799
5800                 spec.assembly = shader;
5801                 spec.inputs.push_back(BufferSp(new Float32Buffer(nans)));
5802                 spec.outputs.push_back(BufferSp(new Float32Buffer(nans)));
5803                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5804                 spec.verifyIO = &compareNan;
5805
5806                 group->addChild(new SpvAsmComputeShaderCase(
5807                         testCtx, "propagated_nans", "Check that nans are propagated", spec));
5808         }
5809
5810         {
5811                 ComputeShaderSpec       spec;
5812                 vector<float>           small;
5813                 vector<float>           zeros;
5814                 const deUint32          numElements             = 100;
5815
5816                 small.reserve(numElements);
5817                 zeros.reserve(numElements);
5818
5819                 for (size_t idx = 0; idx < numElements; ++idx)
5820                 {
5821                         switch(idx % 6)
5822                         {
5823                                 case 0:
5824                                         small.push_back(0.f);
5825                                         zeros.push_back(0.f);
5826                                         break;
5827                                 case 1:
5828                                         small.push_back(-0.f);
5829                                         zeros.push_back(-0.f);
5830                                         break;
5831                                 case 2:
5832                                         small.push_back(std::ldexp(1.0f, -16));
5833                                         zeros.push_back(0.f);
5834                                         break;
5835                                 case 3:
5836                                         small.push_back(std::ldexp(-1.0f, -32));
5837                                         zeros.push_back(-0.f);
5838                                         break;
5839                                 case 4:
5840                                         small.push_back(std::ldexp(1.0f, -127));
5841                                         zeros.push_back(0.f);
5842                                         break;
5843                                 case 5:
5844                                         small.push_back(-std::ldexp(1.0f, -128));
5845                                         zeros.push_back(-0.f);
5846                                         break;
5847                         }
5848                 }
5849
5850                 spec.assembly = shader;
5851                 spec.inputs.push_back(BufferSp(new Float32Buffer(small)));
5852                 spec.outputs.push_back(BufferSp(new Float32Buffer(zeros)));
5853                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5854
5855                 group->addChild(new SpvAsmComputeShaderCase(
5856                         testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
5857         }
5858
5859         {
5860                 ComputeShaderSpec       spec;
5861                 vector<float>           exact;
5862                 const deUint32          numElements             = 200;
5863
5864                 exact.reserve(numElements);
5865
5866                 for (size_t idx = 0; idx < numElements; ++idx)
5867                         exact.push_back(static_cast<float>(static_cast<int>(idx) - 100));
5868
5869                 spec.assembly = shader;
5870                 spec.inputs.push_back(BufferSp(new Float32Buffer(exact)));
5871                 spec.outputs.push_back(BufferSp(new Float32Buffer(exact)));
5872                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5873
5874                 group->addChild(new SpvAsmComputeShaderCase(
5875                         testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
5876         }
5877
5878         {
5879                 ComputeShaderSpec       spec;
5880                 vector<float>           inputs;
5881                 const deUint32          numElements             = 4;
5882
5883                 inputs.push_back(constructNormalizedFloat(8,    0x300300));
5884                 inputs.push_back(-constructNormalizedFloat(-7,  0x600800));
5885                 inputs.push_back(constructNormalizedFloat(2,    0x01E000));
5886                 inputs.push_back(constructNormalizedFloat(1,    0xFFE000));
5887
5888                 spec.assembly = shader;
5889                 spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
5890                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5891                 spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
5892                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5893
5894                 group->addChild(new SpvAsmComputeShaderCase(
5895                         testCtx, "rounded", "Check that are rounded when needed", spec));
5896         }
5897
5898         return group.release();
5899 }
5900
5901 tcu::TestCaseGroup* createSpecConstantOpQuantizeToF16Group (tcu::TestContext& testCtx)
5902 {
5903         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opspecconstantop_opquantize", "Tests the OpQuantizeToF16 opcode for the OpSpecConstantOp instruction"));
5904
5905         const std::string shader (
5906                 string(getComputeAsmShaderPreamble()) +
5907
5908                 "OpName %main           \"main\"\n"
5909                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5910
5911                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5912
5913                 "OpDecorate %sc_0  SpecId 0\n"
5914                 "OpDecorate %sc_1  SpecId 1\n"
5915                 "OpDecorate %sc_2  SpecId 2\n"
5916                 "OpDecorate %sc_3  SpecId 3\n"
5917                 "OpDecorate %sc_4  SpecId 4\n"
5918                 "OpDecorate %sc_5  SpecId 5\n"
5919
5920                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5921
5922                 "%id        = OpVariable %uvec3ptr Input\n"
5923                 "%zero      = OpConstant %i32 0\n"
5924                 "%c_u32_6   = OpConstant %u32 6\n"
5925
5926                 "%sc_0      = OpSpecConstant %f32 0.\n"
5927                 "%sc_1      = OpSpecConstant %f32 0.\n"
5928                 "%sc_2      = OpSpecConstant %f32 0.\n"
5929                 "%sc_3      = OpSpecConstant %f32 0.\n"
5930                 "%sc_4      = OpSpecConstant %f32 0.\n"
5931                 "%sc_5      = OpSpecConstant %f32 0.\n"
5932
5933                 "%sc_0_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_0\n"
5934                 "%sc_1_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_1\n"
5935                 "%sc_2_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_2\n"
5936                 "%sc_3_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_3\n"
5937                 "%sc_4_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_4\n"
5938                 "%sc_5_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_5\n"
5939
5940                 "%main      = OpFunction %void None %voidf\n"
5941                 "%label     = OpLabel\n"
5942                 "%idval     = OpLoad %uvec3 %id\n"
5943                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5944                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5945                 "%selector  = OpUMod %u32 %x %c_u32_6\n"
5946                 "            OpSelectionMerge %exit None\n"
5947                 "            OpSwitch %selector %exit 0 %case0 1 %case1 2 %case2 3 %case3 4 %case4 5 %case5\n"
5948
5949                 "%case0     = OpLabel\n"
5950                 "             OpStore %outloc %sc_0_quant\n"
5951                 "             OpBranch %exit\n"
5952
5953                 "%case1     = OpLabel\n"
5954                 "             OpStore %outloc %sc_1_quant\n"
5955                 "             OpBranch %exit\n"
5956
5957                 "%case2     = OpLabel\n"
5958                 "             OpStore %outloc %sc_2_quant\n"
5959                 "             OpBranch %exit\n"
5960
5961                 "%case3     = OpLabel\n"
5962                 "             OpStore %outloc %sc_3_quant\n"
5963                 "             OpBranch %exit\n"
5964
5965                 "%case4     = OpLabel\n"
5966                 "             OpStore %outloc %sc_4_quant\n"
5967                 "             OpBranch %exit\n"
5968
5969                 "%case5     = OpLabel\n"
5970                 "             OpStore %outloc %sc_5_quant\n"
5971                 "             OpBranch %exit\n"
5972
5973                 "%exit      = OpLabel\n"
5974                 "             OpReturn\n"
5975
5976                 "             OpFunctionEnd\n");
5977
5978         {
5979                 ComputeShaderSpec       spec;
5980                 const deUint8           numCases        = 4;
5981                 vector<float>           inputs          (numCases, 0.f);
5982                 vector<float>           outputs;
5983
5984                 spec.assembly           = shader;
5985                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
5986
5987                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::numeric_limits<float>::infinity()));
5988                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::numeric_limits<float>::infinity()));
5989                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, 16)));
5990                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, 32)));
5991
5992                 outputs.push_back(std::numeric_limits<float>::infinity());
5993                 outputs.push_back(-std::numeric_limits<float>::infinity());
5994                 outputs.push_back(std::numeric_limits<float>::infinity());
5995                 outputs.push_back(-std::numeric_limits<float>::infinity());
5996
5997                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5998                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
5999
6000                 group->addChild(new SpvAsmComputeShaderCase(
6001                         testCtx, "infinities", "Check that infinities propagated and created", spec));
6002         }
6003
6004         {
6005                 ComputeShaderSpec       spec;
6006                 const deUint8           numCases        = 2;
6007                 vector<float>           inputs          (numCases, 0.f);
6008                 vector<float>           outputs;
6009
6010                 spec.assembly           = shader;
6011                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
6012                 spec.verifyIO           = &compareNan;
6013
6014                 outputs.push_back(std::numeric_limits<float>::quiet_NaN());
6015                 outputs.push_back(-std::numeric_limits<float>::quiet_NaN());
6016
6017                 for (deUint8 idx = 0; idx < numCases; ++idx)
6018                         spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
6019
6020                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6021                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6022
6023                 group->addChild(new SpvAsmComputeShaderCase(
6024                         testCtx, "propagated_nans", "Check that nans are propagated", spec));
6025         }
6026
6027         {
6028                 ComputeShaderSpec       spec;
6029                 const deUint8           numCases        = 6;
6030                 vector<float>           inputs          (numCases, 0.f);
6031                 vector<float>           outputs;
6032
6033                 spec.assembly           = shader;
6034                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
6035
6036                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(0.f));
6037                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-0.f));
6038                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -16)));
6039                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, -32)));
6040                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -127)));
6041                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::ldexp(1.0f, -128)));
6042
6043                 outputs.push_back(0.f);
6044                 outputs.push_back(-0.f);
6045                 outputs.push_back(0.f);
6046                 outputs.push_back(-0.f);
6047                 outputs.push_back(0.f);
6048                 outputs.push_back(-0.f);
6049
6050                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6051                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6052
6053                 group->addChild(new SpvAsmComputeShaderCase(
6054                         testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
6055         }
6056
6057         {
6058                 ComputeShaderSpec       spec;
6059                 const deUint8           numCases        = 6;
6060                 vector<float>           inputs          (numCases, 0.f);
6061                 vector<float>           outputs;
6062
6063                 spec.assembly           = shader;
6064                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
6065
6066                 for (deUint8 idx = 0; idx < 6; ++idx)
6067                 {
6068                         const float f = static_cast<float>(idx * 10 - 30) / 4.f;
6069                         spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(f));
6070                         outputs.push_back(f);
6071                 }
6072
6073                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6074                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6075
6076                 group->addChild(new SpvAsmComputeShaderCase(
6077                         testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
6078         }
6079
6080         {
6081                 ComputeShaderSpec       spec;
6082                 const deUint8           numCases        = 4;
6083                 vector<float>           inputs          (numCases, 0.f);
6084                 vector<float>           outputs;
6085
6086                 spec.assembly           = shader;
6087                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
6088                 spec.verifyIO           = &compareOpQuantizeF16ComputeExactCase;
6089
6090                 outputs.push_back(constructNormalizedFloat(8, 0x300300));
6091                 outputs.push_back(-constructNormalizedFloat(-7, 0x600800));
6092                 outputs.push_back(constructNormalizedFloat(2, 0x01E000));
6093                 outputs.push_back(constructNormalizedFloat(1, 0xFFE000));
6094
6095                 for (deUint8 idx = 0; idx < numCases; ++idx)
6096                         spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
6097
6098                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6099                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6100
6101                 group->addChild(new SpvAsmComputeShaderCase(
6102                         testCtx, "rounded", "Check that are rounded when needed", spec));
6103         }
6104
6105         return group.release();
6106 }
6107
6108 // Checks that constant null/composite values can be used in computation.
6109 tcu::TestCaseGroup* createOpConstantUsageGroup (tcu::TestContext& testCtx)
6110 {
6111         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opconstantnullcomposite", "Spotcheck the OpConstantNull & OpConstantComposite instruction"));
6112         ComputeShaderSpec                               spec;
6113         de::Random                                              rnd                             (deStringHash(group->getName()));
6114         const int                                               numElements             = 100;
6115         vector<float>                                   positiveFloats  (numElements, 0);
6116         vector<float>                                   negativeFloats  (numElements, 0);
6117
6118         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
6119
6120         for (size_t ndx = 0; ndx < numElements; ++ndx)
6121                 negativeFloats[ndx] = -positiveFloats[ndx];
6122
6123         spec.assembly =
6124                 "OpCapability Shader\n"
6125                 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
6126                 "OpMemoryModel Logical GLSL450\n"
6127                 "OpEntryPoint GLCompute %main \"main\" %id\n"
6128                 "OpExecutionMode %main LocalSize 1 1 1\n"
6129
6130                 "OpSource GLSL 430\n"
6131                 "OpName %main           \"main\"\n"
6132                 "OpName %id             \"gl_GlobalInvocationID\"\n"
6133
6134                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6135
6136                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6137
6138                 "%fmat      = OpTypeMatrix %fvec3 3\n"
6139                 "%ten       = OpConstant %u32 10\n"
6140                 "%f32arr10  = OpTypeArray %f32 %ten\n"
6141                 "%fst       = OpTypeStruct %f32 %f32\n"
6142
6143                 + string(getComputeAsmInputOutputBuffer()) +
6144
6145                 "%id        = OpVariable %uvec3ptr Input\n"
6146                 "%zero      = OpConstant %i32 0\n"
6147
6148                 // Create a bunch of null values
6149                 "%unull     = OpConstantNull %u32\n"
6150                 "%fnull     = OpConstantNull %f32\n"
6151                 "%vnull     = OpConstantNull %fvec3\n"
6152                 "%mnull     = OpConstantNull %fmat\n"
6153                 "%anull     = OpConstantNull %f32arr10\n"
6154                 "%snull     = OpConstantComposite %fst %fnull %fnull\n"
6155
6156                 "%main      = OpFunction %void None %voidf\n"
6157                 "%label     = OpLabel\n"
6158                 "%idval     = OpLoad %uvec3 %id\n"
6159                 "%x         = OpCompositeExtract %u32 %idval 0\n"
6160                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6161                 "%inval     = OpLoad %f32 %inloc\n"
6162                 "%neg       = OpFNegate %f32 %inval\n"
6163
6164                 // Get the abs() of (a certain element of) those null values
6165                 "%unull_cov = OpConvertUToF %f32 %unull\n"
6166                 "%unull_abs = OpExtInst %f32 %std450 FAbs %unull_cov\n"
6167                 "%fnull_abs = OpExtInst %f32 %std450 FAbs %fnull\n"
6168                 "%vnull_0   = OpCompositeExtract %f32 %vnull 0\n"
6169                 "%vnull_abs = OpExtInst %f32 %std450 FAbs %vnull_0\n"
6170                 "%mnull_12  = OpCompositeExtract %f32 %mnull 1 2\n"
6171                 "%mnull_abs = OpExtInst %f32 %std450 FAbs %mnull_12\n"
6172                 "%anull_3   = OpCompositeExtract %f32 %anull 3\n"
6173                 "%anull_abs = OpExtInst %f32 %std450 FAbs %anull_3\n"
6174                 "%snull_1   = OpCompositeExtract %f32 %snull 1\n"
6175                 "%snull_abs = OpExtInst %f32 %std450 FAbs %snull_1\n"
6176
6177                 // Add them all
6178                 "%add1      = OpFAdd %f32 %neg  %unull_abs\n"
6179                 "%add2      = OpFAdd %f32 %add1 %fnull_abs\n"
6180                 "%add3      = OpFAdd %f32 %add2 %vnull_abs\n"
6181                 "%add4      = OpFAdd %f32 %add3 %mnull_abs\n"
6182                 "%add5      = OpFAdd %f32 %add4 %anull_abs\n"
6183                 "%final     = OpFAdd %f32 %add5 %snull_abs\n"
6184
6185                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6186                 "             OpStore %outloc %final\n" // write to output
6187                 "             OpReturn\n"
6188                 "             OpFunctionEnd\n";
6189         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
6190         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
6191         spec.numWorkGroups = IVec3(numElements, 1, 1);
6192
6193         group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "Check that values constructed via OpConstantNull & OpConstantComposite can be used", spec));
6194
6195         return group.release();
6196 }
6197
6198 // Assembly code used for testing loop control is based on GLSL source code:
6199 // #version 430
6200 //
6201 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6202 //   float elements[];
6203 // } input_data;
6204 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6205 //   float elements[];
6206 // } output_data;
6207 //
6208 // void main() {
6209 //   uint x = gl_GlobalInvocationID.x;
6210 //   output_data.elements[x] = input_data.elements[x];
6211 //   for (uint i = 0; i < 4; ++i)
6212 //     output_data.elements[x] += 1.f;
6213 // }
6214 tcu::TestCaseGroup* createLoopControlGroup (tcu::TestContext& testCtx)
6215 {
6216         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "loop_control", "Tests loop control cases"));
6217         vector<CaseParameter>                   cases;
6218         de::Random                                              rnd                             (deStringHash(group->getName()));
6219         const int                                               numElements             = 100;
6220         vector<float>                                   inputFloats             (numElements, 0);
6221         vector<float>                                   outputFloats    (numElements, 0);
6222         const StringTemplate                    shaderTemplate  (
6223                 string(getComputeAsmShaderPreamble()) +
6224
6225                 "OpSource GLSL 430\n"
6226                 "OpName %main \"main\"\n"
6227                 "OpName %id \"gl_GlobalInvocationID\"\n"
6228
6229                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6230
6231                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6232
6233                 "%u32ptr      = OpTypePointer Function %u32\n"
6234
6235                 "%id          = OpVariable %uvec3ptr Input\n"
6236                 "%zero        = OpConstant %i32 0\n"
6237                 "%uzero       = OpConstant %u32 0\n"
6238                 "%one         = OpConstant %i32 1\n"
6239                 "%constf1     = OpConstant %f32 1.0\n"
6240                 "%four        = OpConstant %u32 4\n"
6241
6242                 "%main        = OpFunction %void None %voidf\n"
6243                 "%entry       = OpLabel\n"
6244                 "%i           = OpVariable %u32ptr Function\n"
6245                 "               OpStore %i %uzero\n"
6246
6247                 "%idval       = OpLoad %uvec3 %id\n"
6248                 "%x           = OpCompositeExtract %u32 %idval 0\n"
6249                 "%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
6250                 "%inval       = OpLoad %f32 %inloc\n"
6251                 "%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
6252                 "               OpStore %outloc %inval\n"
6253                 "               OpBranch %loop_entry\n"
6254
6255                 "%loop_entry  = OpLabel\n"
6256                 "%i_val       = OpLoad %u32 %i\n"
6257                 "%cmp_lt      = OpULessThan %bool %i_val %four\n"
6258                 "               OpLoopMerge %loop_merge %loop_body ${CONTROL}\n"
6259                 "               OpBranchConditional %cmp_lt %loop_body %loop_merge\n"
6260                 "%loop_body   = OpLabel\n"
6261                 "%outval      = OpLoad %f32 %outloc\n"
6262                 "%addf1       = OpFAdd %f32 %outval %constf1\n"
6263                 "               OpStore %outloc %addf1\n"
6264                 "%new_i       = OpIAdd %u32 %i_val %one\n"
6265                 "               OpStore %i %new_i\n"
6266                 "               OpBranch %loop_entry\n"
6267                 "%loop_merge  = OpLabel\n"
6268                 "               OpReturn\n"
6269                 "               OpFunctionEnd\n");
6270
6271         cases.push_back(CaseParameter("none",                           "None"));
6272         cases.push_back(CaseParameter("unroll",                         "Unroll"));
6273         cases.push_back(CaseParameter("dont_unroll",            "DontUnroll"));
6274
6275         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6276
6277         for (size_t ndx = 0; ndx < numElements; ++ndx)
6278                 outputFloats[ndx] = inputFloats[ndx] + 4.f;
6279
6280         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6281         {
6282                 map<string, string>             specializations;
6283                 ComputeShaderSpec               spec;
6284
6285                 specializations["CONTROL"] = cases[caseNdx].param;
6286                 spec.assembly = shaderTemplate.specialize(specializations);
6287                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6288                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6289                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6290
6291                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6292         }
6293
6294         group->addChild(new SpvAsmLoopControlDependencyLengthCase(testCtx, "dependency_length", "dependency_length"));
6295         group->addChild(new SpvAsmLoopControlDependencyInfiniteCase(testCtx, "dependency_infinite", "dependency_infinite"));
6296
6297         return group.release();
6298 }
6299
6300 // Assembly code used for testing selection control is based on GLSL source code:
6301 // #version 430
6302 //
6303 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6304 //   float elements[];
6305 // } input_data;
6306 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6307 //   float elements[];
6308 // } output_data;
6309 //
6310 // void main() {
6311 //   uint x = gl_GlobalInvocationID.x;
6312 //   float val = input_data.elements[x];
6313 //   if (val > 10.f)
6314 //     output_data.elements[x] = val + 1.f;
6315 //   else
6316 //     output_data.elements[x] = val - 1.f;
6317 // }
6318 tcu::TestCaseGroup* createSelectionControlGroup (tcu::TestContext& testCtx)
6319 {
6320         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "selection_control", "Tests selection control cases"));
6321         vector<CaseParameter>                   cases;
6322         de::Random                                              rnd                             (deStringHash(group->getName()));
6323         const int                                               numElements             = 100;
6324         vector<float>                                   inputFloats             (numElements, 0);
6325         vector<float>                                   outputFloats    (numElements, 0);
6326         const StringTemplate                    shaderTemplate  (
6327                 string(getComputeAsmShaderPreamble()) +
6328
6329                 "OpSource GLSL 430\n"
6330                 "OpName %main \"main\"\n"
6331                 "OpName %id \"gl_GlobalInvocationID\"\n"
6332
6333                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6334
6335                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6336
6337                 "%id       = OpVariable %uvec3ptr Input\n"
6338                 "%zero     = OpConstant %i32 0\n"
6339                 "%constf1  = OpConstant %f32 1.0\n"
6340                 "%constf10 = OpConstant %f32 10.0\n"
6341
6342                 "%main     = OpFunction %void None %voidf\n"
6343                 "%entry    = OpLabel\n"
6344                 "%idval    = OpLoad %uvec3 %id\n"
6345                 "%x        = OpCompositeExtract %u32 %idval 0\n"
6346                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
6347                 "%inval    = OpLoad %f32 %inloc\n"
6348                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
6349                 "%cmp_gt   = OpFOrdGreaterThan %bool %inval %constf10\n"
6350
6351                 "            OpSelectionMerge %if_end ${CONTROL}\n"
6352                 "            OpBranchConditional %cmp_gt %if_true %if_false\n"
6353                 "%if_true  = OpLabel\n"
6354                 "%addf1    = OpFAdd %f32 %inval %constf1\n"
6355                 "            OpStore %outloc %addf1\n"
6356                 "            OpBranch %if_end\n"
6357                 "%if_false = OpLabel\n"
6358                 "%subf1    = OpFSub %f32 %inval %constf1\n"
6359                 "            OpStore %outloc %subf1\n"
6360                 "            OpBranch %if_end\n"
6361                 "%if_end   = OpLabel\n"
6362                 "            OpReturn\n"
6363                 "            OpFunctionEnd\n");
6364
6365         cases.push_back(CaseParameter("none",                                   "None"));
6366         cases.push_back(CaseParameter("flatten",                                "Flatten"));
6367         cases.push_back(CaseParameter("dont_flatten",                   "DontFlatten"));
6368         cases.push_back(CaseParameter("flatten_dont_flatten",   "DontFlatten|Flatten"));
6369
6370         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6371
6372         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6373         floorAll(inputFloats);
6374
6375         for (size_t ndx = 0; ndx < numElements; ++ndx)
6376                 outputFloats[ndx] = inputFloats[ndx] + (inputFloats[ndx] > 10.f ? 1.f : -1.f);
6377
6378         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6379         {
6380                 map<string, string>             specializations;
6381                 ComputeShaderSpec               spec;
6382
6383                 specializations["CONTROL"] = cases[caseNdx].param;
6384                 spec.assembly = shaderTemplate.specialize(specializations);
6385                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6386                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6387                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6388
6389                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6390         }
6391
6392         return group.release();
6393 }
6394
6395 void getOpNameAbuseCases (vector<CaseParameter> &abuseCases)
6396 {
6397         // Generate a long name.
6398         std::string longname;
6399         longname.resize(65535, 'k'); // max string literal, spir-v 2.17
6400
6401         // Some bad names, abusing utf-8 encoding. This may also cause problems
6402         // with the logs.
6403         // 1. Various illegal code points in utf-8
6404         std::string utf8illegal =
6405                 "Illegal bytes in UTF-8: "
6406                 "\xc0 \xc1 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff"
6407                 "illegal surrogates: \xed\xad\xbf \xed\xbe\x80";
6408
6409         // 2. Zero encoded as overlong, not exactly legal but often supported to differentiate from terminating zero
6410         std::string utf8nul = "UTF-8 encoded nul \xC0\x80 (should not end name)";
6411
6412         // 3. Some overlong encodings
6413         std::string utf8overlong =
6414                 "UTF-8 overlong \xF0\x82\x82\xAC \xfc\x83\xbf\xbf\xbf\xbf \xf8\x87\xbf\xbf\xbf "
6415                 "\xf0\x8f\xbf\xbf";
6416
6417         // 4. Internet "zalgo" meme "bleeding text"
6418         std::string utf8zalgo =
6419                 "\x56\xcc\xb5\xcc\x85\xcc\x94\xcc\x88\xcd\x8a\xcc\x91\xcc\x88\xcd\x91\xcc\x83\xcd\x82"
6420                 "\xcc\x83\xcd\x90\xcc\x8a\xcc\x92\xcc\x92\xcd\x8b\xcc\x94\xcd\x9d\xcc\x98\xcc\xab\xcc"
6421                 "\xae\xcc\xa9\xcc\xad\xcc\x97\xcc\xb0\x75\xcc\xb6\xcc\xbe\xcc\x80\xcc\x82\xcc\x84\xcd"
6422                 "\x84\xcc\x90\xcd\x86\xcc\x9a\xcd\x84\xcc\x9b\xcd\x86\xcd\x92\xcc\x9a\xcd\x99\xcd\x99"
6423                 "\xcc\xbb\xcc\x98\xcd\x8e\xcd\x88\xcd\x9a\xcc\xa6\xcc\x9c\xcc\xab\xcc\x99\xcd\x94\xcd"
6424                 "\x99\xcd\x95\xcc\xa5\xcc\xab\xcd\x89\x6c\xcc\xb8\xcc\x8e\xcc\x8b\xcc\x8b\xcc\x9a\xcc"
6425                 "\x8e\xcd\x9d\xcc\x80\xcc\xa1\xcc\xad\xcd\x9c\xcc\xba\xcc\x96\xcc\xb3\xcc\xa2\xcd\x8e"
6426                 "\xcc\xa2\xcd\x96\x6b\xcc\xb8\xcc\x84\xcd\x81\xcc\xbf\xcc\x8d\xcc\x89\xcc\x85\xcc\x92"
6427                 "\xcc\x84\xcc\x90\xcd\x81\xcc\x93\xcd\x90\xcd\x92\xcd\x9d\xcc\x84\xcd\x98\xcd\x9d\xcd"
6428                 "\xa0\xcd\x91\xcc\x94\xcc\xb9\xcd\x93\xcc\xa5\xcd\x87\xcc\xad\xcc\xa7\xcd\x96\xcd\x99"
6429                 "\xcc\x9d\xcc\xbc\xcd\x96\xcd\x93\xcc\x9d\xcc\x99\xcc\xa8\xcc\xb1\xcd\x85\xcc\xba\xcc"
6430                 "\xa7\x61\xcc\xb8\xcc\x8e\xcc\x81\xcd\x90\xcd\x84\xcd\x8c\xcc\x8c\xcc\x85\xcd\x86\xcc"
6431                 "\x84\xcd\x84\xcc\x90\xcc\x84\xcc\x8d\xcd\x99\xcd\x8d\xcc\xb0\xcc\xa3\xcc\xa6\xcd\x89"
6432                 "\xcd\x8d\xcd\x87\xcc\x98\xcd\x8d\xcc\xa4\xcd\x9a\xcd\x8e\xcc\xab\xcc\xb9\xcc\xac\xcc"
6433                 "\xa2\xcd\x87\xcc\xa0\xcc\xb3\xcd\x89\xcc\xb9\xcc\xa7\xcc\xa6\xcd\x89\xcd\x95\x6e\xcc"
6434                 "\xb8\xcd\x8a\xcc\x8a\xcd\x82\xcc\x9b\xcd\x81\xcd\x90\xcc\x85\xcc\x9b\xcd\x80\xcd\x91"
6435                 "\xcd\x9b\xcc\x81\xcd\x81\xcc\x9a\xcc\xb3\xcd\x9c\xcc\x9e\xcc\x9d\xcd\x99\xcc\xa2\xcd"
6436                 "\x93\xcd\x96\xcc\x97\xff";
6437
6438         // General name abuses
6439         abuseCases.push_back(CaseParameter("_has_very_long_name", longname));
6440         abuseCases.push_back(CaseParameter("_utf8_illegal", utf8illegal));
6441         abuseCases.push_back(CaseParameter("_utf8_nul", utf8nul));
6442         abuseCases.push_back(CaseParameter("_utf8_overlong", utf8overlong));
6443         abuseCases.push_back(CaseParameter("_utf8_zalgo", utf8zalgo));
6444
6445         // GL keywords
6446         abuseCases.push_back(CaseParameter("_is_gl_Position", "gl_Position"));
6447         abuseCases.push_back(CaseParameter("_is_gl_InstanceID", "gl_InstanceID"));
6448         abuseCases.push_back(CaseParameter("_is_gl_PrimitiveID", "gl_PrimitiveID"));
6449         abuseCases.push_back(CaseParameter("_is_gl_TessCoord", "gl_TessCoord"));
6450         abuseCases.push_back(CaseParameter("_is_gl_PerVertex", "gl_PerVertex"));
6451         abuseCases.push_back(CaseParameter("_is_gl_InvocationID", "gl_InvocationID"));
6452         abuseCases.push_back(CaseParameter("_is_gl_PointSize", "gl_PointSize"));
6453         abuseCases.push_back(CaseParameter("_is_gl_PointCoord", "gl_PointCoord"));
6454         abuseCases.push_back(CaseParameter("_is_gl_Layer", "gl_Layer"));
6455         abuseCases.push_back(CaseParameter("_is_gl_FragDepth", "gl_FragDepth"));
6456         abuseCases.push_back(CaseParameter("_is_gl_NumWorkGroups", "gl_NumWorkGroups"));
6457         abuseCases.push_back(CaseParameter("_is_gl_WorkGroupID", "gl_WorkGroupID"));
6458         abuseCases.push_back(CaseParameter("_is_gl_LocalInvocationID", "gl_LocalInvocationID"));
6459         abuseCases.push_back(CaseParameter("_is_gl_GlobalInvocationID", "gl_GlobalInvocationID"));
6460         abuseCases.push_back(CaseParameter("_is_gl_MaxVertexAttribs", "gl_MaxVertexAttribs"));
6461         abuseCases.push_back(CaseParameter("_is_gl_MaxViewports", "gl_MaxViewports"));
6462         abuseCases.push_back(CaseParameter("_is_gl_MaxComputeWorkGroupCount", "gl_MaxComputeWorkGroupCount"));
6463         abuseCases.push_back(CaseParameter("_is_mat3", "mat3"));
6464         abuseCases.push_back(CaseParameter("_is_volatile", "volatile"));
6465         abuseCases.push_back(CaseParameter("_is_inout", "inout"));
6466         abuseCases.push_back(CaseParameter("_is_isampler3d", "isampler3d"));
6467 }
6468
6469 tcu::TestCaseGroup* createOpNameGroup (tcu::TestContext& testCtx)
6470 {
6471         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opname", "Tests OpName cases"));
6472         de::MovePtr<tcu::TestCaseGroup> entryMainGroup  (new tcu::TestCaseGroup(testCtx, "entry_main", "OpName tests with entry main"));
6473         de::MovePtr<tcu::TestCaseGroup> entryNotGroup   (new tcu::TestCaseGroup(testCtx, "entry_rdc", "OpName tests with entry rdc"));
6474         de::MovePtr<tcu::TestCaseGroup> abuseGroup              (new tcu::TestCaseGroup(testCtx, "abuse", "OpName abuse tests"));
6475         vector<CaseParameter>                   cases;
6476         vector<CaseParameter>                   abuseCases;
6477         vector<string>                                  testFunc;
6478         de::Random                                              rnd                             (deStringHash(group->getName()));
6479         const int                                               numElements             = 128;
6480         vector<float>                                   inputFloats             (numElements, 0);
6481         vector<float>                                   outputFloats    (numElements, 0);
6482
6483         getOpNameAbuseCases(abuseCases);
6484
6485         fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6486
6487         for(size_t ndx = 0; ndx < numElements; ++ndx)
6488                 outputFloats[ndx] = -inputFloats[ndx];
6489
6490         const string commonShaderHeader =
6491                 "OpCapability Shader\n"
6492                 "OpMemoryModel Logical GLSL450\n"
6493                 "OpEntryPoint GLCompute %main \"main\" %id\n"
6494                 "OpExecutionMode %main LocalSize 1 1 1\n";
6495
6496         const string commonShaderFooter =
6497                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6498
6499                 + string(getComputeAsmInputOutputBufferTraits())
6500                 + string(getComputeAsmCommonTypes())
6501                 + string(getComputeAsmInputOutputBuffer()) +
6502
6503                 "%id        = OpVariable %uvec3ptr Input\n"
6504                 "%zero      = OpConstant %i32 0\n"
6505
6506                 "%func      = OpFunction %void None %voidf\n"
6507                 "%5         = OpLabel\n"
6508                 "             OpReturn\n"
6509                 "             OpFunctionEnd\n"
6510
6511                 "%main      = OpFunction %void None %voidf\n"
6512                 "%entry     = OpLabel\n"
6513                 "%7         = OpFunctionCall %void %func\n"
6514
6515                 "%idval     = OpLoad %uvec3 %id\n"
6516                 "%x         = OpCompositeExtract %u32 %idval 0\n"
6517
6518                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6519                 "%inval     = OpLoad %f32 %inloc\n"
6520                 "%neg       = OpFNegate %f32 %inval\n"
6521                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6522                 "             OpStore %outloc %neg\n"
6523
6524                 "             OpReturn\n"
6525                 "             OpFunctionEnd\n";
6526
6527         const StringTemplate shaderTemplate (
6528                 "OpCapability Shader\n"
6529                 "OpMemoryModel Logical GLSL450\n"
6530                 "OpEntryPoint GLCompute %main \"${ENTRY}\" %id\n"
6531                 "OpExecutionMode %main LocalSize 1 1 1\n"
6532                 "OpName %${ID} \"${NAME}\"\n" +
6533                 commonShaderFooter);
6534
6535         const std::string multipleNames =
6536                 commonShaderHeader +
6537                 "OpName %main \"to_be\"\n"
6538                 "OpName %id   \"or_not\"\n"
6539                 "OpName %main \"to_be\"\n"
6540                 "OpName %main \"makes_no\"\n"
6541                 "OpName %func \"difference\"\n"
6542                 "OpName %5    \"to_me\"\n" +
6543                 commonShaderFooter;
6544
6545         {
6546                 ComputeShaderSpec       spec;
6547
6548                 spec.assembly           = multipleNames;
6549                 spec.numWorkGroups      = IVec3(numElements, 1, 1);
6550                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6551                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6552
6553                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "main_has_multiple_names", "multiple_names", spec));
6554         }
6555
6556         const std::string everythingNamed =
6557                 commonShaderHeader +
6558                 "OpName %main   \"name1\"\n"
6559                 "OpName %id     \"name2\"\n"
6560                 "OpName %zero   \"name3\"\n"
6561                 "OpName %entry  \"name4\"\n"
6562                 "OpName %func   \"name5\"\n"
6563                 "OpName %5      \"name6\"\n"
6564                 "OpName %7      \"name7\"\n"
6565                 "OpName %idval  \"name8\"\n"
6566                 "OpName %inloc  \"name9\"\n"
6567                 "OpName %inval  \"name10\"\n"
6568                 "OpName %neg    \"name11\"\n"
6569                 "OpName %outloc \"name12\"\n"+
6570                 commonShaderFooter;
6571         {
6572                 ComputeShaderSpec       spec;
6573
6574                 spec.assembly           = everythingNamed;
6575                 spec.numWorkGroups      = IVec3(numElements, 1, 1);
6576                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6577                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6578
6579                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named", "everything_named", spec));
6580         }
6581
6582         const std::string everythingNamedTheSame =
6583                 commonShaderHeader +
6584                 "OpName %main   \"the_same\"\n"
6585                 "OpName %id     \"the_same\"\n"
6586                 "OpName %zero   \"the_same\"\n"
6587                 "OpName %entry  \"the_same\"\n"
6588                 "OpName %func   \"the_same\"\n"
6589                 "OpName %5      \"the_same\"\n"
6590                 "OpName %7      \"the_same\"\n"
6591                 "OpName %idval  \"the_same\"\n"
6592                 "OpName %inloc  \"the_same\"\n"
6593                 "OpName %inval  \"the_same\"\n"
6594                 "OpName %neg    \"the_same\"\n"
6595                 "OpName %outloc \"the_same\"\n"+
6596                 commonShaderFooter;
6597         {
6598                 ComputeShaderSpec       spec;
6599
6600                 spec.assembly           = everythingNamedTheSame;
6601                 spec.numWorkGroups      = IVec3(numElements, 1, 1);
6602                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6603                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6604
6605                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", "everything_named_the_same", spec));
6606         }
6607
6608         // main_is_...
6609         for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6610         {
6611                 map<string, string>     specializations;
6612                 ComputeShaderSpec       spec;
6613
6614                 specializations["ENTRY"]        = "main";
6615                 specializations["ID"]           = "main";
6616                 specializations["NAME"]         = abuseCases[ndx].param;
6617                 spec.assembly                           = shaderTemplate.specialize(specializations);
6618                 spec.numWorkGroups                      = IVec3(numElements, 1, 1);
6619                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6620                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6621
6622                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("main") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6623         }
6624
6625         // x_is_....
6626         for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6627         {
6628                 map<string, string>     specializations;
6629                 ComputeShaderSpec       spec;
6630
6631                 specializations["ENTRY"]        = "main";
6632                 specializations["ID"]           = "x";
6633                 specializations["NAME"]         = abuseCases[ndx].param;
6634                 spec.assembly                           = shaderTemplate.specialize(specializations);
6635                 spec.numWorkGroups                      = IVec3(numElements, 1, 1);
6636                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6637                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6638
6639                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("x") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6640         }
6641
6642         cases.push_back(CaseParameter("_is_main", "main"));
6643         cases.push_back(CaseParameter("_is_not_main", "not_main"));
6644         testFunc.push_back("main");
6645         testFunc.push_back("func");
6646
6647         for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6648         {
6649                 for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6650                 {
6651                         map<string, string>     specializations;
6652                         ComputeShaderSpec       spec;
6653
6654                         specializations["ENTRY"]        = "main";
6655                         specializations["ID"]           = testFunc[fNdx];
6656                         specializations["NAME"]         = cases[ndx].param;
6657                         spec.assembly                           = shaderTemplate.specialize(specializations);
6658                         spec.numWorkGroups                      = IVec3(numElements, 1, 1);
6659                         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6660                         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6661
6662                         entryMainGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), cases[ndx].name, spec));
6663                 }
6664         }
6665
6666         cases.push_back(CaseParameter("_is_entry", "rdc"));
6667
6668         for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6669         {
6670                 for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6671                 {
6672                         map<string, string>     specializations;
6673                         ComputeShaderSpec       spec;
6674
6675                         specializations["ENTRY"]        = "rdc";
6676                         specializations["ID"]           = testFunc[fNdx];
6677                         specializations["NAME"]         = cases[ndx].param;
6678                         spec.assembly                           = shaderTemplate.specialize(specializations);
6679                         spec.numWorkGroups                      = IVec3(numElements, 1, 1);
6680                         spec.entryPoint                         = "rdc";
6681                         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6682                         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6683
6684                         entryNotGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), cases[ndx].name, spec));
6685                 }
6686         }
6687
6688         group->addChild(entryMainGroup.release());
6689         group->addChild(entryNotGroup.release());
6690         group->addChild(abuseGroup.release());
6691
6692         return group.release();
6693 }
6694
6695 tcu::TestCaseGroup* createOpMemberNameGroup (tcu::TestContext& testCtx)
6696 {
6697         de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opmembername", "Tests OpMemberName cases"));
6698         de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "abuse", "OpMemberName abuse tests"));
6699         vector<CaseParameter>                   abuseCases;
6700         vector<string>                                  testFunc;
6701         de::Random                                              rnd(deStringHash(group->getName()));
6702         const int                                               numElements = 128;
6703         vector<float>                                   inputFloats(numElements, 0);
6704         vector<float>                                   outputFloats(numElements, 0);
6705
6706         getOpNameAbuseCases(abuseCases);
6707
6708         fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6709
6710         for (size_t ndx = 0; ndx < numElements; ++ndx)
6711                 outputFloats[ndx] = -inputFloats[ndx];
6712
6713         const string commonShaderHeader =
6714                 "OpCapability Shader\n"
6715                 "OpMemoryModel Logical GLSL450\n"
6716                 "OpEntryPoint GLCompute %main \"main\" %id\n"
6717                 "OpExecutionMode %main LocalSize 1 1 1\n";
6718
6719         const string commonShaderFooter =
6720                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6721
6722                 + string(getComputeAsmInputOutputBufferTraits())
6723                 + string(getComputeAsmCommonTypes())
6724                 + string(getComputeAsmInputOutputBuffer()) +
6725
6726                 "%u3str     = OpTypeStruct %u32 %u32 %u32\n"
6727
6728                 "%id        = OpVariable %uvec3ptr Input\n"
6729                 "%zero      = OpConstant %i32 0\n"
6730
6731                 "%main      = OpFunction %void None %voidf\n"
6732                 "%entry     = OpLabel\n"
6733
6734                 "%idval     = OpLoad %uvec3 %id\n"
6735                 "%x0        = OpCompositeExtract %u32 %idval 0\n"
6736
6737                 "%idstr     = OpCompositeConstruct %u3str %x0 %x0 %x0\n"
6738                 "%x         = OpCompositeExtract %u32 %idstr 0\n"
6739
6740                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6741                 "%inval     = OpLoad %f32 %inloc\n"
6742                 "%neg       = OpFNegate %f32 %inval\n"
6743                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6744                 "             OpStore %outloc %neg\n"
6745
6746                 "             OpReturn\n"
6747                 "             OpFunctionEnd\n";
6748
6749         const StringTemplate shaderTemplate(
6750                 commonShaderHeader +
6751                 "OpMemberName %u3str 0 \"${NAME}\"\n" +
6752                 commonShaderFooter);
6753
6754         const std::string multipleNames =
6755                 commonShaderHeader +
6756                 "OpMemberName %u3str 0 \"to_be\"\n"
6757                 "OpMemberName %u3str 1 \"or_not\"\n"
6758                 "OpMemberName %u3str 0 \"to_be\"\n"
6759                 "OpMemberName %u3str 2 \"makes_no\"\n"
6760                 "OpMemberName %u3str 0 \"difference\"\n"
6761                 "OpMemberName %u3str 0 \"to_me\"\n" +
6762                 commonShaderFooter;
6763         {
6764                 ComputeShaderSpec       spec;
6765
6766                 spec.assembly = multipleNames;
6767                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6768                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6769                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6770
6771                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "u3str_x_has_multiple_names", "multiple_names", spec));
6772         }
6773
6774         const std::string everythingNamedTheSame =
6775                 commonShaderHeader +
6776                 "OpMemberName %u3str 0 \"the_same\"\n"
6777                 "OpMemberName %u3str 1 \"the_same\"\n"
6778                 "OpMemberName %u3str 2 \"the_same\"\n" +
6779                 commonShaderFooter;
6780
6781         {
6782                 ComputeShaderSpec       spec;
6783
6784                 spec.assembly = everythingNamedTheSame;
6785                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6786                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6787                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6788
6789                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", "everything_named_the_same", spec));
6790         }
6791
6792         // u3str_x_is_....
6793         for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6794         {
6795                 map<string, string>     specializations;
6796                 ComputeShaderSpec       spec;
6797
6798                 specializations["NAME"] = abuseCases[ndx].param;
6799                 spec.assembly = shaderTemplate.specialize(specializations);
6800                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6801                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6802                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6803
6804                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("u3str_x") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6805         }
6806
6807         group->addChild(abuseGroup.release());
6808
6809         return group.release();
6810 }
6811
6812 // Assembly code used for testing function control is based on GLSL source code:
6813 //
6814 // #version 430
6815 //
6816 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6817 //   float elements[];
6818 // } input_data;
6819 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6820 //   float elements[];
6821 // } output_data;
6822 //
6823 // float const10() { return 10.f; }
6824 //
6825 // void main() {
6826 //   uint x = gl_GlobalInvocationID.x;
6827 //   output_data.elements[x] = input_data.elements[x] + const10();
6828 // }
6829 tcu::TestCaseGroup* createFunctionControlGroup (tcu::TestContext& testCtx)
6830 {
6831         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "function_control", "Tests function control cases"));
6832         vector<CaseParameter>                   cases;
6833         de::Random                                              rnd                             (deStringHash(group->getName()));
6834         const int                                               numElements             = 100;
6835         vector<float>                                   inputFloats             (numElements, 0);
6836         vector<float>                                   outputFloats    (numElements, 0);
6837         const StringTemplate                    shaderTemplate  (
6838                 string(getComputeAsmShaderPreamble()) +
6839
6840                 "OpSource GLSL 430\n"
6841                 "OpName %main \"main\"\n"
6842                 "OpName %func_const10 \"const10(\"\n"
6843                 "OpName %id \"gl_GlobalInvocationID\"\n"
6844
6845                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6846
6847                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6848
6849                 "%f32f = OpTypeFunction %f32\n"
6850                 "%id = OpVariable %uvec3ptr Input\n"
6851                 "%zero = OpConstant %i32 0\n"
6852                 "%constf10 = OpConstant %f32 10.0\n"
6853
6854                 "%main         = OpFunction %void None %voidf\n"
6855                 "%entry        = OpLabel\n"
6856                 "%idval        = OpLoad %uvec3 %id\n"
6857                 "%x            = OpCompositeExtract %u32 %idval 0\n"
6858                 "%inloc        = OpAccessChain %f32ptr %indata %zero %x\n"
6859                 "%inval        = OpLoad %f32 %inloc\n"
6860                 "%ret_10       = OpFunctionCall %f32 %func_const10\n"
6861                 "%fadd         = OpFAdd %f32 %inval %ret_10\n"
6862                 "%outloc       = OpAccessChain %f32ptr %outdata %zero %x\n"
6863                 "                OpStore %outloc %fadd\n"
6864                 "                OpReturn\n"
6865                 "                OpFunctionEnd\n"
6866
6867                 "%func_const10 = OpFunction %f32 ${CONTROL} %f32f\n"
6868                 "%label        = OpLabel\n"
6869                 "                OpReturnValue %constf10\n"
6870                 "                OpFunctionEnd\n");
6871
6872         cases.push_back(CaseParameter("none",                                           "None"));
6873         cases.push_back(CaseParameter("inline",                                         "Inline"));
6874         cases.push_back(CaseParameter("dont_inline",                            "DontInline"));
6875         cases.push_back(CaseParameter("pure",                                           "Pure"));
6876         cases.push_back(CaseParameter("const",                                          "Const"));
6877         cases.push_back(CaseParameter("inline_pure",                            "Inline|Pure"));
6878         cases.push_back(CaseParameter("const_dont_inline",                      "Const|DontInline"));
6879         cases.push_back(CaseParameter("inline_dont_inline",                     "Inline|DontInline"));
6880         cases.push_back(CaseParameter("pure_inline_dont_inline",        "Pure|Inline|DontInline"));
6881
6882         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6883
6884         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6885         floorAll(inputFloats);
6886
6887         for (size_t ndx = 0; ndx < numElements; ++ndx)
6888                 outputFloats[ndx] = inputFloats[ndx] + 10.f;
6889
6890         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6891         {
6892                 map<string, string>             specializations;
6893                 ComputeShaderSpec               spec;
6894
6895                 specializations["CONTROL"] = cases[caseNdx].param;
6896                 spec.assembly = shaderTemplate.specialize(specializations);
6897                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6898                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6899                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6900
6901                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6902         }
6903
6904         return group.release();
6905 }
6906
6907 tcu::TestCaseGroup* createMemoryAccessGroup (tcu::TestContext& testCtx)
6908 {
6909         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "memory_access", "Tests memory access cases"));
6910         vector<CaseParameter>                   cases;
6911         de::Random                                              rnd                             (deStringHash(group->getName()));
6912         const int                                               numElements             = 100;
6913         vector<float>                                   inputFloats             (numElements, 0);
6914         vector<float>                                   outputFloats    (numElements, 0);
6915         const StringTemplate                    shaderTemplate  (
6916                 string(getComputeAsmShaderPreamble()) +
6917
6918                 "OpSource GLSL 430\n"
6919                 "OpName %main           \"main\"\n"
6920                 "OpName %id             \"gl_GlobalInvocationID\"\n"
6921
6922                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6923
6924                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6925
6926                 "%f32ptr_f  = OpTypePointer Function %f32\n"
6927
6928                 "%id        = OpVariable %uvec3ptr Input\n"
6929                 "%zero      = OpConstant %i32 0\n"
6930                 "%four      = OpConstant %i32 4\n"
6931
6932                 "%main      = OpFunction %void None %voidf\n"
6933                 "%label     = OpLabel\n"
6934                 "%copy      = OpVariable %f32ptr_f Function\n"
6935                 "%idval     = OpLoad %uvec3 %id ${ACCESS}\n"
6936                 "%x         = OpCompositeExtract %u32 %idval 0\n"
6937                 "%inloc     = OpAccessChain %f32ptr %indata  %zero %x\n"
6938                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6939                 "             OpCopyMemory %copy %inloc ${ACCESS}\n"
6940                 "%val1      = OpLoad %f32 %copy\n"
6941                 "%val2      = OpLoad %f32 %inloc\n"
6942                 "%add       = OpFAdd %f32 %val1 %val2\n"
6943                 "             OpStore %outloc %add ${ACCESS}\n"
6944                 "             OpReturn\n"
6945                 "             OpFunctionEnd\n");
6946
6947         cases.push_back(CaseParameter("null",                                   ""));
6948         cases.push_back(CaseParameter("none",                                   "None"));
6949         cases.push_back(CaseParameter("volatile",                               "Volatile"));
6950         cases.push_back(CaseParameter("aligned",                                "Aligned 4"));
6951         cases.push_back(CaseParameter("nontemporal",                    "Nontemporal"));
6952         cases.push_back(CaseParameter("aligned_nontemporal",    "Aligned|Nontemporal 4"));
6953         cases.push_back(CaseParameter("aligned_volatile",               "Volatile|Aligned 4"));
6954
6955         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6956
6957         for (size_t ndx = 0; ndx < numElements; ++ndx)
6958                 outputFloats[ndx] = inputFloats[ndx] + inputFloats[ndx];
6959
6960         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6961         {
6962                 map<string, string>             specializations;
6963                 ComputeShaderSpec               spec;
6964
6965                 specializations["ACCESS"] = cases[caseNdx].param;
6966                 spec.assembly = shaderTemplate.specialize(specializations);
6967                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6968                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6969                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6970
6971                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6972         }
6973
6974         return group.release();
6975 }
6976
6977 // Checks that we can get undefined values for various types, without exercising a computation with it.
6978 tcu::TestCaseGroup* createOpUndefGroup (tcu::TestContext& testCtx)
6979 {
6980         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opundef", "Tests the OpUndef instruction"));
6981         vector<CaseParameter>                   cases;
6982         de::Random                                              rnd                             (deStringHash(group->getName()));
6983         const int                                               numElements             = 100;
6984         vector<float>                                   positiveFloats  (numElements, 0);
6985         vector<float>                                   negativeFloats  (numElements, 0);
6986         const StringTemplate                    shaderTemplate  (
6987                 string(getComputeAsmShaderPreamble()) +
6988
6989                 "OpSource GLSL 430\n"
6990                 "OpName %main           \"main\"\n"
6991                 "OpName %id             \"gl_GlobalInvocationID\"\n"
6992
6993                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6994
6995                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6996                 "%uvec2     = OpTypeVector %u32 2\n"
6997                 "%fvec4     = OpTypeVector %f32 4\n"
6998                 "%fmat33    = OpTypeMatrix %fvec3 3\n"
6999                 "%image     = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
7000                 "%sampler   = OpTypeSampler\n"
7001                 "%simage    = OpTypeSampledImage %image\n"
7002                 "%const100  = OpConstant %u32 100\n"
7003                 "%uarr100   = OpTypeArray %i32 %const100\n"
7004                 "%struct    = OpTypeStruct %f32 %i32 %u32\n"
7005                 "%pointer   = OpTypePointer Function %i32\n"
7006                 + string(getComputeAsmInputOutputBuffer()) +
7007
7008                 "%id        = OpVariable %uvec3ptr Input\n"
7009                 "%zero      = OpConstant %i32 0\n"
7010
7011                 "%main      = OpFunction %void None %voidf\n"
7012                 "%label     = OpLabel\n"
7013
7014                 "%undef     = OpUndef ${TYPE}\n"
7015
7016                 "%idval     = OpLoad %uvec3 %id\n"
7017                 "%x         = OpCompositeExtract %u32 %idval 0\n"
7018
7019                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
7020                 "%inval     = OpLoad %f32 %inloc\n"
7021                 "%neg       = OpFNegate %f32 %inval\n"
7022                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
7023                 "             OpStore %outloc %neg\n"
7024                 "             OpReturn\n"
7025                 "             OpFunctionEnd\n");
7026
7027         cases.push_back(CaseParameter("bool",                   "%bool"));
7028         cases.push_back(CaseParameter("sint32",                 "%i32"));
7029         cases.push_back(CaseParameter("uint32",                 "%u32"));
7030         cases.push_back(CaseParameter("float32",                "%f32"));
7031         cases.push_back(CaseParameter("vec4float32",    "%fvec4"));
7032         cases.push_back(CaseParameter("vec2uint32",             "%uvec2"));
7033         cases.push_back(CaseParameter("matrix",                 "%fmat33"));
7034         cases.push_back(CaseParameter("image",                  "%image"));
7035         cases.push_back(CaseParameter("sampler",                "%sampler"));
7036         cases.push_back(CaseParameter("sampledimage",   "%simage"));
7037         cases.push_back(CaseParameter("array",                  "%uarr100"));
7038         cases.push_back(CaseParameter("runtimearray",   "%f32arr"));
7039         cases.push_back(CaseParameter("struct",                 "%struct"));
7040         cases.push_back(CaseParameter("pointer",                "%pointer"));
7041
7042         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7043
7044         for (size_t ndx = 0; ndx < numElements; ++ndx)
7045                 negativeFloats[ndx] = -positiveFloats[ndx];
7046
7047         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7048         {
7049                 map<string, string>             specializations;
7050                 ComputeShaderSpec               spec;
7051
7052                 specializations["TYPE"] = cases[caseNdx].param;
7053                 spec.assembly = shaderTemplate.specialize(specializations);
7054                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7055                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7056                 spec.numWorkGroups = IVec3(numElements, 1, 1);
7057
7058                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
7059         }
7060
7061         // OpUndef with constants.
7062 #ifndef CTS_USES_VULKANSC
7063         {
7064                 static const char data_dir[] = "spirv_assembly/instruction/compute/undef";
7065
7066                 static const struct
7067                 {
7068                         const std::string name;
7069                         const std::string desc;
7070                 } amberCases[] =
7071                 {
7072                         { "undefined_constant_composite",               "OpUndef value in OpConstantComposite"          },
7073                         { "undefined_spec_constant_composite",  "OpUndef value in OpSpecConstantComposite"      },
7074                 };
7075
7076                 for (int i = 0; i < DE_LENGTH_OF_ARRAY(amberCases); ++i)
7077                 {
7078                         cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
7079                                                                                                                                                                 amberCases[i].name.c_str(),
7080                                                                                                                                                                 amberCases[i].desc.c_str(),
7081                                                                                                                                                                 data_dir,
7082                                                                                                                                                                 amberCases[i].name + ".amber");
7083                         group->addChild(testCase);
7084                 }
7085         }
7086 #endif
7087
7088         return group.release();
7089 }
7090
7091 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
7092 tcu::TestCaseGroup* createFloat16OpConstantCompositeGroup (tcu::TestContext& testCtx)
7093 {
7094         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
7095         vector<CaseParameter>                   cases;
7096         de::Random                                              rnd                             (deStringHash(group->getName()));
7097         const int                                               numElements             = 100;
7098         vector<float>                                   positiveFloats  (numElements, 0);
7099         vector<float>                                   negativeFloats  (numElements, 0);
7100         const StringTemplate                    shaderTemplate  (
7101                 "OpCapability Shader\n"
7102                 "OpCapability Float16\n"
7103                 "OpMemoryModel Logical GLSL450\n"
7104                 "OpEntryPoint GLCompute %main \"main\" %id\n"
7105                 "OpExecutionMode %main LocalSize 1 1 1\n"
7106                 "OpSource GLSL 430\n"
7107                 "OpName %main           \"main\"\n"
7108                 "OpName %id             \"gl_GlobalInvocationID\"\n"
7109
7110                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
7111
7112                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
7113
7114                 "%id        = OpVariable %uvec3ptr Input\n"
7115                 "%zero      = OpConstant %i32 0\n"
7116                 "%f16       = OpTypeFloat 16\n"
7117                 "%c_f16_0   = OpConstant %f16 0.0\n"
7118                 "%c_f16_0_5 = OpConstant %f16 0.5\n"
7119                 "%c_f16_1   = OpConstant %f16 1.0\n"
7120                 "%v2f16     = OpTypeVector %f16 2\n"
7121                 "%v3f16     = OpTypeVector %f16 3\n"
7122                 "%v4f16     = OpTypeVector %f16 4\n"
7123
7124                 "${CONSTANT}\n"
7125
7126                 "%main      = OpFunction %void None %voidf\n"
7127                 "%label     = OpLabel\n"
7128                 "%idval     = OpLoad %uvec3 %id\n"
7129                 "%x         = OpCompositeExtract %u32 %idval 0\n"
7130                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
7131                 "%inval     = OpLoad %f32 %inloc\n"
7132                 "%neg       = OpFNegate %f32 %inval\n"
7133                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
7134                 "             OpStore %outloc %neg\n"
7135                 "             OpReturn\n"
7136                 "             OpFunctionEnd\n");
7137
7138
7139         cases.push_back(CaseParameter("vector",                 "%const = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"));
7140         cases.push_back(CaseParameter("matrix",                 "%m3v3f16 = OpTypeMatrix %v3f16 3\n"
7141                                                                                                         "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7142                                                                                                         "%mat = OpConstantComposite %m3v3f16 %vec %vec %vec"));
7143         cases.push_back(CaseParameter("struct",                 "%m2v3f16 = OpTypeMatrix %v3f16 2\n"
7144                                                                                                         "%struct = OpTypeStruct %i32 %f16 %v3f16 %m2v3f16\n"
7145                                                                                                         "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7146                                                                                                         "%mat = OpConstantComposite %m2v3f16 %vec %vec\n"
7147                                                                                                         "%const = OpConstantComposite %struct %zero %c_f16_0_5 %vec %mat\n"));
7148         cases.push_back(CaseParameter("nested_struct",  "%st1 = OpTypeStruct %i32 %f16\n"
7149                                                                                                         "%st2 = OpTypeStruct %i32 %i32\n"
7150                                                                                                         "%struct = OpTypeStruct %st1 %st2\n"
7151                                                                                                         "%st1val = OpConstantComposite %st1 %zero %c_f16_0_5\n"
7152                                                                                                         "%st2val = OpConstantComposite %st2 %zero %zero\n"
7153                                                                                                         "%const = OpConstantComposite %struct %st1val %st2val"));
7154
7155         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7156
7157         for (size_t ndx = 0; ndx < numElements; ++ndx)
7158                 negativeFloats[ndx] = -positiveFloats[ndx];
7159
7160         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7161         {
7162                 map<string, string>             specializations;
7163                 ComputeShaderSpec               spec;
7164
7165                 specializations["CONSTANT"] = cases[caseNdx].param;
7166                 spec.assembly = shaderTemplate.specialize(specializations);
7167                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7168                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7169                 spec.numWorkGroups = IVec3(numElements, 1, 1);
7170
7171                 spec.extensions.push_back("VK_KHR_shader_float16_int8");
7172
7173                 spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
7174
7175                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
7176         }
7177
7178         return group.release();
7179 }
7180
7181 const vector<deFloat16> squarize(const vector<deFloat16>& inData, const deUint32 argNo)
7182 {
7183         const size_t            inDataLength    = inData.size();
7184         vector<deFloat16>       result;
7185
7186         result.reserve(inDataLength * inDataLength);
7187
7188         if (argNo == 0)
7189         {
7190                 for (size_t numIdx = 0; numIdx < inDataLength; ++numIdx)
7191                         result.insert(result.end(), inData.begin(), inData.end());
7192         }
7193
7194         if (argNo == 1)
7195         {
7196                 for (size_t numIdx = 0; numIdx < inDataLength; ++numIdx)
7197                 {
7198                         const vector<deFloat16> tmp(inDataLength, inData[numIdx]);
7199
7200                         result.insert(result.end(), tmp.begin(), tmp.end());
7201                 }
7202         }
7203
7204         return result;
7205 }
7206
7207 const vector<deFloat16> squarizeVector(const vector<deFloat16>& inData, const deUint32 argNo)
7208 {
7209         vector<deFloat16>       vec;
7210         vector<deFloat16>       result;
7211
7212         // Create vectors. vec will contain each possible pair from inData
7213         {
7214                 const size_t    inDataLength    = inData.size();
7215
7216                 DE_ASSERT(inDataLength <= 64);
7217
7218                 vec.reserve(2 * inDataLength * inDataLength);
7219
7220                 for (size_t numIdxX = 0; numIdxX < inDataLength; ++numIdxX)
7221                 for (size_t numIdxY = 0; numIdxY < inDataLength; ++numIdxY)
7222                 {
7223                         vec.push_back(inData[numIdxX]);
7224                         vec.push_back(inData[numIdxY]);
7225                 }
7226         }
7227
7228         // Create vector pairs. result will contain each possible pair from vec
7229         {
7230                 const size_t    coordsPerVector = 2;
7231                 const size_t    vectorsCount    = vec.size() / coordsPerVector;
7232
7233                 result.reserve(coordsPerVector * vectorsCount * vectorsCount);
7234
7235                 if (argNo == 0)
7236                 {
7237                         for (size_t numIdxX = 0; numIdxX < vectorsCount; ++numIdxX)
7238                         for (size_t numIdxY = 0; numIdxY < vectorsCount; ++numIdxY)
7239                         {
7240                                 for (size_t coordNdx = 0; coordNdx < coordsPerVector; ++coordNdx)
7241                                         result.push_back(vec[coordsPerVector * numIdxY + coordNdx]);
7242                         }
7243                 }
7244
7245                 if (argNo == 1)
7246                 {
7247                         for (size_t numIdxX = 0; numIdxX < vectorsCount; ++numIdxX)
7248                         for (size_t numIdxY = 0; numIdxY < vectorsCount; ++numIdxY)
7249                         {
7250                                 for (size_t coordNdx = 0; coordNdx < coordsPerVector; ++coordNdx)
7251                                         result.push_back(vec[coordsPerVector * numIdxX + coordNdx]);
7252                         }
7253                 }
7254         }
7255
7256         return result;
7257 }
7258
7259 struct fp16isNan                        { bool operator()(const tcu::Float16 in1, const tcu::Float16)           { return in1.isNaN(); } };
7260 struct fp16isInf                        { bool operator()(const tcu::Float16 in1, const tcu::Float16)           { return in1.isInf(); } };
7261 struct fp16isEqual                      { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() == in2.asFloat(); } };
7262 struct fp16isUnequal            { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() != in2.asFloat(); } };
7263 struct fp16isLess                       { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() <  in2.asFloat(); } };
7264 struct fp16isGreater            { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() >  in2.asFloat(); } };
7265 struct fp16isLessOrEqual        { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() <= in2.asFloat(); } };
7266 struct fp16isGreaterOrEqual     { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() >= in2.asFloat(); } };
7267
7268 template <class TestedLogicalFunction, bool onlyTestFunc, bool unationModeAnd, bool nanSupported>
7269 bool compareFP16Logical (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
7270 {
7271         if (inputs.size() != 2 || outputAllocs.size() != 1)
7272                 return false;
7273
7274         vector<deUint8> input1Bytes;
7275         vector<deUint8> input2Bytes;
7276
7277         inputs[0].getBytes(input1Bytes);
7278         inputs[1].getBytes(input2Bytes);
7279
7280         const deUint32                  denormModesCount                        = 2;
7281         const deFloat16                 float16one                                      = tcu::Float16(1.0f).bits();
7282         const deFloat16                 float16zero                                     = tcu::Float16(0.0f).bits();
7283         const tcu::Float16              zero                                            = tcu::Float16::zero(1);
7284         const deFloat16* const  outputAsFP16                            = static_cast<deFloat16*>(outputAllocs[0]->getHostPtr());
7285         const deFloat16* const  input1AsFP16                            = reinterpret_cast<deFloat16* const>(&input1Bytes.front());
7286         const deFloat16* const  input2AsFP16                            = reinterpret_cast<deFloat16* const>(&input2Bytes.front());
7287         deUint32                                successfulRuns                          = denormModesCount;
7288         std::string                             results[denormModesCount];
7289         TestedLogicalFunction   testedLogicalFunction;
7290
7291         for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
7292         {
7293                 const bool flushToZero = (denormMode == 1);
7294
7295                 for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deFloat16); ++idx)
7296                 {
7297                         const tcu::Float16      f1pre                   = tcu::Float16(input1AsFP16[idx]);
7298                         const tcu::Float16      f2pre                   = tcu::Float16(input2AsFP16[idx]);
7299                         const tcu::Float16      f1                              = (flushToZero && f1pre.isDenorm()) ? zero : f1pre;
7300                         const tcu::Float16      f2                              = (flushToZero && f2pre.isDenorm()) ? zero : f2pre;
7301                         deFloat16                       expectedOutput  = float16zero;
7302
7303                         if (onlyTestFunc)
7304                         {
7305                                 if (testedLogicalFunction(f1, f2))
7306                                         expectedOutput = float16one;
7307                         }
7308                         else
7309                         {
7310                                 const bool      f1nan   = f1.isNaN();
7311                                 const bool      f2nan   = f2.isNaN();
7312
7313                                 // Skip NaN floats if not supported by implementation
7314                                 if (!nanSupported && (f1nan || f2nan))
7315                                         continue;
7316
7317                                 if (unationModeAnd)
7318                                 {
7319                                         const bool      ordered         = !f1nan && !f2nan;
7320
7321                                         if (ordered && testedLogicalFunction(f1, f2))
7322                                                 expectedOutput = float16one;
7323                                 }
7324                                 else
7325                                 {
7326                                         const bool      unordered       = f1nan || f2nan;
7327
7328                                         if (unordered || testedLogicalFunction(f1, f2))
7329                                                 expectedOutput = float16one;
7330                                 }
7331                         }
7332
7333                         if (outputAsFP16[idx] != expectedOutput)
7334                         {
7335                                 std::ostringstream str;
7336
7337                                 str << "ERROR: Sub-case #" << idx
7338                                         << " flushToZero:" << flushToZero
7339                                         << std::hex
7340                                         << " failed, inputs: 0x" << f1.bits()
7341                                         << ";0x" << f2.bits()
7342                                         << " output: 0x" << outputAsFP16[idx]
7343                                         << " expected output: 0x" << expectedOutput;
7344
7345                                 results[denormMode] = str.str();
7346
7347                                 successfulRuns--;
7348
7349                                 break;
7350                         }
7351                 }
7352         }
7353
7354         if (successfulRuns == 0)
7355                 for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
7356                         log << TestLog::Message << results[denormMode] << TestLog::EndMessage;
7357
7358         return successfulRuns > 0;
7359 }
7360
7361 } // anonymous
7362
7363 tcu::TestCaseGroup* createOpSourceTests (tcu::TestContext& testCtx)
7364 {
7365         struct NameCodePair { string name, code; };
7366         RGBA                                                    defaultColors[4];
7367         de::MovePtr<tcu::TestCaseGroup> opSourceTests                   (new tcu::TestCaseGroup(testCtx, "opsource", "OpSource instruction"));
7368         const std::string                               opsourceGLSLWithFile    = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile ";
7369         map<string, string>                             fragments                               = passthruFragments();
7370         const NameCodePair                              tests[]                                 =
7371         {
7372                 {"unknown", "OpSource Unknown 321"},
7373                 {"essl", "OpSource ESSL 310"},
7374                 {"glsl", "OpSource GLSL 450"},
7375                 {"opencl_cpp", "OpSource OpenCL_CPP 120"},
7376                 {"opencl_c", "OpSource OpenCL_C 120"},
7377                 {"multiple", "OpSource GLSL 450\nOpSource GLSL 450"},
7378                 {"file", opsourceGLSLWithFile},
7379                 {"source", opsourceGLSLWithFile + "\"void main(){}\""},
7380                 // Longest possible source string: SPIR-V limits instructions to 65535
7381                 // words, of which the first 4 are opsourceGLSLWithFile; the rest will
7382                 // contain 65530 UTF8 characters (one word each) plus one last word
7383                 // containing 3 ASCII characters and \0.
7384                 {"longsource", opsourceGLSLWithFile + '"' + makeLongUTF8String(65530) + "ccc" + '"'}
7385         };
7386
7387         getDefaultColors(defaultColors);
7388         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7389         {
7390                 fragments["debug"] = tests[testNdx].code;
7391                 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7392         }
7393
7394         return opSourceTests.release();
7395 }
7396
7397 tcu::TestCaseGroup* createOpSourceContinuedTests (tcu::TestContext& testCtx)
7398 {
7399         struct NameCodePair { string name, code; };
7400         RGBA                                                            defaultColors[4];
7401         de::MovePtr<tcu::TestCaseGroup>         opSourceTests           (new tcu::TestCaseGroup(testCtx, "opsourcecontinued", "OpSourceContinued instruction"));
7402         map<string, string>                                     fragments                       = passthruFragments();
7403         const std::string                                       opsource                        = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile \"void main(){}\"\n";
7404         const NameCodePair                                      tests[]                         =
7405         {
7406                 {"empty", opsource + "OpSourceContinued \"\""},
7407                 {"short", opsource + "OpSourceContinued \"abcde\""},
7408                 {"multiple", opsource + "OpSourceContinued \"abcde\"\nOpSourceContinued \"fghij\""},
7409                 // Longest possible source string: SPIR-V limits instructions to 65535
7410                 // words, of which the first one is OpSourceContinued/length; the rest
7411                 // will contain 65533 UTF8 characters (one word each) plus one last word
7412                 // containing 3 ASCII characters and \0.
7413                 {"long", opsource + "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\""}
7414         };
7415
7416         getDefaultColors(defaultColors);
7417         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7418         {
7419                 fragments["debug"] = tests[testNdx].code;
7420                 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7421         }
7422
7423         return opSourceTests.release();
7424 }
7425 tcu::TestCaseGroup* createOpNoLineTests(tcu::TestContext& testCtx)
7426 {
7427         RGBA                                                             defaultColors[4];
7428         de::MovePtr<tcu::TestCaseGroup>          opLineTests             (new tcu::TestCaseGroup(testCtx, "opnoline", "OpNoLine instruction"));
7429         map<string, string>                                      fragments;
7430         getDefaultColors(defaultColors);
7431         fragments["debug"]                      =
7432                 "%name = OpString \"name\"\n";
7433
7434         fragments["pre_main"]   =
7435                 "OpNoLine\n"
7436                 "OpNoLine\n"
7437                 "OpLine %name 1 1\n"
7438                 "OpNoLine\n"
7439                 "OpLine %name 1 1\n"
7440                 "OpLine %name 1 1\n"
7441                 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7442                 "OpNoLine\n"
7443                 "OpLine %name 1 1\n"
7444                 "OpNoLine\n"
7445                 "OpLine %name 1 1\n"
7446                 "OpLine %name 1 1\n"
7447                 "%second_param1 = OpFunctionParameter %v4f32\n"
7448                 "OpNoLine\n"
7449                 "OpNoLine\n"
7450                 "%label_secondfunction = OpLabel\n"
7451                 "OpNoLine\n"
7452                 "OpReturnValue %second_param1\n"
7453                 "OpFunctionEnd\n"
7454                 "OpNoLine\n"
7455                 "OpNoLine\n";
7456
7457         fragments["testfun"]            =
7458                 // A %test_code function that returns its argument unchanged.
7459                 "OpNoLine\n"
7460                 "OpNoLine\n"
7461                 "OpLine %name 1 1\n"
7462                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7463                 "OpNoLine\n"
7464                 "%param1 = OpFunctionParameter %v4f32\n"
7465                 "OpNoLine\n"
7466                 "OpNoLine\n"
7467                 "%label_testfun = OpLabel\n"
7468                 "OpNoLine\n"
7469                 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7470                 "OpReturnValue %val1\n"
7471                 "OpFunctionEnd\n"
7472                 "OpLine %name 1 1\n"
7473                 "OpNoLine\n";
7474
7475         createTestsForAllStages("opnoline", defaultColors, defaultColors, fragments, opLineTests.get());
7476
7477         return opLineTests.release();
7478 }
7479
7480 tcu::TestCaseGroup* createOpModuleProcessedTests(tcu::TestContext& testCtx)
7481 {
7482         RGBA                                                            defaultColors[4];
7483         de::MovePtr<tcu::TestCaseGroup>         opModuleProcessedTests                  (new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "OpModuleProcessed instruction"));
7484         map<string, string>                                     fragments;
7485         std::vector<std::string>                        noExtensions;
7486         GraphicsResources                                       resources;
7487
7488         getDefaultColors(defaultColors);
7489         resources.verifyBinary = veryfiBinaryShader;
7490         resources.spirvVersion = SPIRV_VERSION_1_3;
7491
7492         fragments["moduleprocessed"]                                                    =
7493                 "OpModuleProcessed \"VULKAN CTS\"\n"
7494                 "OpModuleProcessed \"Negative values\"\n"
7495                 "OpModuleProcessed \"Date: 2017/09/21\"\n";
7496
7497         fragments["pre_main"]   =
7498                 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7499                 "%second_param1 = OpFunctionParameter %v4f32\n"
7500                 "%label_secondfunction = OpLabel\n"
7501                 "OpReturnValue %second_param1\n"
7502                 "OpFunctionEnd\n";
7503
7504         fragments["testfun"]            =
7505                 // A %test_code function that returns its argument unchanged.
7506                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7507                 "%param1 = OpFunctionParameter %v4f32\n"
7508                 "%label_testfun = OpLabel\n"
7509                 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7510                 "OpReturnValue %val1\n"
7511                 "OpFunctionEnd\n";
7512
7513         createTestsForAllStages ("opmoduleprocessed", defaultColors, defaultColors, fragments, resources, noExtensions, opModuleProcessedTests.get());
7514
7515         return opModuleProcessedTests.release();
7516 }
7517
7518
7519 tcu::TestCaseGroup* createOpLineTests(tcu::TestContext& testCtx)
7520 {
7521         RGBA                                                                                                    defaultColors[4];
7522         de::MovePtr<tcu::TestCaseGroup>                                                 opLineTests                     (new tcu::TestCaseGroup(testCtx, "opline", "OpLine instruction"));
7523         map<string, string>                                                                             fragments;
7524         std::vector<std::pair<std::string, std::string> >               problemStrings;
7525
7526         problemStrings.push_back(std::make_pair<std::string, std::string>("empty_name", ""));
7527         problemStrings.push_back(std::make_pair<std::string, std::string>("short_name", "short_name"));
7528         problemStrings.push_back(std::make_pair<std::string, std::string>("long_name", makeLongUTF8String(65530) + "ccc"));
7529         getDefaultColors(defaultColors);
7530
7531         fragments["debug"]                      =
7532                 "%other_name = OpString \"other_name\"\n";
7533
7534         fragments["pre_main"]   =
7535                 "OpLine %file_name 32 0\n"
7536                 "OpLine %file_name 32 32\n"
7537                 "OpLine %file_name 32 40\n"
7538                 "OpLine %other_name 32 40\n"
7539                 "OpLine %other_name 0 100\n"
7540                 "OpLine %other_name 0 4294967295\n"
7541                 "OpLine %other_name 4294967295 0\n"
7542                 "OpLine %other_name 32 40\n"
7543                 "OpLine %file_name 0 0\n"
7544                 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7545                 "OpLine %file_name 1 0\n"
7546                 "%second_param1 = OpFunctionParameter %v4f32\n"
7547                 "OpLine %file_name 1 3\n"
7548                 "OpLine %file_name 1 2\n"
7549                 "%label_secondfunction = OpLabel\n"
7550                 "OpLine %file_name 0 2\n"
7551                 "OpReturnValue %second_param1\n"
7552                 "OpFunctionEnd\n"
7553                 "OpLine %file_name 0 2\n"
7554                 "OpLine %file_name 0 2\n";
7555
7556         fragments["testfun"]            =
7557                 // A %test_code function that returns its argument unchanged.
7558                 "OpLine %file_name 1 0\n"
7559                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7560                 "OpLine %file_name 16 330\n"
7561                 "%param1 = OpFunctionParameter %v4f32\n"
7562                 "OpLine %file_name 14 442\n"
7563                 "%label_testfun = OpLabel\n"
7564                 "OpLine %file_name 11 1024\n"
7565                 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7566                 "OpLine %file_name 2 97\n"
7567                 "OpReturnValue %val1\n"
7568                 "OpFunctionEnd\n"
7569                 "OpLine %file_name 5 32\n";
7570
7571         for (size_t i = 0; i < problemStrings.size(); ++i)
7572         {
7573                 map<string, string> testFragments = fragments;
7574                 testFragments["debug"] += "%file_name = OpString \"" + problemStrings[i].second + "\"\n";
7575                 createTestsForAllStages(string("opline") + "_" + problemStrings[i].first, defaultColors, defaultColors, testFragments, opLineTests.get());
7576         }
7577
7578         return opLineTests.release();
7579 }
7580
7581 tcu::TestCaseGroup* createOpConstantNullTests(tcu::TestContext& testCtx)
7582 {
7583         de::MovePtr<tcu::TestCaseGroup> opConstantNullTests             (new tcu::TestCaseGroup(testCtx, "opconstantnull", "OpConstantNull instruction"));
7584         RGBA                                                    colors[4];
7585
7586
7587         const char                                              functionStart[] =
7588                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7589                 "%param1 = OpFunctionParameter %v4f32\n"
7590                 "%lbl    = OpLabel\n";
7591
7592         const char                                              functionEnd[]   =
7593                 "OpReturnValue %transformed_param\n"
7594                 "OpFunctionEnd\n";
7595
7596         struct NameConstantsCode
7597         {
7598                 string name;
7599                 string constants;
7600                 string code;
7601         };
7602
7603         NameConstantsCode tests[] =
7604         {
7605                 {
7606                         "vec4",
7607                         "%cnull = OpConstantNull %v4f32\n",
7608                         "%transformed_param = OpFAdd %v4f32 %param1 %cnull\n"
7609                 },
7610                 {
7611                         "float",
7612                         "%cnull = OpConstantNull %f32\n",
7613                         "%vp = OpVariable %fp_v4f32 Function\n"
7614                         "%v  = OpLoad %v4f32 %vp\n"
7615                         "%v0 = OpVectorInsertDynamic %v4f32 %v %cnull %c_i32_0\n"
7616                         "%v1 = OpVectorInsertDynamic %v4f32 %v0 %cnull %c_i32_1\n"
7617                         "%v2 = OpVectorInsertDynamic %v4f32 %v1 %cnull %c_i32_2\n"
7618                         "%v3 = OpVectorInsertDynamic %v4f32 %v2 %cnull %c_i32_3\n"
7619                         "%transformed_param = OpFAdd %v4f32 %param1 %v3\n"
7620                 },
7621                 {
7622                         "bool",
7623                         "%cnull             = OpConstantNull %bool\n",
7624                         "%v                 = OpVariable %fp_v4f32 Function\n"
7625                         "                     OpStore %v %param1\n"
7626                         "                     OpSelectionMerge %false_label None\n"
7627                         "                     OpBranchConditional %cnull %true_label %false_label\n"
7628                         "%true_label        = OpLabel\n"
7629                         "                     OpStore %v %c_v4f32_0_5_0_5_0_5_0_5\n"
7630                         "                     OpBranch %false_label\n"
7631                         "%false_label       = OpLabel\n"
7632                         "%transformed_param = OpLoad %v4f32 %v\n"
7633                 },
7634                 {
7635                         "i32",
7636                         "%cnull             = OpConstantNull %i32\n",
7637                         "%v                 = OpVariable %fp_v4f32 Function %c_v4f32_0_5_0_5_0_5_0_5\n"
7638                         "%b                 = OpIEqual %bool %cnull %c_i32_0\n"
7639                         "                     OpSelectionMerge %false_label None\n"
7640                         "                     OpBranchConditional %b %true_label %false_label\n"
7641                         "%true_label        = OpLabel\n"
7642                         "                     OpStore %v %param1\n"
7643                         "                     OpBranch %false_label\n"
7644                         "%false_label       = OpLabel\n"
7645                         "%transformed_param = OpLoad %v4f32 %v\n"
7646                 },
7647                 {
7648                         "struct",
7649                         "%stype             = OpTypeStruct %f32 %v4f32\n"
7650                         "%fp_stype          = OpTypePointer Function %stype\n"
7651                         "%cnull             = OpConstantNull %stype\n",
7652                         "%v                 = OpVariable %fp_stype Function %cnull\n"
7653                         "%f                 = OpAccessChain %fp_v4f32 %v %c_i32_1\n"
7654                         "%f_val             = OpLoad %v4f32 %f\n"
7655                         "%transformed_param = OpFAdd %v4f32 %param1 %f_val\n"
7656                 },
7657                 {
7658                         "array",
7659                         "%a4_v4f32          = OpTypeArray %v4f32 %c_u32_4\n"
7660                         "%fp_a4_v4f32       = OpTypePointer Function %a4_v4f32\n"
7661                         "%cnull             = OpConstantNull %a4_v4f32\n",
7662                         "%v                 = OpVariable %fp_a4_v4f32 Function %cnull\n"
7663                         "%f                 = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7664                         "%f1                = OpAccessChain %fp_v4f32 %v %c_u32_1\n"
7665                         "%f2                = OpAccessChain %fp_v4f32 %v %c_u32_2\n"
7666                         "%f3                = OpAccessChain %fp_v4f32 %v %c_u32_3\n"
7667                         "%f_val             = OpLoad %v4f32 %f\n"
7668                         "%f1_val            = OpLoad %v4f32 %f1\n"
7669                         "%f2_val            = OpLoad %v4f32 %f2\n"
7670                         "%f3_val            = OpLoad %v4f32 %f3\n"
7671                         "%t0                = OpFAdd %v4f32 %param1 %f_val\n"
7672                         "%t1                = OpFAdd %v4f32 %t0 %f1_val\n"
7673                         "%t2                = OpFAdd %v4f32 %t1 %f2_val\n"
7674                         "%transformed_param = OpFAdd %v4f32 %t2 %f3_val\n"
7675                 },
7676                 {
7677                         "matrix",
7678                         "%mat4x4_f32        = OpTypeMatrix %v4f32 4\n"
7679                         "%cnull             = OpConstantNull %mat4x4_f32\n",
7680                         // Our null matrix * any vector should result in a zero vector.
7681                         "%v                 = OpVectorTimesMatrix %v4f32 %param1 %cnull\n"
7682                         "%transformed_param = OpFAdd %v4f32 %param1 %v\n"
7683                 }
7684         };
7685
7686         getHalfColorsFullAlpha(colors);
7687
7688         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7689         {
7690                 map<string, string> fragments;
7691                 fragments["pre_main"] = tests[testNdx].constants;
7692                 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7693                 createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, opConstantNullTests.get());
7694         }
7695         return opConstantNullTests.release();
7696 }
7697 tcu::TestCaseGroup* createOpConstantCompositeTests(tcu::TestContext& testCtx)
7698 {
7699         de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests                (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "OpConstantComposite instruction"));
7700         RGBA                                                    inputColors[4];
7701         RGBA                                                    outputColors[4];
7702
7703
7704         const char                                              functionStart[]  =
7705                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7706                 "%param1 = OpFunctionParameter %v4f32\n"
7707                 "%lbl    = OpLabel\n";
7708
7709         const char                                              functionEnd[]           =
7710                 "OpReturnValue %transformed_param\n"
7711                 "OpFunctionEnd\n";
7712
7713         struct NameConstantsCode
7714         {
7715                 string name;
7716                 string constants;
7717                 string code;
7718         };
7719
7720         NameConstantsCode tests[] =
7721         {
7722                 {
7723                         "vec4",
7724
7725                         "%cval              = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0\n",
7726                         "%transformed_param = OpFAdd %v4f32 %param1 %cval\n"
7727                 },
7728                 {
7729                         "struct",
7730
7731                         "%stype             = OpTypeStruct %v4f32 %f32\n"
7732                         "%fp_stype          = OpTypePointer Function %stype\n"
7733                         "%f32_n_1           = OpConstant %f32 -1.0\n"
7734                         "%f32_1_5           = OpConstant %f32 !0x3fc00000\n" // +1.5
7735                         "%cvec              = OpConstantComposite %v4f32 %f32_1_5 %f32_1_5 %f32_1_5 %c_f32_1\n"
7736                         "%cval              = OpConstantComposite %stype %cvec %f32_n_1\n",
7737
7738                         "%v                 = OpVariable %fp_stype Function %cval\n"
7739                         "%vec_ptr           = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7740                         "%f32_ptr           = OpAccessChain %fp_f32 %v %c_u32_1\n"
7741                         "%vec_val           = OpLoad %v4f32 %vec_ptr\n"
7742                         "%f32_val           = OpLoad %f32 %f32_ptr\n"
7743                         "%tmp1              = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_1 %f32_val\n" // vec4(-1)
7744                         "%tmp2              = OpFAdd %v4f32 %tmp1 %param1\n" // param1 + vec4(-1)
7745                         "%transformed_param = OpFAdd %v4f32 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
7746                 },
7747                 {
7748                         // [1|0|0|0.5] [x] = x + 0.5
7749                         // [0|1|0|0.5] [y] = y + 0.5
7750                         // [0|0|1|0.5] [z] = z + 0.5
7751                         // [0|0|0|1  ] [1] = 1
7752                         "matrix",
7753
7754                         "%mat4x4_f32          = OpTypeMatrix %v4f32 4\n"
7755                         "%v4f32_1_0_0_0       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_0\n"
7756                         "%v4f32_0_1_0_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_1 %c_f32_0 %c_f32_0\n"
7757                         "%v4f32_0_0_1_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_1 %c_f32_0\n"
7758                         "%v4f32_0_5_0_5_0_5_1 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_1\n"
7759                         "%cval                = OpConstantComposite %mat4x4_f32 %v4f32_1_0_0_0 %v4f32_0_1_0_0 %v4f32_0_0_1_0 %v4f32_0_5_0_5_0_5_1\n",
7760
7761                         "%transformed_param   = OpMatrixTimesVector %v4f32 %cval %param1\n"
7762                 },
7763                 {
7764                         "array",
7765
7766                         "%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7767                         "%fp_a4f32            = OpTypePointer Function %a4f32\n"
7768                         "%f32_n_1             = OpConstant %f32 -1.0\n"
7769                         "%f32_1_5             = OpConstant %f32 !0x3fc00000\n" // +1.5
7770                         "%carr                = OpConstantComposite %a4f32 %c_f32_0 %f32_n_1 %f32_1_5 %c_f32_0\n",
7771
7772                         "%v                   = OpVariable %fp_a4f32 Function %carr\n"
7773                         "%f                   = OpAccessChain %fp_f32 %v %c_u32_0\n"
7774                         "%f1                  = OpAccessChain %fp_f32 %v %c_u32_1\n"
7775                         "%f2                  = OpAccessChain %fp_f32 %v %c_u32_2\n"
7776                         "%f3                  = OpAccessChain %fp_f32 %v %c_u32_3\n"
7777                         "%f_val               = OpLoad %f32 %f\n"
7778                         "%f1_val              = OpLoad %f32 %f1\n"
7779                         "%f2_val              = OpLoad %f32 %f2\n"
7780                         "%f3_val              = OpLoad %f32 %f3\n"
7781                         "%ftot1               = OpFAdd %f32 %f_val %f1_val\n"
7782                         "%ftot2               = OpFAdd %f32 %ftot1 %f2_val\n"
7783                         "%ftot3               = OpFAdd %f32 %ftot2 %f3_val\n"  // 0 - 1 + 1.5 + 0
7784                         "%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %ftot3\n"
7785                         "%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
7786                 },
7787                 {
7788                         //
7789                         // [
7790                         //   {
7791                         //      0.0,
7792                         //      [ 1.0, 1.0, 1.0, 1.0]
7793                         //   },
7794                         //   {
7795                         //      1.0,
7796                         //      [ 0.0, 0.5, 0.0, 0.0]
7797                         //   }, //     ^^^
7798                         //   {
7799                         //      0.0,
7800                         //      [ 1.0, 1.0, 1.0, 1.0]
7801                         //   }
7802                         // ]
7803                         "array_of_struct_of_array",
7804
7805                         "%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7806                         "%fp_a4f32            = OpTypePointer Function %a4f32\n"
7807                         "%stype               = OpTypeStruct %f32 %a4f32\n"
7808                         "%a3stype             = OpTypeArray %stype %c_u32_3\n"
7809                         "%fp_a3stype          = OpTypePointer Function %a3stype\n"
7810                         "%ca4f32_0            = OpConstantComposite %a4f32 %c_f32_0 %c_f32_0_5 %c_f32_0 %c_f32_0\n"
7811                         "%ca4f32_1            = OpConstantComposite %a4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
7812                         "%cstype1             = OpConstantComposite %stype %c_f32_0 %ca4f32_1\n"
7813                         "%cstype2             = OpConstantComposite %stype %c_f32_1 %ca4f32_0\n"
7814                         "%carr                = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
7815
7816                         "%v                   = OpVariable %fp_a3stype Function %carr\n"
7817                         "%f                   = OpAccessChain %fp_f32 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
7818                         "%f_l                 = OpLoad %f32 %f\n"
7819                         "%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %f_l\n"
7820                         "%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
7821                 }
7822         };
7823
7824         getHalfColorsFullAlpha(inputColors);
7825         outputColors[0] = RGBA(255, 255, 255, 255);
7826         outputColors[1] = RGBA(255, 127, 127, 255);
7827         outputColors[2] = RGBA(127, 255, 127, 255);
7828         outputColors[3] = RGBA(127, 127, 255, 255);
7829
7830         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7831         {
7832                 map<string, string> fragments;
7833                 fragments["pre_main"] = tests[testNdx].constants;
7834                 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7835                 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, opConstantCompositeTests.get());
7836         }
7837         return opConstantCompositeTests.release();
7838 }
7839
7840 tcu::TestCaseGroup* createSelectionBlockOrderTests(tcu::TestContext& testCtx)
7841 {
7842         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, "selection_block_order", "Out-of-order blocks for selection"));
7843         RGBA                                                    inputColors[4];
7844         RGBA                                                    outputColors[4];
7845         map<string, string>                             fragments;
7846
7847         // vec4 test_code(vec4 param) {
7848         //   vec4 result = param;
7849         //   for (int i = 0; i < 4; ++i) {
7850         //     if (i == 0) result[i] = 0.;
7851         //     else        result[i] = 1. - result[i];
7852         //   }
7853         //   return result;
7854         // }
7855         const char                                              function[]                      =
7856                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7857                 "%param1    = OpFunctionParameter %v4f32\n"
7858                 "%lbl       = OpLabel\n"
7859                 "%iptr      = OpVariable %fp_i32 Function\n"
7860                 "%result    = OpVariable %fp_v4f32 Function\n"
7861                 "             OpStore %iptr %c_i32_0\n"
7862                 "             OpStore %result %param1\n"
7863                 "             OpBranch %loop\n"
7864
7865                 // Loop entry block.
7866                 "%loop      = OpLabel\n"
7867                 "%ival      = OpLoad %i32 %iptr\n"
7868                 "%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
7869                 "             OpLoopMerge %exit %if_entry None\n"
7870                 "             OpBranchConditional %lt_4 %if_entry %exit\n"
7871
7872                 // Merge block for loop.
7873                 "%exit      = OpLabel\n"
7874                 "%ret       = OpLoad %v4f32 %result\n"
7875                 "             OpReturnValue %ret\n"
7876
7877                 // If-statement entry block.
7878                 "%if_entry  = OpLabel\n"
7879                 "%loc       = OpAccessChain %fp_f32 %result %ival\n"
7880                 "%eq_0      = OpIEqual %bool %ival %c_i32_0\n"
7881                 "             OpSelectionMerge %if_exit None\n"
7882                 "             OpBranchConditional %eq_0 %if_true %if_false\n"
7883
7884                 // False branch for if-statement.
7885                 "%if_false  = OpLabel\n"
7886                 "%val       = OpLoad %f32 %loc\n"
7887                 "%sub       = OpFSub %f32 %c_f32_1 %val\n"
7888                 "             OpStore %loc %sub\n"
7889                 "             OpBranch %if_exit\n"
7890
7891                 // Merge block for if-statement.
7892                 "%if_exit   = OpLabel\n"
7893                 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
7894                 "             OpStore %iptr %ival_next\n"
7895                 "             OpBranch %loop\n"
7896
7897                 // True branch for if-statement.
7898                 "%if_true   = OpLabel\n"
7899                 "             OpStore %loc %c_f32_0\n"
7900                 "             OpBranch %if_exit\n"
7901
7902                 "             OpFunctionEnd\n";
7903
7904         fragments["testfun"]    = function;
7905
7906         inputColors[0]                  = RGBA(127, 127, 127, 0);
7907         inputColors[1]                  = RGBA(127, 0,   0,   0);
7908         inputColors[2]                  = RGBA(0,   127, 0,   0);
7909         inputColors[3]                  = RGBA(0,   0,   127, 0);
7910
7911         outputColors[0]                 = RGBA(0, 128, 128, 255);
7912         outputColors[1]                 = RGBA(0, 255, 255, 255);
7913         outputColors[2]                 = RGBA(0, 128, 255, 255);
7914         outputColors[3]                 = RGBA(0, 255, 128, 255);
7915
7916         createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
7917
7918         return group.release();
7919 }
7920
7921 tcu::TestCaseGroup* createSwitchBlockOrderTests(tcu::TestContext& testCtx)
7922 {
7923         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, "switch_block_order", "Out-of-order blocks for switch"));
7924         RGBA                                                    inputColors[4];
7925         RGBA                                                    outputColors[4];
7926         map<string, string>                             fragments;
7927
7928         const char                                              typesAndConstants[]     =
7929                 "%c_f32_p2  = OpConstant %f32 0.2\n"
7930                 "%c_f32_p4  = OpConstant %f32 0.4\n"
7931                 "%c_f32_p6  = OpConstant %f32 0.6\n"
7932                 "%c_f32_p8  = OpConstant %f32 0.8\n";
7933
7934         // vec4 test_code(vec4 param) {
7935         //   vec4 result = param;
7936         //   for (int i = 0; i < 4; ++i) {
7937         //     switch (i) {
7938         //       case 0: result[i] += .2; break;
7939         //       case 1: result[i] += .6; break;
7940         //       case 2: result[i] += .4; break;
7941         //       case 3: result[i] += .8; break;
7942         //       default: break; // unreachable
7943         //     }
7944         //   }
7945         //   return result;
7946         // }
7947         const char                                              function[]                      =
7948                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7949                 "%param1    = OpFunctionParameter %v4f32\n"
7950                 "%lbl       = OpLabel\n"
7951                 "%iptr      = OpVariable %fp_i32 Function\n"
7952                 "%result    = OpVariable %fp_v4f32 Function\n"
7953                 "             OpStore %iptr %c_i32_0\n"
7954                 "             OpStore %result %param1\n"
7955                 "             OpBranch %loop\n"
7956
7957                 // Loop entry block.
7958                 "%loop      = OpLabel\n"
7959                 "%ival      = OpLoad %i32 %iptr\n"
7960                 "%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
7961                 "             OpLoopMerge %exit %cont None\n"
7962                 "             OpBranchConditional %lt_4 %switch_entry %exit\n"
7963
7964                 // Merge block for loop.
7965                 "%exit      = OpLabel\n"
7966                 "%ret       = OpLoad %v4f32 %result\n"
7967                 "             OpReturnValue %ret\n"
7968
7969                 // Switch-statement entry block.
7970                 "%switch_entry   = OpLabel\n"
7971                 "%loc            = OpAccessChain %fp_f32 %result %ival\n"
7972                 "%val            = OpLoad %f32 %loc\n"
7973                 "                  OpSelectionMerge %switch_exit None\n"
7974                 "                  OpSwitch %ival %switch_default 0 %case0 1 %case1 2 %case2 3 %case3\n"
7975
7976                 "%case2          = OpLabel\n"
7977                 "%addp4          = OpFAdd %f32 %val %c_f32_p4\n"
7978                 "                  OpStore %loc %addp4\n"
7979                 "                  OpBranch %switch_exit\n"
7980
7981                 "%switch_default = OpLabel\n"
7982                 "                  OpUnreachable\n"
7983
7984                 "%case3          = OpLabel\n"
7985                 "%addp8          = OpFAdd %f32 %val %c_f32_p8\n"
7986                 "                  OpStore %loc %addp8\n"
7987                 "                  OpBranch %switch_exit\n"
7988
7989                 "%case0          = OpLabel\n"
7990                 "%addp2          = OpFAdd %f32 %val %c_f32_p2\n"
7991                 "                  OpStore %loc %addp2\n"
7992                 "                  OpBranch %switch_exit\n"
7993
7994                 // Merge block for switch-statement.
7995                 "%switch_exit    = OpLabel\n"
7996                 "%ival_next      = OpIAdd %i32 %ival %c_i32_1\n"
7997                 "                  OpStore %iptr %ival_next\n"
7998                 "                  OpBranch %cont\n"
7999                 "%cont           = OpLabel\n"
8000                 "                  OpBranch %loop\n"
8001
8002                 "%case1          = OpLabel\n"
8003                 "%addp6          = OpFAdd %f32 %val %c_f32_p6\n"
8004                 "                  OpStore %loc %addp6\n"
8005                 "                  OpBranch %switch_exit\n"
8006
8007                 "                  OpFunctionEnd\n";
8008
8009         fragments["pre_main"]   = typesAndConstants;
8010         fragments["testfun"]    = function;
8011
8012         inputColors[0]                  = RGBA(127, 27,  127, 51);
8013         inputColors[1]                  = RGBA(127, 0,   0,   51);
8014         inputColors[2]                  = RGBA(0,   27,  0,   51);
8015         inputColors[3]                  = RGBA(0,   0,   127, 51);
8016
8017         outputColors[0]                 = RGBA(178, 180, 229, 255);
8018         outputColors[1]                 = RGBA(178, 153, 102, 255);
8019         outputColors[2]                 = RGBA(51,  180, 102, 255);
8020         outputColors[3]                 = RGBA(51,  153, 229, 255);
8021
8022         createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
8023
8024         addOpSwitchAmberTests(*group, testCtx);
8025
8026         return group.release();
8027 }
8028
8029 tcu::TestCaseGroup* createDecorationGroupTests(tcu::TestContext& testCtx)
8030 {
8031         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, "decoration_group", "Decoration group tests"));
8032         RGBA                                                    inputColors[4];
8033         RGBA                                                    outputColors[4];
8034         map<string, string>                             fragments;
8035
8036         const char                                              decorations[]           =
8037                 "OpDecorate %array_group         ArrayStride 4\n"
8038                 "OpDecorate %struct_member_group Offset 0\n"
8039                 "%array_group         = OpDecorationGroup\n"
8040                 "%struct_member_group = OpDecorationGroup\n"
8041
8042                 "OpDecorate %group1 RelaxedPrecision\n"
8043                 "OpDecorate %group3 RelaxedPrecision\n"
8044                 "OpDecorate %group3 Flat\n"
8045                 "OpDecorate %group3 Restrict\n"
8046                 "%group0 = OpDecorationGroup\n"
8047                 "%group1 = OpDecorationGroup\n"
8048                 "%group3 = OpDecorationGroup\n";
8049
8050         const char                                              typesAndConstants[]     =
8051                 "%a3f32     = OpTypeArray %f32 %c_u32_3\n"
8052                 "%struct1   = OpTypeStruct %a3f32\n"
8053                 "%struct2   = OpTypeStruct %a3f32\n"
8054                 "%fp_struct1 = OpTypePointer Function %struct1\n"
8055                 "%fp_struct2 = OpTypePointer Function %struct2\n"
8056                 "%c_f32_2    = OpConstant %f32 2.\n"
8057                 "%c_f32_n2   = OpConstant %f32 -2.\n"
8058
8059                 "%c_a3f32_1 = OpConstantComposite %a3f32 %c_f32_1 %c_f32_2 %c_f32_1\n"
8060                 "%c_a3f32_2 = OpConstantComposite %a3f32 %c_f32_n1 %c_f32_n2 %c_f32_n1\n"
8061                 "%c_struct1 = OpConstantComposite %struct1 %c_a3f32_1\n"
8062                 "%c_struct2 = OpConstantComposite %struct2 %c_a3f32_2\n";
8063
8064         const char                                              function[]                      =
8065                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8066                 "%param     = OpFunctionParameter %v4f32\n"
8067                 "%entry     = OpLabel\n"
8068                 "%result    = OpVariable %fp_v4f32 Function\n"
8069                 "%v_struct1 = OpVariable %fp_struct1 Function\n"
8070                 "%v_struct2 = OpVariable %fp_struct2 Function\n"
8071                 "             OpStore %result %param\n"
8072                 "             OpStore %v_struct1 %c_struct1\n"
8073                 "             OpStore %v_struct2 %c_struct2\n"
8074                 "%ptr1      = OpAccessChain %fp_f32 %v_struct1 %c_i32_0 %c_i32_2\n"
8075                 "%val1      = OpLoad %f32 %ptr1\n"
8076                 "%ptr2      = OpAccessChain %fp_f32 %v_struct2 %c_i32_0 %c_i32_2\n"
8077                 "%val2      = OpLoad %f32 %ptr2\n"
8078                 "%addvalues = OpFAdd %f32 %val1 %val2\n"
8079                 "%ptr       = OpAccessChain %fp_f32 %result %c_i32_1\n"
8080                 "%val       = OpLoad %f32 %ptr\n"
8081                 "%addresult = OpFAdd %f32 %addvalues %val\n"
8082                 "             OpStore %ptr %addresult\n"
8083                 "%ret       = OpLoad %v4f32 %result\n"
8084                 "             OpReturnValue %ret\n"
8085                 "             OpFunctionEnd\n";
8086
8087         struct CaseNameDecoration
8088         {
8089                 string name;
8090                 string decoration;
8091         };
8092
8093         CaseNameDecoration tests[] =
8094         {
8095                 {
8096                         "same_decoration_group_on_multiple_types",
8097                         "OpGroupMemberDecorate %struct_member_group %struct1 0 %struct2 0\n"
8098                 },
8099                 {
8100                         "empty_decoration_group",
8101                         "OpGroupDecorate %group0      %a3f32\n"
8102                         "OpGroupDecorate %group0      %result\n"
8103                 },
8104                 {
8105                         "one_element_decoration_group",
8106                         "OpGroupDecorate %array_group %a3f32\n"
8107                 },
8108                 {
8109                         "multiple_elements_decoration_group",
8110                         "OpGroupDecorate %group3      %v_struct1\n"
8111                 },
8112                 {
8113                         "multiple_decoration_groups_on_same_variable",
8114                         "OpGroupDecorate %group0      %v_struct2\n"
8115                         "OpGroupDecorate %group1      %v_struct2\n"
8116                         "OpGroupDecorate %group3      %v_struct2\n"
8117                 },
8118                 {
8119                         "same_decoration_group_multiple_times",
8120                         "OpGroupDecorate %group1      %addvalues\n"
8121                         "OpGroupDecorate %group1      %addvalues\n"
8122                         "OpGroupDecorate %group1      %addvalues\n"
8123                 },
8124
8125         };
8126
8127         getHalfColorsFullAlpha(inputColors);
8128         getHalfColorsFullAlpha(outputColors);
8129
8130         for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
8131         {
8132                 fragments["decoration"] = decorations + tests[idx].decoration;
8133                 fragments["pre_main"]   = typesAndConstants;
8134                 fragments["testfun"]    = function;
8135
8136                 createTestsForAllStages(tests[idx].name, inputColors, outputColors, fragments, group.get());
8137         }
8138
8139         return group.release();
8140 }
8141
8142 struct SpecConstantTwoValGraphicsCase
8143 {
8144         const std::string       caseName;
8145         const std::string       scDefinition0;
8146         const std::string       scDefinition1;
8147         const std::string       scResultType;
8148         const std::string       scOperation;
8149         SpecConstantValue       scActualValue0;
8150         SpecConstantValue       scActualValue1;
8151         const std::string       resultOperation;
8152         RGBA                            expectedColors[4];
8153         CaseFlags                       caseFlags;
8154
8155                                                 SpecConstantTwoValGraphicsCase (const std::string&                      name,
8156                                                                                                                 const std::string&                      definition0,
8157                                                                                                                 const std::string&                      definition1,
8158                                                                                                                 const std::string&                      resultType,
8159                                                                                                                 const std::string&                      operation,
8160                                                                                                                 const SpecConstantValue&        value0,
8161                                                                                                                 const SpecConstantValue&        value1,
8162                                                                                                                 const std::string&                      resultOp,
8163                                                                                                                 const RGBA                                      (&output)[4],
8164                                                                                                                 CaseFlags                                       flags = FLAG_NONE)
8165                                                         : caseName                              (name)
8166                                                         , scDefinition0                 (definition0)
8167                                                         , scDefinition1                 (definition1)
8168                                                         , scResultType                  (resultType)
8169                                                         , scOperation                   (operation)
8170                                                         , scActualValue0                (value0)
8171                                                         , scActualValue1                (value1)
8172                                                         , resultOperation               (resultOp)
8173                                                         , caseFlags                             (flags)
8174         {
8175                 expectedColors[0] = output[0];
8176                 expectedColors[1] = output[1];
8177                 expectedColors[2] = output[2];
8178                 expectedColors[3] = output[3];
8179         }
8180 };
8181
// Builds the "opspecconstantop" test group, which exercises the OpSpecConstantOp instruction.
//
// Two kinds of tests are registered through createTestsForAllStages():
//  - one test per entry in `cases`: a single operation is applied to the spec constants
//    %sc_0 / %sc_1 and its result selects which component of the input color is modified;
//  - a "vector_related" test that chains CompositeInsert, VectorShuffle and CompositeExtract.
//
// The group is created in a de::MovePtr and handed to the caller with release().
8182 tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
8183 {
8184         de::MovePtr<tcu::TestCaseGroup>                 group (new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
8185         vector<SpecConstantTwoValGraphicsCase>  cases;
8186         RGBA                                                                    inputColors[4];
8187         RGBA                                                                    outputColors0[4];
8188         RGBA                                                                    outputColors1[4];
8189         RGBA                                                                    outputColors2[4];
8190
             // %sc_0 and %sc_1 get SpecId 0 and 1; the actual values are appended to specConstants
             // in that same order further below.
8191         const char      decorations1[]                  =
8192                 "OpDecorate %sc_0  SpecId 0\n"
8193                 "OpDecorate %sc_1  SpecId 1\n";
8194
             // Template for the module-level declarations: optional extra type definitions, the two
             // spec constants (type and default value come from SC_DEF0/SC_DEF1) and the
             // OpSpecConstantOp under test.
8195         const char      typesAndConstants1[]    =
8196                 "${OPTYPE_DEFINITIONS:opt}"
8197                 "%sc_0      = OpSpecConstant${SC_DEF0}\n"
8198                 "%sc_1      = OpSpecConstant${SC_DEF1}\n"
8199                 "%sc_op     = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
8200
             // Template for the test function: %gen is produced by GEN_RESULT, %gen + %c_i32_1 is used
             // as the index of the color component to which %c_f32_0_5 is added.
             // NOTE(review): %c_i32_1 / %c_f32_0_5 are declared outside this function; presumably the
             // constants 1 and 0.5, which matches the "adding 128" expected colors below.
8201         const char      function1[]                             =
8202                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8203                 "%param     = OpFunctionParameter %v4f32\n"
8204                 "%label     = OpLabel\n"
8205                 "%result    = OpVariable %fp_v4f32 Function\n"
8206                 "${TYPE_CONVERT:opt}"
8207                 "             OpStore %result %param\n"
8208                 "%gen       = ${GEN_RESULT}\n"
8209                 "%index     = OpIAdd %i32 %gen %c_i32_1\n"
8210                 "%loc       = OpAccessChain %fp_f32 %result %index\n"
8211                 "%val       = OpLoad %f32 %loc\n"
8212                 "%add       = OpFAdd %f32 %val %c_f32_0_5\n"
8213                 "             OpStore %loc %add\n"
8214                 "%ret       = OpLoad %v4f32 %result\n"
8215                 "             OpReturnValue %ret\n"
8216                 "             OpFunctionEnd\n";
8217
8218         inputColors[0] = RGBA(127, 127, 127, 255);
8219         inputColors[1] = RGBA(127, 0,   0,   255);
8220         inputColors[2] = RGBA(0,   127, 0,   255);
8221         inputColors[3] = RGBA(0,   0,   127, 255);
8222
8223         // Derived from inputColors[x] by adding 128 to inputColors[x][0].
8224         outputColors0[0] = RGBA(255, 127, 127, 255);
8225         outputColors0[1] = RGBA(255, 0,   0,   255);
8226         outputColors0[2] = RGBA(128, 127, 0,   255);
8227         outputColors0[3] = RGBA(128, 0,   127, 255);
8228
8229         // Derived from inputColors[x] by adding 128 to inputColors[x][1].
8230         outputColors1[0] = RGBA(127, 255, 127, 255);
8231         outputColors1[1] = RGBA(127, 128, 0,   255);
8232         outputColors1[2] = RGBA(0,   255, 0,   255);
8233         outputColors1[3] = RGBA(0,   128, 127, 255);
8234
8235         // Derived from inputColors[x] by adding 128 to inputColors[x][2].
8236         outputColors2[0] = RGBA(127, 127, 255, 255);
8237         outputColors2[1] = RGBA(127, 0,   128, 255);
8238         outputColors2[2] = RGBA(0,   127, 128, 255);
8239         outputColors2[3] = RGBA(0,   0,   255, 255);
8240
             // GEN_RESULT variants that turn %sc_op (or its 32-bit conversion %sc_op32) into an %i32:
             // integer results are passed through by adding zero; boolean results go through OpSelect,
             // yielding %c_i32_1 when %sc_op is true (selectTrueUsingSc) or false (selectFalseUsingSc).
8241         const char addZeroToSc[]                = "OpIAdd %i32 %c_i32_0 %sc_op";
8242         const char addZeroToSc32[]              = "OpIAdd %i32 %c_i32_0 %sc_op32";
8243         const char selectTrueUsingSc[]  = "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
8244         const char selectFalseUsingSc[] = "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
8245
             // Case arguments: name, definition of %sc_0, definition of %sc_1, result type, operation,
             // actual value of %sc_0, actual value of %sc_1, GEN_RESULT instruction, expected colors
             // and optional flags.
8246         cases.push_back(SpecConstantTwoValGraphicsCase("iadd",                                                  " %i32 0",              " %i32 0",              "%i32",         "IAdd                 %sc_0 %sc_1",                             19,                                     -20,                            addZeroToSc,            outputColors0));
8247         cases.push_back(SpecConstantTwoValGraphicsCase("isub",                                                  " %i32 0",              " %i32 0",              "%i32",         "ISub                 %sc_0 %sc_1",                             19,                                     20,                                     addZeroToSc,            outputColors0));
8248         cases.push_back(SpecConstantTwoValGraphicsCase("imul",                                                  " %i32 0",              " %i32 0",              "%i32",         "IMul                 %sc_0 %sc_1",                             -1,                                     -1,                                     addZeroToSc,            outputColors2));
8249         cases.push_back(SpecConstantTwoValGraphicsCase("sdiv",                                                  " %i32 0",              " %i32 0",              "%i32",         "SDiv                 %sc_0 %sc_1",                             -126,                           126,                            addZeroToSc,            outputColors0));
8250         cases.push_back(SpecConstantTwoValGraphicsCase("udiv",                                                  " %i32 0",              " %i32 0",              "%i32",         "UDiv                 %sc_0 %sc_1",                             126,                            126,                            addZeroToSc,            outputColors2));
8251         cases.push_back(SpecConstantTwoValGraphicsCase("srem",                                                  " %i32 0",              " %i32 0",              "%i32",         "SRem                 %sc_0 %sc_1",                             3,                                      2,                                      addZeroToSc,            outputColors2));
8252         cases.push_back(SpecConstantTwoValGraphicsCase("smod",                                                  " %i32 0",              " %i32 0",              "%i32",         "SMod                 %sc_0 %sc_1",                             3,                                      2,                                      addZeroToSc,            outputColors2));
8253         cases.push_back(SpecConstantTwoValGraphicsCase("umod",                                                  " %i32 0",              " %i32 0",              "%i32",         "UMod                 %sc_0 %sc_1",                             1001,                           500,                            addZeroToSc,            outputColors2));
8254         cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseand",                                    " %i32 0",              " %i32 0",              "%i32",         "BitwiseAnd           %sc_0 %sc_1",                             0x33,                           0x0d,                           addZeroToSc,            outputColors2));
8255         cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseor",                                             " %i32 0",              " %i32 0",              "%i32",         "BitwiseOr            %sc_0 %sc_1",                             0,                                      1,                                      addZeroToSc,            outputColors2));
8256         cases.push_back(SpecConstantTwoValGraphicsCase("bitwisexor",                                    " %i32 0",              " %i32 0",              "%i32",         "BitwiseXor           %sc_0 %sc_1",                             0x2e,                           0x2f,                           addZeroToSc,            outputColors2));
8257         cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical",                             " %i32 0",              " %i32 0",              "%i32",         "ShiftRightLogical    %sc_0 %sc_1",                             2,                                      1,                                      addZeroToSc,            outputColors2));
8258         cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic",                  " %i32 0",              " %i32 0",              "%i32",         "ShiftRightArithmetic %sc_0 %sc_1",                             -4,                                     2,                                      addZeroToSc,            outputColors0));
8259         cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical",                              " %i32 0",              " %i32 0",              "%i32",         "ShiftLeftLogical     %sc_0 %sc_1",                             1,                                      0,                                      addZeroToSc,            outputColors2));
8260
8261         // Shifts for other integer sizes.
8262         cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i64",                 " %i64 0",              " %i64 0",              "%i64",         "ShiftRightLogical    %sc_0 %sc_1",                             deInt64{2},                     deInt64{1},                     addZeroToSc32,          outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8263         cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i64",              " %i64 0",              " %i64 0",              "%i64",         "ShiftRightArithmetic %sc_0 %sc_1",                             deInt64{-4},            deInt64{2},                     addZeroToSc32,          outputColors0, (FLAG_I64 | FLAG_CONVERT)));
8264         cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i64",                  " %i64 0",              " %i64 0",              "%i64",         "ShiftLeftLogical     %sc_0 %sc_1",                             deInt64{1},                     deInt64{0},                     addZeroToSc32,          outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8265         cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i16",                 " %i16 0",              " %i16 0",              "%i16",         "ShiftRightLogical    %sc_0 %sc_1",                             deInt16{2},                     deInt16{1},                     addZeroToSc32,          outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8266         cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i16",              " %i16 0",              " %i16 0",              "%i16",         "ShiftRightArithmetic %sc_0 %sc_1",                             deInt16{-4},            deInt16{2},                     addZeroToSc32,          outputColors0, (FLAG_I16 | FLAG_CONVERT)));
8267         cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i16",                  " %i16 0",              " %i16 0",              "%i16",         "ShiftLeftLogical     %sc_0 %sc_1",                             deInt16{1},                     deInt16{0},                     addZeroToSc32,          outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8268         cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i8",                  " %i8 0",               " %i8 0",               "%i8",          "ShiftRightLogical    %sc_0 %sc_1",                             deInt8{2},                      deInt8{1},                      addZeroToSc32,          outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8269         cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i8",               " %i8 0",               " %i8 0",               "%i8",          "ShiftRightArithmetic %sc_0 %sc_1",                             deInt8{-4},                     deInt8{2},                      addZeroToSc32,          outputColors0, (FLAG_I8 | FLAG_CONVERT)));
8270         cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i8",                   " %i8 0",               " %i8 0",               "%i8",          "ShiftLeftLogical     %sc_0 %sc_1",                             deInt8{1},                      deInt8{0},                      addZeroToSc32,          outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8271
8272         // Shifts for other integer sizes but only in the shift amount.
8273         cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i64",               " %i32 0",              " %i64 0",              "%i32",         "ShiftRightLogical    %sc_0 %sc_1",                             2,                                      deInt64{1},                     addZeroToSc,            outputColors2, (FLAG_I64)));
8274         cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i64",    " %i32 0",              " %i64 0",              "%i32",         "ShiftRightArithmetic %sc_0 %sc_1",                             -4,                                     deInt64{2},                     addZeroToSc,            outputColors0, (FLAG_I64)));
8275         cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i64",                " %i32 0",              " %i64 0",              "%i32",         "ShiftLeftLogical     %sc_0 %sc_1",                             1,                                      deInt64{0},                     addZeroToSc,            outputColors2, (FLAG_I64)));
8276         cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i16",               " %i32 0",              " %i16 0",              "%i32",         "ShiftRightLogical    %sc_0 %sc_1",                             2,                                      deInt16{1},                     addZeroToSc,            outputColors2, (FLAG_I16)));
8277         cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i16",    " %i32 0",              " %i16 0",              "%i32",         "ShiftRightArithmetic %sc_0 %sc_1",                             -4,                                     deInt16{2},                     addZeroToSc,            outputColors0, (FLAG_I16)));
8278         cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i16",                " %i32 0",              " %i16 0",              "%i32",         "ShiftLeftLogical     %sc_0 %sc_1",                             1,                                      deInt16{0},                     addZeroToSc,            outputColors2, (FLAG_I16)));
8279         cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i8",                " %i32 0",              " %i8 0",               "%i32",         "ShiftRightLogical    %sc_0 %sc_1",                             2,                                      deInt8{1},                      addZeroToSc,            outputColors2, (FLAG_I8)));
8280         cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i8",             " %i32 0",              " %i8 0",               "%i32",         "ShiftRightArithmetic %sc_0 %sc_1",                             -4,                                     deInt8{2},                      addZeroToSc,            outputColors0, (FLAG_I8)));
8281         cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i8",                 " %i32 0",              " %i8 0",               "%i32",         "ShiftLeftLogical     %sc_0 %sc_1",                             1,                                      deInt8{0},                      addZeroToSc,            outputColors2, (FLAG_I8)));
8282
             // Comparison, logical, unary, select and conversion operations.
8283         cases.push_back(SpecConstantTwoValGraphicsCase("slessthan",                                             " %i32 0",              " %i32 0",              "%bool",        "SLessThan            %sc_0 %sc_1",                             -20,                            -10,                            selectTrueUsingSc,      outputColors2));
8284         cases.push_back(SpecConstantTwoValGraphicsCase("ulessthan",                                             " %i32 0",              " %i32 0",              "%bool",        "ULessThan            %sc_0 %sc_1",                             10,                                     20,                                     selectTrueUsingSc,      outputColors2));
8285         cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthan",                                  " %i32 0",              " %i32 0",              "%bool",        "SGreaterThan         %sc_0 %sc_1",                             -1000,                          50,                                     selectFalseUsingSc,     outputColors2));
8286         cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthan",                                  " %i32 0",              " %i32 0",              "%bool",        "UGreaterThan         %sc_0 %sc_1",                             10,                                     5,                                      selectTrueUsingSc,      outputColors2));
8287         cases.push_back(SpecConstantTwoValGraphicsCase("slessthanequal",                                " %i32 0",              " %i32 0",              "%bool",        "SLessThanEqual       %sc_0 %sc_1",                             -10,                            -10,                            selectTrueUsingSc,      outputColors2));
8288         cases.push_back(SpecConstantTwoValGraphicsCase("ulessthanequal",                                " %i32 0",              " %i32 0",              "%bool",        "ULessThanEqual       %sc_0 %sc_1",                             50,                                     100,                            selectTrueUsingSc,      outputColors2));
8289         cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthanequal",                             " %i32 0",              " %i32 0",              "%bool",        "SGreaterThanEqual    %sc_0 %sc_1",                             -1000,                          50,                                     selectFalseUsingSc,     outputColors2));
8290         cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthanequal",                             " %i32 0",              " %i32 0",              "%bool",        "UGreaterThanEqual    %sc_0 %sc_1",                             10,                                     10,                                     selectTrueUsingSc,      outputColors2));
8291         cases.push_back(SpecConstantTwoValGraphicsCase("iequal",                                                " %i32 0",              " %i32 0",              "%bool",        "IEqual               %sc_0 %sc_1",                             42,                                     24,                                     selectFalseUsingSc,     outputColors2));
8292         cases.push_back(SpecConstantTwoValGraphicsCase("inotequal",                                             " %i32 0",              " %i32 0",              "%bool",        "INotEqual            %sc_0 %sc_1",                             42,                                     24,                                     selectTrueUsingSc,      outputColors2));
8293         cases.push_back(SpecConstantTwoValGraphicsCase("logicaland",                                    "True %bool",   "True %bool",   "%bool",        "LogicalAnd           %sc_0 %sc_1",                             0,                                      1,                                      selectFalseUsingSc,     outputColors2));
8294         cases.push_back(SpecConstantTwoValGraphicsCase("logicalor",                                             "False %bool",  "False %bool",  "%bool",        "LogicalOr            %sc_0 %sc_1",                             1,                                      0,                                      selectTrueUsingSc,      outputColors2));
8295         cases.push_back(SpecConstantTwoValGraphicsCase("logicalequal",                                  "True %bool",   "True %bool",   "%bool",        "LogicalEqual         %sc_0 %sc_1",                             0,                                      1,                                      selectFalseUsingSc,     outputColors2));
8296         cases.push_back(SpecConstantTwoValGraphicsCase("logicalnotequal",                               "False %bool",  "False %bool",  "%bool",        "LogicalNotEqual      %sc_0 %sc_1",                             1,                                      0,                                      selectTrueUsingSc,      outputColors2));
8297         cases.push_back(SpecConstantTwoValGraphicsCase("snegate",                                               " %i32 0",              " %i32 0",              "%i32",         "SNegate              %sc_0",                                   -1,                                     0,                                      addZeroToSc,            outputColors2));
8298         cases.push_back(SpecConstantTwoValGraphicsCase("not",                                                   " %i32 0",              " %i32 0",              "%i32",         "Not                  %sc_0",                                   -2,                                     0,                                      addZeroToSc,            outputColors2));
8299         cases.push_back(SpecConstantTwoValGraphicsCase("logicalnot",                                    "False %bool",  "False %bool",  "%bool",        "LogicalNot           %sc_0",                                   1,                                      0,                                      selectFalseUsingSc,     outputColors2));
8300         cases.push_back(SpecConstantTwoValGraphicsCase("select",                                                "False %bool",  " %i32 0",              "%i32",         "Select               %sc_0 %sc_1 %c_i32_0",    1,                                      1,                                      addZeroToSc,            outputColors2));
8301         cases.push_back(SpecConstantTwoValGraphicsCase("sconvert",                                              " %i32 0",              " %i32 0",              "%i16",         "SConvert             %sc_0",                                   -1,                                     0,                                      addZeroToSc32,          outputColors0, (FLAG_I16 | FLAG_CONVERT)));
8302         cases.push_back(SpecConstantTwoValGraphicsCase("fconvert",                                              " %f32 0",              " %f32 0",              "%f64",         "FConvert             %sc_0",                                   tcu::Float32(-1.0),     tcu::Float32(0.0),      addZeroToSc32,          outputColors0, (FLAG_F64 | FLAG_CONVERT)));
8303         cases.push_back(SpecConstantTwoValGraphicsCase("fconvert16",                                    " %f16 0",              " %f16 0",              "%f32",         "FConvert             %sc_0",                                   tcu::Float16(-1.0),     tcu::Float16(0.0),      addZeroToSc32,          outputColors0, (FLAG_F16 | FLAG_CONVERT)));
8304         // \todo[2015-12-1 antiagainst] OpQuantizeToF16
8305
             // Instantiate one test per case: the flags select extra capabilities, type definitions,
             // required features and the conversion of %sc_op to the 32-bit %sc_op32.
8306         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
8307         {
8308                 map<string, string>                     specializations;
8309                 map<string, string>                     fragments;
8310                 SpecConstants                           specConstants;
8311                 PushConstants                           noPushConstants;
8312                 GraphicsResources                       noResources;
8313                 GraphicsInterfaces                      noInterfaces;
8314                 vector<string>                          extensions;
8315                 VulkanFeatures                          requiredFeatures;
8316
8317                 // Special SPIR-V code when using 16-bit integers.
8318                 if (cases[caseNdx].caseFlags & FLAG_I16)
8319                 {
8320                         requiredFeatures.coreFeatures.shaderInt16               = VK_TRUE;
8321                         fragments["capability"]                                                 += "OpCapability Int16\n";                                                      // Adds 16-bit integer capability
8322                         specializations["OPTYPE_DEFINITIONS"]                   += "%i16 = OpTypeInt 16 1\n";                                           // Adds 16-bit integer type
8323                         if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8324                                 specializations["TYPE_CONVERT"]                         += "%sc_op32 = OpSConvert %i32 %sc_op\n";                       // Converts 16-bit integer to 32-bit integer
8325                 }
8326
8327                 // Special SPIR-V code when using 64-bit integers.
8328                 if (cases[caseNdx].caseFlags & FLAG_I64)
8329                 {
8330                         requiredFeatures.coreFeatures.shaderInt64               = VK_TRUE;
8331                         fragments["capability"]                                                 += "OpCapability Int64\n";                                                      // Adds 64-bit integer capability
8332                         specializations["OPTYPE_DEFINITIONS"]                   += "%i64 = OpTypeInt 64 1\n";                                           // Adds 64-bit integer type
8333                         if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8334                                 specializations["TYPE_CONVERT"]                         += "%sc_op32 = OpSConvert %i32 %sc_op\n";                       // Converts 64-bit integer to 32-bit integer
8335                 }
8336
8337                 // Special SPIR-V code when using 64-bit floats.
8338                 if (cases[caseNdx].caseFlags & FLAG_F64)
8339                 {
8340                         requiredFeatures.coreFeatures.shaderFloat64             = VK_TRUE;
8341                         fragments["capability"]                                                 += "OpCapability Float64\n";                                            // Adds 64-bit float capability
8342                         specializations["OPTYPE_DEFINITIONS"]                   += "%f64 = OpTypeFloat 64\n";                                           // Adds 64-bit float type
8343                         if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8344                                 specializations["TYPE_CONVERT"]                         += "%sc_op32 = OpConvertFToS %i32 %sc_op\n";            // Converts 64-bit float to 32-bit integer
8345                 }
8346
8347                 // Extension needed for float16 and int8.
8348                 if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
8349                         extensions.push_back("VK_KHR_shader_float16_int8");
8350
8351                 // Special SPIR-V code when using 16-bit floats.
8352                 if (cases[caseNdx].caseFlags & FLAG_F16)
8353                 {
8354                         requiredFeatures.extFloat16Int8.shaderFloat16 = true;
8355                         fragments["capability"]                                         += "OpCapability Float16\n";                                            // Adds 16-bit float capability
8356                         specializations["OPTYPE_DEFINITIONS"]           += "%f16 = OpTypeFloat 16\n";                                           // Adds 16-bit float type
8357                         if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8358                                 specializations["TYPE_CONVERT"]                 += "%sc_op32 = OpConvertFToS %i32 %sc_op\n";            // Converts 16-bit float to 32-bit integer
8359                 }
8360
8361                 // Special SPIR-V code when using 8-bit integers.
8362                 if (cases[caseNdx].caseFlags & FLAG_I8)
8363                 {
8364                         requiredFeatures.extFloat16Int8.shaderInt8 = true;
8365                         fragments["capability"]                                         += "OpCapability Int8\n";                                               // Adds 8-bit integer capability
8366                         specializations["OPTYPE_DEFINITIONS"]           += "%i8 = OpTypeInt 8 1\n";                                             // Adds 8-bit integer type
8367                         if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8368                                 specializations["TYPE_CONVERT"]                 += "%sc_op32 = OpSConvert %i32 %sc_op\n";               // Converts 8-bit integer to 32-bit integer
8369                 }
8370
                     // Template parameters taken directly from the case description.
8371                 specializations["SC_DEF0"]                      = cases[caseNdx].scDefinition0;
8372                 specializations["SC_DEF1"]                      = cases[caseNdx].scDefinition1;
8373                 specializations["SC_RESULT_TYPE"]       = cases[caseNdx].scResultType;
8374                 specializations["SC_OP"]                        = cases[caseNdx].scOperation;
8375                 specializations["GEN_RESULT"]           = cases[caseNdx].resultOperation;
8376
8377                 fragments["decoration"]                         = tcu::StringTemplate(decorations1).specialize(specializations);
8378                 fragments["pre_main"]                           = tcu::StringTemplate(typesAndConstants1).specialize(specializations);
8379                 fragments["testfun"]                            = tcu::StringTemplate(function1).specialize(specializations);
8380
                     // Actual specialization values: the value for %sc_0 first, then the one for %sc_1.
8381                 cases[caseNdx].scActualValue0.appendTo(specConstants);
8382                 cases[caseNdx].scActualValue1.appendTo(specConstants);
8383
8384                 createTestsForAllStages(
8385                         cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants,
8386                         noPushConstants, noResources, noInterfaces, extensions, requiredFeatures, group.get());
8387         }
8388
             // Second part: the "vector_related" test, which uses three spec constants.
8389         const char                      decorations2[]          =
8390                 "OpDecorate %sc_0  SpecId 0\n"
8391                 "OpDecorate %sc_1  SpecId 1\n"
8392                 "OpDecorate %sc_2  SpecId 2\n";
8393
             // In the VectorShuffle operands below, a component literal of 0xFFFFFFFF marks the
             // corresponding result component as undefined (written as ??? in the trailing comments).
8394         const std::string       typesAndConstants2      =
8395                 "%vec3_0      = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
8396                 "%vec3_undef  = OpUndef %v3i32\n"
8397
8398                 + getSpecConstantOpStructConstantsAndTypes() + getSpecConstantOpStructComposites() +
8399
8400                 "%sc_0        = OpSpecConstant %i32 0\n"
8401                 "%sc_1        = OpSpecConstant %i32 0\n"
8402                 "%sc_2        = OpSpecConstant %i32 0\n"
8403
8404                 + getSpecConstantOpStructConstBlock() +
8405
8406                 "%sc_vec3_0   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_0        %vec3_0      0\n"                                                 // (sc_0, 0,    0)
8407                 "%sc_vec3_1   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_1        %vec3_0      1\n"                                                 // (0,    sc_1, 0)
8408                 "%sc_vec3_2   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_2        %vec3_0      2\n"                                                 // (0,    0,    sc_2)
8409                 "%sc_vec3_0_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0   %vec3_undef  0          0xFFFFFFFF 2\n"   // (sc_0, ???,  0)
8410                 "%sc_vec3_1_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1          0\n"   // (???,  sc_1, 0)
8411                 "%sc_vec3_2_s = OpSpecConstantOp %v3i32 VectorShuffle    %vec3_undef  %sc_vec3_2   5          0xFFFFFFFF 5\n"   // (sc_2, ???,  sc_2)
8412                 "%sc_vec3_01  = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n"                                             // (???,  sc_0, sc_1)
8413                 "%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n"                                             // (sc_2, sc_0, sc_1)
8414                 "%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"                                                 // sc_2
8415                 "%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"                                                 // sc_0
8416                 "%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"                                                 // sc_1
8417                 "%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"                                                              // (sc_2 - sc_0)
8418                 "%sc_factor   = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n";                                                             // (sc_2 - sc_0) * sc_1
8419
             // Same shape as function1, but the component index is %sc_final.
             // NOTE(review): %sc_final is not defined in this function; presumably it comes from the
             // getSpecConstantOpStruct*() helper snippets - confirm there.
8420         const std::string       function2                       =
8421                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8422                 "%param     = OpFunctionParameter %v4f32\n"
8423                 "%label     = OpLabel\n"
8424                 "%result    = OpVariable %fp_v4f32 Function\n"
8425
8426                 + getSpecConstantOpStructInstructions() +
8427
8428                 "             OpStore %result %param\n"
8429                 "%loc       = OpAccessChain %fp_f32 %result %sc_final\n"
8430                 "%val       = OpLoad %f32 %loc\n"
8431                 "%add       = OpFAdd %f32 %val %c_f32_0_5\n"
8432                 "             OpStore %loc %add\n"
8433                 "%ret       = OpLoad %v4f32 %result\n"
8434                 "             OpReturnValue %ret\n"
8435                 "             OpFunctionEnd\n";
8436
8437         map<string, string>     fragments;
8438         SpecConstants           specConstants;
8439
8440         fragments["decoration"] = decorations2;
8441         fragments["pre_main"]   = typesAndConstants2;
8442         fragments["testfun"]    = function2;
8443
             // sc_0 = 56789, sc_1 = -2, sc_2 = 56788, so %sc_factor = (sc_2 - sc_0) * sc_1 = 2.
8444         specConstants.append<deInt32>(56789);
8445         specConstants.append<deInt32>(-2);
8446         specConstants.append<deInt32>(56788);
8447
8448         createTestsForAllStages("vector_related", inputColors, outputColors2, fragments, specConstants, group.get());
8449
8450         return group.release();
8451 }
8452
// Builds the "opphi" test group, which exercises the OpPhi instruction.
//
// Four cases are registered, in this order, through createTestsForAllStages():
//   out_of_order - an OpPhi whose (value, parent) pairs are listed in a different order than the parent blocks.
//   induction    - two OpPhi instructions carrying a loop counter and a running sum around a single-block loop.
//   swap         - two OpPhi instructions in one block that each take the other's result as the back-edge value.
//   swap16       - the same swap performed on 16-bit floats; requests the Float16 capability, the
//                  VK_KHR_shader_float16_int8 extension and the shaderFloat16 feature.
//
// testCtx is forwarded to the tcu::TestCaseGroup constructor. The group is handed back with release().
8453 tcu::TestCaseGroup* createOpPhiTests(tcu::TestContext& testCtx)
8454 {
8455         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
             // The numeric suffixes 1..4 pair each object with one of the four cases, in registration order.
8456         RGBA                                                    inputColors[4];
8457         RGBA                                                    outputColors1[4];
8458         RGBA                                                    outputColors2[4];
8459         RGBA                                                    outputColors3[4];
8460         RGBA                                                    outputColors4[4];
8461         map<string, string>                             fragments1;
8462         map<string, string>                             fragments2;
8463         map<string, string>                             fragments3;
8464         map<string, string>                             fragments4;
             // Only the float16 case ("swap16") uses the three objects below.
8465         std::vector<std::string>                extensions4;
8466         GraphicsResources                               resources4;
8467         VulkanFeatures                                  vulkanFeatures4;
8468
             // ---- Case 1: "out_of_order" ----
             // Float constants fed into the OpPhi of function1.
8469         const char      typesAndConstants1[]    =
8470                 "%c_f32_p2  = OpConstant %f32 0.2\n"
8471                 "%c_f32_p4  = OpConstant %f32 0.4\n"
8472                 "%c_f32_p5  = OpConstant %f32 0.5\n"
8473                 "%c_f32_p8  = OpConstant %f32 0.8\n";
8474
             // Pseudo-code for function1:
8475         // vec4 test_code(vec4 param) {
8476         //   vec4 result = param;
8477         //   for (int i = 0; i < 4; ++i) {
8478         //     float operand;
8479         //     switch (i) {
8480         //       case 0: operand = .2; break;
8481         //       case 1: operand = .5; break;
8482         //       case 2: operand = .4; break;
8483         //       case 3: operand = .0; break;
8484         //       default: break; // unreachable
8485         //     }
8486         //     result[i] += operand;
8487         //   }
8488         //   return result;
8489         // }
8490         const char      function1[]                             =
8491                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8492                 "%param1    = OpFunctionParameter %v4f32\n"
8493                 "%lbl       = OpLabel\n"
8494                 "%iptr      = OpVariable %fp_i32 Function\n"
8495                 "%result    = OpVariable %fp_v4f32 Function\n"
8496                 "             OpStore %iptr %c_i32_0\n"
8497                 "             OpStore %result %param1\n"
8498                 "             OpBranch %loop\n"
8499
8500                 "%loop      = OpLabel\n"
8501                 "%ival      = OpLoad %i32 %iptr\n"
8502                 "%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
8503                 "             OpLoopMerge %exit %cont None\n"
8504                 "             OpBranchConditional %lt_4 %entry %exit\n"
8505
8506                 "%entry     = OpLabel\n"
8507                 "%loc       = OpAccessChain %fp_f32 %result %ival\n"
8508                 "%val       = OpLoad %f32 %loc\n"
8509                 "             OpSelectionMerge %phi None\n"
8510                 "             OpSwitch %ival %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
8511
                     // The case blocks are empty: the per-case value is selected purely by the OpPhi in %phi.
8512                 "%case0     = OpLabel\n"
8513                 "             OpBranch %phi\n"
8514                 "%case1     = OpLabel\n"
8515                 "             OpBranch %phi\n"
8516                 "%case2     = OpLabel\n"
8517                 "             OpBranch %phi\n"
8518                 "%case3     = OpLabel\n"
8519                 "             OpBranch %phi\n"
8520
8521                 "%default   = OpLabel\n"
8522                 "             OpUnreachable\n"
8523
8524                 "%phi       = OpLabel\n"
8525                 "%operand   = OpPhi %f32 %c_f32_p4 %case2 %c_f32_p5 %case1 %c_f32_p2 %case0 %c_f32_0 %case3\n" // not in the order of blocks
8526                 "             OpBranch %cont\n"
8527                 "%cont      = OpLabel\n"
8528                 "%add       = OpFAdd %f32 %val %operand\n"
8529                 "             OpStore %loc %add\n"
8530                 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
8531                 "             OpStore %iptr %ival_next\n"
8532                 "             OpBranch %loop\n"
8533
8534                 "%exit      = OpLabel\n"
8535                 "%ret       = OpLoad %v4f32 %result\n"
8536                 "             OpReturnValue %ret\n"
8537
8538                 "             OpFunctionEnd\n";
8539
8540         fragments1["pre_main"]  = typesAndConstants1;
8541         fragments1["testfun"]   = function1;
8542
             // The same inputColors array is shared by all four cases.
             // NOTE(review): the expected colors in this function are consistent with inputs of
             // (127,127,127), (127,0,0), (0,127,0) and (0,0,127) at full alpha - confirm against getHalfColorsFullAlpha().
8543         getHalfColorsFullAlpha(inputColors);
8544
             // Expected result of function1: roughly +0.2 on red, +0.5 on green and +0.4 on blue, alpha unchanged.
8545         outputColors1[0]                = RGBA(178, 255, 229, 255);
8546         outputColors1[1]                = RGBA(178, 127, 102, 255);
8547         outputColors1[2]                = RGBA(51,  255, 102, 255);
8548         outputColors1[3]                = RGBA(51,  127, 229, 255);
8549
8550         createTestsForAllStages("out_of_order", inputColors, outputColors1, fragments1, group.get());
8551
             // ---- Case 2: "induction" ----
8552         const char      typesAndConstants2[]    =
8553                 "%c_f32_p2  = OpConstant %f32 0.2\n";
8554
8555         // Add .4 to the second element of the given parameter.
             // %phi is a single-block loop that is its own continue target. %step takes the values 0, 1, 2;
             // the loop exits when %step is 2, at which point %accum holds %val plus two increments of 0.2.
8556         const char      function2[]                             =
8557                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8558                 "%param     = OpFunctionParameter %v4f32\n"
8559                 "%entry     = OpLabel\n"
8560                 "%result    = OpVariable %fp_v4f32 Function\n"
8561                 "             OpStore %result %param\n"
8562                 "%loc       = OpAccessChain %fp_f32 %result %c_i32_1\n"
8563                 "%val       = OpLoad %f32 %loc\n"
8564                 "             OpBranch %phi\n"
8565
8566                 "%phi        = OpLabel\n"
8567                 "%step       = OpPhi %i32 %c_i32_0  %entry %step_next  %phi\n"
8568                 "%accum      = OpPhi %f32 %val      %entry %accum_next %phi\n"
8569                 "%step_next  = OpIAdd %i32 %step  %c_i32_1\n"
8570                 "%accum_next = OpFAdd %f32 %accum %c_f32_p2\n"
8571                 "%still_loop = OpSLessThan %bool %step %c_i32_2\n"
8572                 "              OpLoopMerge %exit %phi None\n"
8573                 "              OpBranchConditional %still_loop %phi %exit\n"
8574
8575                 "%exit       = OpLabel\n"
8576                 "              OpStore %loc %accum\n"
8577                 "%ret        = OpLoad %v4f32 %result\n"
8578                 "              OpReturnValue %ret\n"
8579
8580                 "              OpFunctionEnd\n";
8581
8582         fragments2["pre_main"]  = typesAndConstants2;
8583         fragments2["testfun"]   = function2;
8584
             // Expected result of function2: only the green channel changes (by roughly +0.4).
8585         outputColors2[0]                        = RGBA(127, 229, 127, 255);
8586         outputColors2[1]                        = RGBA(127, 102, 0,   255);
8587         outputColors2[2]                        = RGBA(0,   229, 0,   255);
8588         outputColors2[3]                        = RGBA(0,   102, 127, 255);
8589
8590         createTestsForAllStages("induction", inputColors, outputColors2, fragments2, group.get());
8591
             // ---- Case 3: "swap" ----
             // NOTE(review): %c_f32_p2 is declared here but is not referenced by function3.
8592         const char      typesAndConstants3[]    =
8593                 "%true      = OpConstantTrue %bool\n"
8594                 "%false     = OpConstantFalse %bool\n"
8595                 "%c_f32_p2  = OpConstant %f32 0.2\n";
8596
8597         // Swap the second and the third element of the given parameter.
             // The %phi block is entered twice: first from %entry (%still_loop is true, the phis take the
             // initial values) and then from itself (%still_loop is false, %a_next and %b_next each take
             // the other's back-edge value). The expected colors below require that each phi observes
             // the other's value from the previous pass, not the freshly updated one.
8598         const char      function3[]                             =
8599                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8600                 "%param     = OpFunctionParameter %v4f32\n"
8601                 "%entry     = OpLabel\n"
8602                 "%result    = OpVariable %fp_v4f32 Function\n"
8603                 "             OpStore %result %param\n"
8604                 "%a_loc     = OpAccessChain %fp_f32 %result %c_i32_1\n"
8605                 "%a_init    = OpLoad %f32 %a_loc\n"
8606                 "%b_loc     = OpAccessChain %fp_f32 %result %c_i32_2\n"
8607                 "%b_init    = OpLoad %f32 %b_loc\n"
8608                 "             OpBranch %phi\n"
8609
8610                 "%phi        = OpLabel\n"
8611                 "%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
8612                 "%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
8613                 "%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
8614                 "              OpLoopMerge %exit %phi None\n"
8615                 "              OpBranchConditional %still_loop %phi %exit\n"
8616
8617                 "%exit       = OpLabel\n"
8618                 "              OpStore %a_loc %a_next\n"
8619                 "              OpStore %b_loc %b_next\n"
8620                 "%ret        = OpLoad %v4f32 %result\n"
8621                 "              OpReturnValue %ret\n"
8622
8623                 "              OpFunctionEnd\n";
8624
8625         fragments3["pre_main"]  = typesAndConstants3;
8626         fragments3["testfun"]   = function3;
8627
             // Expected result of function3: green and blue exchanged, red and alpha unchanged.
8628         outputColors3[0]                        = RGBA(127, 127, 127, 255);
8629         outputColors3[1]                        = RGBA(127, 0,   0,   255);
8630         outputColors3[2]                        = RGBA(0,   0,   127, 255);
8631         outputColors3[3]                        = RGBA(0,   127, 0,   255);
8632
8633         createTestsForAllStages("swap", inputColors, outputColors3, fragments3, group.get());
8634
             // ---- Case 4: "swap16" ----
             // Adds the 16-bit float types and pointers used by function4.
             // NOTE(review): %c_f32_p2 is declared here but is not referenced by function4.
8635         const char      typesAndConstants4[]    =
8636                 "%f16        = OpTypeFloat 16\n"
8637                 "%v4f16      = OpTypeVector %f16 4\n"
8638                 "%fp_f16     = OpTypePointer Function %f16\n"
8639                 "%fp_v4f16   = OpTypePointer Function %v4f16\n"
8640                 "%true       = OpConstantTrue %bool\n"
8641                 "%false      = OpConstantFalse %bool\n"
8642                 "%c_f32_p2   = OpConstant %f32 0.2\n";
8643
8644         // Swap the second and the third element of the given parameter.
             // Same structure as function3, except that the parameter is converted to a 16-bit float vector
             // with OpFConvert first, the OpPhi instructions operate on %f16, and the result is converted
             // back to %v4f32 before returning.
8645         const char      function4[]                             =
8646                 "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8647                 "%param      = OpFunctionParameter %v4f32\n"
8648                 "%entry      = OpLabel\n"
8649                 "%result     = OpVariable %fp_v4f16 Function\n"
8650                 "%param16    = OpFConvert %v4f16 %param\n"
8651                 "              OpStore %result %param16\n"
8652                 "%a_loc      = OpAccessChain %fp_f16 %result %c_i32_1\n"
8653                 "%a_init     = OpLoad %f16 %a_loc\n"
8654                 "%b_loc      = OpAccessChain %fp_f16 %result %c_i32_2\n"
8655                 "%b_init     = OpLoad %f16 %b_loc\n"
8656                 "              OpBranch %phi\n"
8657
8658                 "%phi        = OpLabel\n"
8659                 "%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
8660                 "%a_next     = OpPhi %f16  %a_init %entry %b_next %phi\n"
8661                 "%b_next     = OpPhi %f16  %b_init %entry %a_next %phi\n"
8662                 "              OpLoopMerge %exit %phi None\n"
8663                 "              OpBranchConditional %still_loop %phi %exit\n"
8664
8665                 "%exit       = OpLabel\n"
8666                 "              OpStore %a_loc %a_next\n"
8667                 "              OpStore %b_loc %b_next\n"
8668                 "%ret16      = OpLoad %v4f16 %result\n"
8669                 "%ret        = OpFConvert %v4f32 %ret16\n"
8670                 "              OpReturnValue %ret\n"
8671
8672                 "              OpFunctionEnd\n";
8673
8674         fragments4["pre_main"]          = typesAndConstants4;
8675         fragments4["testfun"]           = function4;
8676         fragments4["capability"]        = "OpCapability Float16\n";
8677
             // Device requirements that accompany the Float16 capability declared above.
8678         extensions4.push_back("VK_KHR_shader_float16_int8");
8679
8680         vulkanFeatures4.extFloat16Int8.shaderFloat16 = true;
8681
             // Same expected colors as the 32-bit "swap" case.
8682         outputColors4[0]                        = RGBA(127, 127, 127, 255);
8683         outputColors4[1]                        = RGBA(127, 0,   0,   255);
8684         outputColors4[2]                        = RGBA(0,   0,   127, 255);
8685         outputColors4[3]                        = RGBA(0,   127, 0,   255);
8686
             // resources4 is passed as default-constructed; nothing in this function populates it.
8687         createTestsForAllStages("swap16", inputColors, outputColors4, fragments4, resources4, extensions4, group.get(), vulkanFeatures4);
8688
8689         return group.release();
8690 }
8691
// Builds the "nocontraction" test group, which exercises the NoContraction decoration.
//
// The same shader function is registered three times ("multiplication", "addition", "both"); the cases
// differ only in which of %mul / %add carries the NoContraction decoration. The shader reports success
// by returning %c_vec4_0 and failure by returning %c_vec4_1.
//
// testCtx is forwarded to the tcu::TestCaseGroup constructor. The group is handed back with release().
8692 tcu::TestCaseGroup* createNoContractionTests(tcu::TestContext& testCtx)
8693 {
8694         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
8695         RGBA                                                    inputColors[4];
8696         RGBA                                                    outputColors[4];
8697
8698         // With NoContraction, (1 + 2^-23) * (1 - 2^-23) - 1 must be evaluated as a separate multiplication and addition.
8699         // The exact product is 1 - 2^-46, which cannot be represented in a 32-bit float (a 32-bit float
8700         // only has a 23-bit fraction). It is therefore rounded either to 1 or to 0x1.fffffep-1 (1 - 2^-24), and the final result is 0 or -0x1p-24.
8701         // If the two operations were contracted instead, the result would be -2^-46, which is a normalized number exactly representable as a 32-bit float.
8702         const char                                              constantsAndTypes[]      =
8703                 "%c_vec4_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_1\n"
8704                 "%c_vec4_1       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
8705                 "%c_f32_1pl2_23  = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
8706                 "%c_f32_1mi2_23  = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
8707                 "%c_f32_n1pn24   = OpConstant %f32 -0x1p-24\n";
8708
             // Both factors go through Function-storage variables, and the second one has the input's red
             // channel added to it (presumably so the product cannot be folded at compile time - confirm).
             // %success is true when the separately rounded result is 0 or -0x1p-24.
8709         const char                                              function[]       =
8710                 "%test_code      = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8711                 "%param          = OpFunctionParameter %v4f32\n"
8712                 "%label          = OpLabel\n"
8713                 "%var1           = OpVariable %fp_f32 Function %c_f32_1pl2_23\n"
8714                 "%var2           = OpVariable %fp_f32 Function\n"
8715                 "%red            = OpCompositeExtract %f32 %param 0\n"
8716                 "%plus_red       = OpFAdd %f32 %c_f32_1mi2_23 %red\n"
8717                 "                  OpStore %var2 %plus_red\n"
8718                 "%val1           = OpLoad %f32 %var1\n"
8719                 "%val2           = OpLoad %f32 %var2\n"
8720                 "%mul            = OpFMul %f32 %val1 %val2\n"
8721                 "%add            = OpFAdd %f32 %mul %c_f32_n1\n"
8722                 "%is0            = OpFOrdEqual %bool %add %c_f32_0\n"
8723                 "%isn1n24         = OpFOrdEqual %bool %add %c_f32_n1pn24\n"
8724                 "%success        = OpLogicalOr %bool %is0 %isn1n24\n"
8725                 "%v4success      = OpCompositeConstruct %v4bool %success %success %success %success\n"
8726                 "%ret            = OpSelect %v4f32 %v4success %c_vec4_0 %c_vec4_1\n"
8727                 "                  OpReturnValue %ret\n"
8728                 "                  OpFunctionEnd\n";
8729
             // Pairs a test case name with the text placed in the "decoration" fragment.
8730         struct CaseNameDecoration
8731         {
8732                 string name;
8733                 string decoration;
8734         };
8735
8736
8737         CaseNameDecoration tests[] = {
8738                 {"multiplication",      "OpDecorate %mul NoContraction"},
8739                 {"addition",            "OpDecorate %add NoContraction"},
8740                 {"both",                        "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"},
8741         };
8742
8743         getHalfColorsFullAlpha(inputColors);
8744
             // Red is forced to 0 so that %plus_red in the shader equals %c_f32_1mi2_23.
             // Every vertex is expected to come out as opaque black, matching the success value %c_vec4_0.
8745         for (deUint8 idx = 0; idx < 4; ++idx)
8746         {
8747                 inputColors[idx].setRed(0);
8748                 outputColors[idx] = RGBA(0, 0, 0, 255);
8749         }
8750
             // One case per table entry; only the "decoration" fragment differs between them.
8751         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(CaseNameDecoration); ++testNdx)
8752         {
8753                 map<string, string> fragments;
8754
8755                 fragments["decoration"] = tests[testNdx].decoration;
8756                 fragments["pre_main"] = constantsAndTypes;
8757                 fragments["testfun"] = function;
8758
8759                 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, group.get());
8760         }
8761
8762         return group.release();
8763 }
8764
// Builds the "opmemoryaccess" test group, which exercises the optional memory access operands of
// OpLoad, OpStore and OpCopyMemory.
//
// One case is registered per entry of the tests[] table below. For each entry the ${access_type}
// placeholder in the shader template is replaced with the entry's operand text. Every case uses the
// same array for input and expected colors, so the shader is expected to return its parameter unchanged.
//
// testCtx is forwarded to the tcu::TestCaseGroup constructor. The group is handed back with release().
8765 tcu::TestCaseGroup* createMemoryAccessTests(tcu::TestContext& testCtx)
8766 {
8767         de::MovePtr<tcu::TestCaseGroup> memoryAccessTests (new tcu::TestCaseGroup(testCtx, "opmemoryaccess", "Memory Semantics"));
8768         RGBA                                                    colors[4];
8769
             // A struct type with a vector, an array and a scalar member, plus the pointers needed to reach them.
8770         const char                                              constantsAndTypes[]      =
8771                 "%c_a2f32_1         = OpConstantComposite %a2f32 %c_f32_1 %c_f32_1\n"
8772                 "%fp_a2f32          = OpTypePointer Function %a2f32\n"
8773                 "%stype             = OpTypeStruct  %v4f32 %a2f32 %f32\n"
8774                 "%fp_stype          = OpTypePointer Function %stype\n";
8775
             // Template for the shader function; ${access_type} is substituted per test case.
             // The scalar that ends up multiplying the parameter (twice) originates from %c_f32_1 and is
             // moved through loads, stores and memory copies that carry the access operands under test.
             // NOTE(review): the result equals the parameter only if %c_f32_1 is 1.0 - it is defined outside this function.
8776         const char                                              function[]       =
8777                 "%test_code         = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8778                 "%param1            = OpFunctionParameter %v4f32\n"
8779                 "%lbl               = OpLabel\n"
8780                 "%v1                = OpVariable %fp_v4f32 Function\n"
8781                 "%v2                = OpVariable %fp_a2f32 Function\n"
8782                 "%v3                = OpVariable %fp_f32 Function\n"
8783                 "%v                 = OpVariable %fp_stype Function\n"
8784                 "%vv                = OpVariable %fp_stype Function\n"
8785                 "%vvv               = OpVariable %fp_f32 Function\n"
8786
8787                 "                     OpStore %v1 %c_v4f32_1_1_1_1\n"
8788                 "                     OpStore %v2 %c_a2f32_1\n"
8789                 "                     OpStore %v3 %c_f32_1\n"
8790
                     // Load each variable and store it into the matching member of %v, using the access operands under test.
8791                 "%p_v4f32          = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
8792                 "%p_a2f32          = OpAccessChain %fp_a2f32 %v %c_u32_1\n"
8793                 "%p_f32            = OpAccessChain %fp_f32 %v %c_u32_2\n"
8794                 "%v1_v             = OpLoad %v4f32 %v1 ${access_type}\n"
8795                 "%v2_v             = OpLoad %a2f32 %v2 ${access_type}\n"
8796                 "%v3_v             = OpLoad %f32 %v3 ${access_type}\n"
8797
8798                 "                    OpStore %p_v4f32 %v1_v ${access_type}\n"
8799                 "                    OpStore %p_a2f32 %v2_v ${access_type}\n"
8800                 "                    OpStore %p_f32 %v3_v ${access_type}\n"
8801
                     // Copy the whole struct into %vv and the scalar member alone into %vvv.
8802                 "                    OpCopyMemory %vv %v ${access_type}\n"
8803                 "                    OpCopyMemory %vvv %p_f32 ${access_type}\n"
8804
                     // These final loads carry no memory access operands.
8805                 "%p_f32_2          = OpAccessChain %fp_f32 %vv %c_u32_2\n"
8806                 "%v_f32_2          = OpLoad %f32 %p_f32_2\n"
8807                 "%v_f32_3          = OpLoad %f32 %vvv\n"
8808
8809                 "%ret1             = OpVectorTimesScalar %v4f32 %param1 %v_f32_2\n"
8810                 "%ret2             = OpVectorTimesScalar %v4f32 %ret1 %v_f32_3\n"
8811                 "                    OpReturnValue %ret2\n"
8812                 "                    OpFunctionEnd\n";
8813
             // Pairs a test case name with the memory access operand text substituted for ${access_type}.
8814         struct NameMemoryAccess
8815         {
8816                 string name;
8817                 string accessType;
8818         };
8819
8820
             // NOTE(review): the last name is spelled "nontermporal". It is a runtime string that becomes the
             // test case name, so it is left untouched here; correcting it would rename the test case.
8821         NameMemoryAccess tests[] =
8822         {
8823                 { "none", "" },
8824                 { "volatile", "Volatile" },
8825                 { "aligned",  "Aligned 1" },
8826                 { "volatile_aligned",  "Volatile|Aligned 1" },
8827                 { "nontemporal_aligned",  "Nontemporal|Aligned 1" },
8828                 { "volatile_nontemporal",  "Volatile|Nontemporal" },
8829                 { "volatile_nontermporal_aligned",  "Volatile|Nontemporal|Aligned 1" },
8830         };
8831
8832         getHalfColorsFullAlpha(colors);
8833
8834         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameMemoryAccess); ++testNdx)
8835         {
8836                 map<string, string> fragments;
8837                 map<string, string> memoryAccess;
8838                 memoryAccess["access_type"] = tests[testNdx].accessType;
8839
8840                 fragments["pre_main"] = constantsAndTypes;
                     // Resolve ${access_type} now; the resulting text is what gets stored as the "testfun" fragment.
8841                 fragments["testfun"] = tcu::StringTemplate(function).specialize(memoryAccess);
                     // colors serves as both the input and the expected output.
8842                 createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, memoryAccessTests.get());
8843         }
8844         return memoryAccessTests.release();
8845 }
// Builds the "opundef" test group, which exercises OpUndef.
//
// The first batch of cases (driven by the tests[] table) only declares an OpUndef of a given type and
// returns the parameter untouched. The remaining cases ("float32", "sint32", "uint32", "vec4float32",
// "matrix") multiply the undefined value by zero and feed the product into the returned color.
// Every case passes defaultColors as both input and expected output.
//
// testCtx is forwarded to the tcu::TestCaseGroup constructor. The group is handed back with release().
8846 tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
8847 {
8848         de::MovePtr<tcu::TestCaseGroup>         opUndefTests             (new tcu::TestCaseGroup(testCtx, "opundef", "Test OpUndef"));
8849         RGBA                                                            defaultColors[4];
8850         map<string, string>                                     fragments;
8851         getDefaultColors(defaultColors);
8852
8853         // First, simple cases that don't do anything with the OpUndef result.
             // name: test case name; decl: extra declarations placed in "pre_main"; type: the type id given to OpUndef.
8854         struct NameCodePair { string name, decl, type; };
8855         const NameCodePair tests[] =
8856         {
8857                 {"bool", "", "%bool"},
8858                 {"vec2uint32", "", "%v2u32"},
8859                 {"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown", "%type"},
8860                 {"sampler", "%type = OpTypeSampler", "%type"},
8861                 {"sampledimage", "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n" "%type = OpTypeSampledImage %img", "%type"},
8862                 {"pointer", "", "%fp_i32"},
8863                 {"runtimearray", "%type = OpTypeRuntimeArray %f32", "%type"},
8864                 {"array", "%c_u32_100 = OpConstant %u32 100\n" "%type = OpTypeArray %i32 %c_u32_100", "%type"},
8865                 {"struct", "%type = OpTypeStruct %f32 %i32 %u32", "%type"}};
8866         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
8867         {
                     // The fragments map doubles as the parameter map for the template below;
                     // "undef_type" is the only placeholder the template contains.
8868                 fragments["undef_type"] = tests[testNdx].type;
8869                 fragments["testfun"] = StringTemplate(
8870                         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8871                         "%param1 = OpFunctionParameter %v4f32\n"
8872                         "%label_testfun = OpLabel\n"
8873                         "%undef = OpUndef ${undef_type}\n"
8874                         "OpReturnValue %param1\n"
8875                         "OpFunctionEnd\n").specialize(fragments);
8876                 fragments["pre_main"] = tests[testNdx].decl;
8877                 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opUndefTests.get());
8878         }
             // Drop "undef_type" and "pre_main" so they do not leak into the cases below.
8879         fragments.clear();
8880
             // float32: undef * 0 is added to component 0 of the parameter. A NaN product is replaced with 0 first.
8881         fragments["testfun"] =
8882                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8883                 "%param1 = OpFunctionParameter %v4f32\n"
8884                 "%label_testfun = OpLabel\n"
8885                 "%undef = OpUndef %f32\n"
8886                 "%zero = OpFMul %f32 %undef %c_f32_0\n"
8887                 "%is_nan = OpIsNan %bool %zero\n" //OpUndef may result in NaN which may turn %zero into Nan.
8888                 "%actually_zero = OpSelect %f32 %is_nan %c_f32_0 %zero\n"
8889                 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8890                 "%b = OpFAdd %f32 %a %actually_zero\n"
8891                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
8892                 "OpReturnValue %ret\n"
8893                 "OpFunctionEnd\n";
8894
8895         createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
8896
             // sint32: undef * 0 is used as the dynamic index of the component that is written back to component 0.
8897         fragments["testfun"] =
8898                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8899                 "%param1 = OpFunctionParameter %v4f32\n"
8900                 "%label_testfun = OpLabel\n"
8901                 "%undef = OpUndef %i32\n"
8902                 "%zero = OpIMul %i32 %undef %c_i32_0\n"
8903                 "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8904                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8905                 "OpReturnValue %ret\n"
8906                 "OpFunctionEnd\n";
8907
8908         createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
8909
             // uint32: same as sint32 with an unsigned undef. The zero operand is still the signed constant %c_i32_0.
8910         fragments["testfun"] =
8911                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8912                 "%param1 = OpFunctionParameter %v4f32\n"
8913                 "%label_testfun = OpLabel\n"
8914                 "%undef = OpUndef %u32\n"
8915                 "%zero = OpIMul %u32 %undef %c_i32_0\n"
8916                 "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8917                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8918                 "OpReturnValue %ret\n"
8919                 "OpFunctionEnd\n";
8920
8921         createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
8922
             // vec4float32: an undefined vector is scaled by 0, each component is NaN-guarded as in the
             // float32 case, and the four results are added to the matching components of the parameter.
8923         fragments["testfun"] =
8924                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8925                 "%param1 = OpFunctionParameter %v4f32\n"
8926                 "%label_testfun = OpLabel\n"
8927                 "%undef = OpUndef %v4f32\n"
8928                 "%vzero = OpVectorTimesScalar %v4f32 %undef %c_f32_0\n"
8929                 "%zero_0 = OpVectorExtractDynamic %f32 %vzero %c_i32_0\n"
8930                 "%zero_1 = OpVectorExtractDynamic %f32 %vzero %c_i32_1\n"
8931                 "%zero_2 = OpVectorExtractDynamic %f32 %vzero %c_i32_2\n"
8932                 "%zero_3 = OpVectorExtractDynamic %f32 %vzero %c_i32_3\n"
8933                 "%is_nan_0 = OpIsNan %bool %zero_0\n"
8934                 "%is_nan_1 = OpIsNan %bool %zero_1\n"
8935                 "%is_nan_2 = OpIsNan %bool %zero_2\n"
8936                 "%is_nan_3 = OpIsNan %bool %zero_3\n"
8937                 "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8938                 "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8939                 "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8940                 "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8941                 "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8942                 "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8943                 "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8944                 "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8945                 "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8946                 "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8947                 "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8948                 "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8949                 "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8950                 "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8951                 "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8952                 "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8953                 "OpReturnValue %ret\n"
8954                 "OpFunctionEnd\n";
8955
8956         createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
8957
             // matrix: same scheme as vec4float32, but the four zero candidates come from an undefined
             // 2x2 matrix scaled by 0. The matrix type has to be declared in "pre_main" for this case.
8958         fragments["pre_main"] =
8959                 "%m2x2f32 = OpTypeMatrix %v2f32 2\n";
8960         fragments["testfun"] =
8961                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8962                 "%param1 = OpFunctionParameter %v4f32\n"
8963                 "%label_testfun = OpLabel\n"
8964                 "%undef = OpUndef %m2x2f32\n"
8965                 "%mzero = OpMatrixTimesScalar %m2x2f32 %undef %c_f32_0\n"
8966                 "%zero_0 = OpCompositeExtract %f32 %mzero 0 0\n"
8967                 "%zero_1 = OpCompositeExtract %f32 %mzero 0 1\n"
8968                 "%zero_2 = OpCompositeExtract %f32 %mzero 1 0\n"
8969                 "%zero_3 = OpCompositeExtract %f32 %mzero 1 1\n"
8970                 "%is_nan_0 = OpIsNan %bool %zero_0\n"
8971                 "%is_nan_1 = OpIsNan %bool %zero_1\n"
8972                 "%is_nan_2 = OpIsNan %bool %zero_2\n"
8973                 "%is_nan_3 = OpIsNan %bool %zero_3\n"
8974                 "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8975                 "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8976                 "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8977                 "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8978                 "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8979                 "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8980                 "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8981                 "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8982                 "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8983                 "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8984                 "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8985                 "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8986                 "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8987                 "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8988                 "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8989                 "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8990                 "OpReturnValue %ret\n"
8991                 "OpFunctionEnd\n";
8992
8993         createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
8994
8995         return opUndefTests.release();
8996 }
8997
8998 void createOpQuantizeSingleOptionTests(tcu::TestCaseGroup* testCtx)
8999 {
9000         const RGBA              inputColors[4]          =
9001         {
9002                 RGBA(0,         0,              0,              255),
9003                 RGBA(0,         0,              255,    255),
9004                 RGBA(0,         255,    0,              255),
9005                 RGBA(0,         255,    255,    255)
9006         };
9007
9008         const RGBA              expectedColors[4]       =
9009         {
9010                 RGBA(255,        0,              0,              255),
9011                 RGBA(255,        0,              0,              255),
9012                 RGBA(255,        0,              0,              255),
9013                 RGBA(255,        0,              0,              255)
9014         };
9015
9016         const struct SingleFP16Possibility
9017         {
9018                 const char* name;
9019                 const char* constant;  // Value to assign to %test_constant.
9020                 float           valueAsFloat;
9021                 const char* condition; // Must assign to %cond an expression that evaluates to true after %c = OpQuantizeToF16(%test_constant + 0).
9022         }                               tests[]                         =
9023         {
9024                 {
9025                         "negative",
9026                         "-0x1.3p1\n",
9027                         -constructNormalizedFloat(1, 0x300000),
9028                         "%cond = OpFOrdEqual %bool %c %test_constant\n"
9029                 }, // -19
9030                 {
9031                         "positive",
9032                         "0x1.0p7\n",
9033                         constructNormalizedFloat(7, 0x000000),
9034                         "%cond = OpFOrdEqual %bool %c %test_constant\n"
9035                 },  // +128
9036                 // SPIR-V requires that OpQuantizeToF16 flushes
9037                 // any numbers that would end up denormalized in F16 to zero.
9038                 {
9039                         "denorm",
9040                         "0x0.0006p-126\n",
9041                         std::ldexp(1.5f, -140),
9042                         "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9043                 },  // denorm
9044                 {
9045                         "negative_denorm",
9046                         "-0x0.0006p-126\n",
9047                         -std::ldexp(1.5f, -140),
9048                         "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9049                 }, // -denorm
9050                 {
9051                         "too_small",
9052                         "0x1.0p-16\n",
9053                         std::ldexp(1.0f, -16),
9054                         "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9055                 },     // too small positive
9056                 {
9057                         "negative_too_small",
9058                         "-0x1.0p-32\n",
9059                         -std::ldexp(1.0f, -32),
9060                         "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9061                 },      // too small negative
9062                 {
9063                         "negative_inf",
9064                         "-0x1.0p128\n",
9065                         -std::ldexp(1.0f, 128),
9066
9067                         "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9068                         "%inf = OpIsInf %bool %c\n"
9069                         "%cond = OpLogicalAnd %bool %gz %inf\n"
9070                 },     // -inf to -inf
9071                 {
9072                         "inf",
9073                         "0x1.0p128\n",
9074                         std::ldexp(1.0f, 128),
9075
9076                         "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9077                         "%inf = OpIsInf %bool %c\n"
9078                         "%cond = OpLogicalAnd %bool %gz %inf\n"
9079                 },     // +inf to +inf
9080                 {
9081                         "round_to_negative_inf",
9082                         "-0x1.0p32\n",
9083                         -std::ldexp(1.0f, 32),
9084
9085                         "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9086                         "%inf = OpIsInf %bool %c\n"
9087                         "%cond = OpLogicalAnd %bool %gz %inf\n"
9088                 },     // round to -inf
9089                 {
9090                         "round_to_inf",
9091                         "0x1.0p16\n",
9092                         std::ldexp(1.0f, 16),
9093
9094                         "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9095                         "%inf = OpIsInf %bool %c\n"
9096                         "%cond = OpLogicalAnd %bool %gz %inf\n"
9097                 },     // round to +inf
9098                 {
9099                         "nan",
9100                         "0x1.1p128\n",
9101                         std::numeric_limits<float>::quiet_NaN(),
9102
9103                         // Test for any NaN value, as NaNs are not preserved
9104                         "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9105                         "%cond = OpIsNan %bool %direct_quant\n"
9106                 }, // nan
9107                 {
9108                         "negative_nan",
9109                         "-0x1.0001p128\n",
9110                         std::numeric_limits<float>::quiet_NaN(),
9111
9112                         // Test for any NaN value, as NaNs are not preserved
9113                         "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9114                         "%cond = OpIsNan %bool %direct_quant\n"
9115                 } // -nan
9116         };
9117         const char*             constants                       =
9118                 "%test_constant = OpConstant %f32 ";  // The value will be test.constant.
9119
9120         StringTemplate  function                        (
9121                 "%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9122                 "%param1        = OpFunctionParameter %v4f32\n"
9123                 "%label_testfun = OpLabel\n"
9124                 "%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9125                 "%b             = OpFAdd %f32 %test_constant %a\n"
9126                 "%c             = OpQuantizeToF16 %f32 %b\n"
9127                 "${condition}\n"
9128                 "%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9129                 "%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9130                 "                 OpReturnValue %retval\n"
9131                 "OpFunctionEnd\n"
9132         );
9133
9134         const char*             specDecorations         = "OpDecorate %test_constant SpecId 0\n";
9135         const char*             specConstants           =
9136                         "%test_constant = OpSpecConstant %f32 0.\n"
9137                         "%c             = OpSpecConstantOp %f32 QuantizeToF16 %test_constant\n";
9138
9139         StringTemplate  specConstantFunction(
9140                 "%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9141                 "%param1        = OpFunctionParameter %v4f32\n"
9142                 "%label_testfun = OpLabel\n"
9143                 "${condition}\n"
9144                 "%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9145                 "%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9146                 "                 OpReturnValue %retval\n"
9147                 "OpFunctionEnd\n"
9148         );
9149
9150         for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
9151         {
9152                 map<string, string>                                                             codeSpecialization;
9153                 map<string, string>                                                             fragments;
9154                 codeSpecialization["condition"]                                 = tests[idx].condition;
9155                 fragments["testfun"]                                                    = function.specialize(codeSpecialization);
9156                 fragments["pre_main"]                                                   = string(constants) + tests[idx].constant + "\n";
9157                 createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
9158         }
9159
9160         for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
9161         {
9162                 map<string, string>                                                             codeSpecialization;
9163                 map<string, string>                                                             fragments;
9164                 SpecConstants                                                                   passConstants;
9165
9166                 codeSpecialization["condition"]                                 = tests[idx].condition;
9167                 fragments["testfun"]                                                    = specConstantFunction.specialize(codeSpecialization);
9168                 fragments["decoration"]                                                 = specDecorations;
9169                 fragments["pre_main"]                                                   = specConstants;
9170
9171                 passConstants.append<float>(tests[idx].valueAsFloat);
9172
9173                 createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
9174         }
9175 }
9176
9177 void createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup* testCtx)
9178 {
9179         RGBA inputColors[4] =  {
9180                 RGBA(0,         0,              0,              255),
9181                 RGBA(0,         0,              255,    255),
9182                 RGBA(0,         255,    0,              255),
9183                 RGBA(0,         255,    255,    255)
9184         };
9185
9186         RGBA expectedColors[4] =
9187         {
9188                 RGBA(255,        0,              0,              255),
9189                 RGBA(255,        0,              0,              255),
9190                 RGBA(255,        0,              0,              255),
9191                 RGBA(255,        0,              0,              255)
9192         };
9193
9194         struct DualFP16Possibility
9195         {
9196                 const char* name;
9197                 const char* input;
9198                 float           inputAsFloat;
9199                 const char* possibleOutput1;
9200                 const char* possibleOutput2;
9201         } tests[] = {
9202                 {
9203                         "positive_round_up_or_round_down",
9204                         "0x1.3003p8",
9205                         constructNormalizedFloat(8, 0x300300),
9206                         "0x1.304p8",
9207                         "0x1.3p8"
9208                 },
9209                 {
9210                         "negative_round_up_or_round_down",
9211                         "-0x1.6008p-7",
9212                         -constructNormalizedFloat(-7, 0x600800),
9213                         "-0x1.6p-7",
9214                         "-0x1.604p-7"
9215                 },
9216                 {
9217                         "carry_bit",
9218                         "0x1.01ep2",
9219                         constructNormalizedFloat(2, 0x01e000),
9220                         "0x1.01cp2",
9221                         "0x1.02p2"
9222                 },
9223                 {
9224                         "carry_to_exponent",
9225                         "0x1.ffep1",
9226                         constructNormalizedFloat(1, 0xffe000),
9227                         "0x1.ffcp1",
9228                         "0x1.0p2"
9229                 },
9230         };
9231         StringTemplate constants (
9232                 "%input_const = OpConstant %f32 ${input}\n"
9233                 "%possible_solution1 = OpConstant %f32 ${output1}\n"
9234                 "%possible_solution2 = OpConstant %f32 ${output2}\n"
9235                 );
9236
9237         StringTemplate specConstants (
9238                 "%input_const = OpSpecConstant %f32 0.\n"
9239                 "%possible_solution1 = OpConstant %f32 ${output1}\n"
9240                 "%possible_solution2 = OpConstant %f32 ${output2}\n"
9241         );
9242
9243         const char* specDecorations = "OpDecorate %input_const  SpecId 0\n";
9244
9245         const char* function  =
9246                 "%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9247                 "%param1        = OpFunctionParameter %v4f32\n"
9248                 "%label_testfun = OpLabel\n"
9249                 "%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9250                 // For the purposes of this test we assume that 0.f will always get
9251                 // faithfully passed through the pipeline stages.
9252                 "%b             = OpFAdd %f32 %input_const %a\n"
9253                 "%c             = OpQuantizeToF16 %f32 %b\n"
9254                 "%eq_1          = OpFOrdEqual %bool %c %possible_solution1\n"
9255                 "%eq_2          = OpFOrdEqual %bool %c %possible_solution2\n"
9256                 "%cond          = OpLogicalOr %bool %eq_1 %eq_2\n"
9257                 "%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9258                 "%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1"
9259                 "                 OpReturnValue %retval\n"
9260                 "OpFunctionEnd\n";
9261
9262         for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
9263                 map<string, string>                                                                     fragments;
9264                 map<string, string>                                                                     constantSpecialization;
9265
9266                 constantSpecialization["input"]                                         = tests[idx].input;
9267                 constantSpecialization["output1"]                                       = tests[idx].possibleOutput1;
9268                 constantSpecialization["output2"]                                       = tests[idx].possibleOutput2;
9269                 fragments["testfun"]                                                            = function;
9270                 fragments["pre_main"]                                                           = constants.specialize(constantSpecialization);
9271                 createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
9272         }
9273
9274         for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
9275                 map<string, string>                                                                     fragments;
9276                 map<string, string>                                                                     constantSpecialization;
9277                 SpecConstants                                                                           passConstants;
9278
9279                 constantSpecialization["output1"]                                       = tests[idx].possibleOutput1;
9280                 constantSpecialization["output2"]                                       = tests[idx].possibleOutput2;
9281                 fragments["testfun"]                                                            = function;
9282                 fragments["decoration"]                                                         = specDecorations;
9283                 fragments["pre_main"]                                                           = specConstants.specialize(constantSpecialization);
9284
9285                 passConstants.append<float>(tests[idx].inputAsFloat);
9286
9287                 createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
9288         }
9289 }
9290
9291 tcu::TestCaseGroup* createOpQuantizeTests(tcu::TestContext& testCtx)
9292 {
9293         de::MovePtr<tcu::TestCaseGroup> opQuantizeTests (new tcu::TestCaseGroup(testCtx, "opquantize", "Test OpQuantizeToF16"));
9294         createOpQuantizeSingleOptionTests(opQuantizeTests.get());
9295         createOpQuantizeTwoPossibilityTests(opQuantizeTests.get());
9296         return opQuantizeTests.release();
9297 }
9298
9299 struct ShaderPermutation
9300 {
9301         deUint8 vertexPermutation;
9302         deUint8 geometryPermutation;
9303         deUint8 tesscPermutation;
9304         deUint8 tessePermutation;
9305         deUint8 fragmentPermutation;
9306 };
9307
9308 ShaderPermutation getShaderPermutation(deUint8 inputValue)
9309 {
9310         ShaderPermutation       permutation =
9311         {
9312                 static_cast<deUint8>(inputValue & 0x10? 1u: 0u),
9313                 static_cast<deUint8>(inputValue & 0x08? 1u: 0u),
9314                 static_cast<deUint8>(inputValue & 0x04? 1u: 0u),
9315                 static_cast<deUint8>(inputValue & 0x02? 1u: 0u),
9316                 static_cast<deUint8>(inputValue & 0x01? 1u: 0u)
9317         };
9318         return permutation;
9319 }
9320
9321 tcu::TestCaseGroup* createModuleTests(tcu::TestContext& testCtx)
9322 {
9323         RGBA                                                            defaultColors[4];
9324         RGBA                                                            invertedColors[4];
9325         de::MovePtr<tcu::TestCaseGroup>         moduleTests                     (new tcu::TestCaseGroup(testCtx, "module", "Multiple entry points into shaders"));
9326
9327         getDefaultColors(defaultColors);
9328         getInvertedDefaultColors(invertedColors);
9329
9330         // Combined module tests
9331         {
9332                 // Shader stages: vertex and fragment
9333                 {
9334                         const ShaderElement combinedPipeline[]  =
9335                         {
9336                                 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9337                                 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9338                         };
9339
9340                         addFunctionCaseWithPrograms<InstanceContext>(
9341                                 moduleTests.get(), "same_module", "", createCombinedModule, runAndVerifyDefaultPipeline,
9342                                 createInstanceContext(combinedPipeline, map<string, string>()));
9343                 }
9344
9345                 // Shader stages: vertex, geometry and fragment
9346                 {
9347                         const ShaderElement combinedPipeline[]  =
9348                         {
9349                                 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9350                                 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9351                                 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9352                         };
9353
9354                         addFunctionCaseWithPrograms<InstanceContext>(
9355                                 moduleTests.get(), "same_module_geom", "", createCombinedModule, runAndVerifyDefaultPipeline,
9356                                 createInstanceContext(combinedPipeline, map<string, string>()));
9357                 }
9358
9359                 // Shader stages: vertex, tessellation control, tessellation evaluation and fragment
9360                 {
9361                         const ShaderElement combinedPipeline[]  =
9362                         {
9363                                 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9364                                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9365                                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9366                                 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9367                         };
9368
9369                         addFunctionCaseWithPrograms<InstanceContext>(
9370                                 moduleTests.get(), "same_module_tessc_tesse", "", createCombinedModule, runAndVerifyDefaultPipeline,
9371                                 createInstanceContext(combinedPipeline, map<string, string>()));
9372                 }
9373
9374                 // Shader stages: vertex, tessellation control, tessellation evaluation, geometry and fragment
9375                 {
9376                         const ShaderElement combinedPipeline[]  =
9377                         {
9378                                 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9379                                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9380                                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9381                                 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9382                                 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9383                         };
9384
9385                         addFunctionCaseWithPrograms<InstanceContext>(
9386                                 moduleTests.get(), "same_module_tessc_tesse_geom", "", createCombinedModule, runAndVerifyDefaultPipeline,
9387                                 createInstanceContext(combinedPipeline, map<string, string>()));
9388                 }
9389         }
9390
9391         const char* numbers[] =
9392         {
9393                 "1", "2"
9394         };
9395
9396         for (deInt8 idx = 0; idx < 32; ++idx)
9397         {
9398                 ShaderPermutation                       permutation             = getShaderPermutation(idx);
9399                 string                                          name                    = string("vert") + numbers[permutation.vertexPermutation] + "_geom" + numbers[permutation.geometryPermutation] + "_tessc" + numbers[permutation.tesscPermutation] + "_tesse" + numbers[permutation.tessePermutation] + "_frag" + numbers[permutation.fragmentPermutation];
9400                 const ShaderElement                     pipeline[]              =
9401                 {
9402                         ShaderElement("vert",   string("vert") +        numbers[permutation.vertexPermutation],         VK_SHADER_STAGE_VERTEX_BIT),
9403                         ShaderElement("geom",   string("geom") +        numbers[permutation.geometryPermutation],       VK_SHADER_STAGE_GEOMETRY_BIT),
9404                         ShaderElement("tessc",  string("tessc") +       numbers[permutation.tesscPermutation],          VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9405                         ShaderElement("tesse",  string("tesse") +       numbers[permutation.tessePermutation],          VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9406                         ShaderElement("frag",   string("frag") +        numbers[permutation.fragmentPermutation],       VK_SHADER_STAGE_FRAGMENT_BIT)
9407                 };
9408
9409                 // If there are an even number of swaps, then it should be no-op.
9410                 // If there are an odd number, the color should be flipped.
9411                 if ((permutation.vertexPermutation + permutation.geometryPermutation + permutation.tesscPermutation + permutation.tessePermutation + permutation.fragmentPermutation) % 2 == 0)
9412                 {
9413                         addFunctionCaseWithPrograms<InstanceContext>(
9414                                         moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
9415                                         createInstanceContext(pipeline, defaultColors, defaultColors, map<string, string>()));
9416                 }
9417                 else
9418                 {
9419                         addFunctionCaseWithPrograms<InstanceContext>(
9420                                         moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
9421                                         createInstanceContext(pipeline, defaultColors, invertedColors, map<string, string>()));
9422                 }
9423         }
9424         return moduleTests.release();
9425 }
9426
9427 std::string getUnusedVarTestNamePiece(const std::string& prefix, ShaderTask task)
9428 {
9429         switch (task)
9430         {
9431                 case SHADER_TASK_NONE:                  return "";
9432                 case SHADER_TASK_NORMAL:                return prefix + "_normal";
9433                 case SHADER_TASK_UNUSED_VAR:    return prefix + "_unused_var";
9434                 case SHADER_TASK_UNUSED_FUNC:   return prefix + "_unused_func";
9435                 default:                                                DE_ASSERT(DE_FALSE);
9436         }
9437         // unreachable
9438         return "";
9439 }
9440
9441 std::string getShaderTaskIndexName(ShaderTaskIndex index)
9442 {
9443         switch (index)
9444         {
9445         case SHADER_TASK_INDEX_VERTEX:                  return "vertex";
9446         case SHADER_TASK_INDEX_GEOMETRY:                return "geom";
9447         case SHADER_TASK_INDEX_TESS_CONTROL:    return "tessc";
9448         case SHADER_TASK_INDEX_TESS_EVAL:               return "tesse";
9449         case SHADER_TASK_INDEX_FRAGMENT:                return "frag";
9450         default:                                                                DE_ASSERT(DE_FALSE);
9451         }
9452         // unreachable
9453         return "";
9454 }
9455
9456 std::string getUnusedVarTestName(const ShaderTaskArray& shaderTasks, const VariableLocation& location)
9457 {
9458         std::string testName = location.toString();
9459
9460         for (size_t i = 0; i < DE_LENGTH_OF_ARRAY(shaderTasks); ++i)
9461         {
9462                 if (shaderTasks[i] != SHADER_TASK_NONE)
9463                 {
9464                         testName += "_" + getUnusedVarTestNamePiece(getShaderTaskIndexName((ShaderTaskIndex)i), shaderTasks[i]);
9465                 }
9466         }
9467
9468         return testName;
9469 }
9470
9471 tcu::TestCaseGroup* createUnusedVariableTests(tcu::TestContext& testCtx)
9472 {
9473         de::MovePtr<tcu::TestCaseGroup>         moduleTests                             (new tcu::TestCaseGroup(testCtx, "unused_variables", "Graphics shaders with unused variables"));
9474
9475         ShaderTaskArray                                         shaderCombinations[]    =
9476         {
9477                 // Vertex                                       Geometry                                        Tess. Control                           Tess. Evaluation                        Fragment
9478                 { SHADER_TASK_UNUSED_VAR,       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NORMAL      },
9479                 { SHADER_TASK_UNUSED_FUNC,      SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NORMAL      },
9480                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_UNUSED_VAR  },
9481                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_UNUSED_FUNC },
9482                 { SHADER_TASK_NORMAL,           SHADER_TASK_UNUSED_VAR,         SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NORMAL      },
9483                 { SHADER_TASK_NORMAL,           SHADER_TASK_UNUSED_FUNC,        SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NORMAL      },
9484                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_UNUSED_VAR,         SHADER_TASK_NORMAL,                     SHADER_TASK_NORMAL      },
9485                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_UNUSED_FUNC,        SHADER_TASK_NORMAL,                     SHADER_TASK_NORMAL      },
9486                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_NORMAL,                     SHADER_TASK_UNUSED_VAR,         SHADER_TASK_NORMAL      },
9487                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_NORMAL,                     SHADER_TASK_UNUSED_FUNC,        SHADER_TASK_NORMAL      }
9488         };
9489
9490         const VariableLocation                          testLocations[] =
9491         {
9492                 // Set          Binding
9493                 { 0,            5                       },
9494                 { 5,            5                       },
9495         };
9496
9497         for (size_t combNdx = 0; combNdx < DE_LENGTH_OF_ARRAY(shaderCombinations); ++combNdx)
9498         {
9499                 for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
9500                 {
9501                         const ShaderTaskArray&  shaderTasks             = shaderCombinations[combNdx];
9502                         const VariableLocation& location                = testLocations[locationNdx];
9503                         std::string                             testName                = getUnusedVarTestName(shaderTasks, location);
9504
9505                         addFunctionCaseWithPrograms<UnusedVariableContext>(
9506                                 moduleTests.get(), testName, "", createUnusedVariableModules, runAndVerifyUnusedVariablePipeline,
9507                                 createUnusedVariableContext(shaderTasks, location));
9508                 }
9509         }
9510
9511         return moduleTests.release();
9512 }
9513
9514 tcu::TestCaseGroup* createLoopTests(tcu::TestContext& testCtx)
9515 {
9516         de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "loop", "Looping control flow"));
9517         RGBA defaultColors[4];
9518         getDefaultColors(defaultColors);
9519         map<string, string> fragments;
9520         fragments["pre_main"] =
9521                 "%c_f32_5 = OpConstant %f32 5.\n";
9522
9523         // A loop with a single block. The Continue Target is the loop block
9524         // itself. In SPIR-V terms, the "loop construct" contains no blocks at all
9525         // -- the "continue construct" forms the entire loop.
9526         fragments["testfun"] =
9527                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9528                 "%param1 = OpFunctionParameter %v4f32\n"
9529
9530                 "%entry = OpLabel\n"
9531                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9532                 "OpBranch %loop\n"
9533
9534                 ";adds and subtracts 1.0 to %val in alternate iterations\n"
9535                 "%loop = OpLabel\n"
9536                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9537                 "%delta = OpPhi %f32 %c_f32_1 %entry %minus_delta %loop\n"
9538                 "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9539                 "%val = OpFAdd %f32 %val1 %delta\n"
9540                 "%minus_delta = OpFSub %f32 %c_f32_0 %delta\n"
9541                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9542                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9543                 "OpLoopMerge %exit %loop None\n"
9544                 "OpBranchConditional %again %loop %exit\n"
9545
9546                 "%exit = OpLabel\n"
9547                 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9548                 "OpReturnValue %result\n"
9549
9550                 "OpFunctionEnd\n";
9551
9552         createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());
9553
9554         // Body comprised of multiple basic blocks.
9555         const StringTemplate multiBlock(
9556                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9557                 "%param1 = OpFunctionParameter %v4f32\n"
9558
9559                 "%entry = OpLabel\n"
9560                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9561                 "OpBranch %loop\n"
9562
9563                 ";adds and subtracts 1.0 to %val in alternate iterations\n"
9564                 "%loop = OpLabel\n"
9565                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %cont\n"
9566                 "%delta = OpPhi %f32 %c_f32_1 %entry %delta_next %cont\n"
9567                 "%val1 = OpPhi %f32 %val0 %entry %val %cont\n"
9568                 // There are several possibilities for the Continue Target below.  Each
9569                 // will be specialized into a separate test case.
9570                 "OpLoopMerge %exit ${continue_target} None\n"
9571                 "OpBranch %if\n"
9572
9573                 "%if = OpLabel\n"
9574                 ";delta_next = (delta > 0) ? -1 : 1;\n"
9575                 "%gt0 = OpFOrdGreaterThan %bool %delta %c_f32_0\n"
9576                 "OpSelectionMerge %gather DontFlatten\n"
9577                 "OpBranchConditional %gt0 %even %odd ;tells us if %count is even or odd\n"
9578
9579                 "%odd = OpLabel\n"
9580                 "OpBranch %gather\n"
9581
9582                 "%even = OpLabel\n"
9583                 "OpBranch %gather\n"
9584
9585                 "%gather = OpLabel\n"
9586                 "%delta_next = OpPhi %f32 %c_f32_n1 %even %c_f32_1 %odd\n"
9587                 "%val = OpFAdd %f32 %val1 %delta\n"
9588                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9589                 "OpBranch %cont\n"
9590
9591                 "%cont = OpLabel\n"
9592                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9593                 "OpBranchConditional %again %loop %exit\n"
9594
9595                 "%exit = OpLabel\n"
9596                 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9597                 "OpReturnValue %result\n"
9598
9599                 "OpFunctionEnd\n");
9600
9601         map<string, string> continue_target;
9602
9603         // The Continue Target is the loop block itself.
9604         continue_target["continue_target"] = "%if";
9605         fragments["testfun"] = multiBlock.specialize(continue_target);
9606         createTestsForAllStages("multi_block_continue_construct", defaultColors, defaultColors, fragments, testGroup.get());
9607
9608         // The Continue Target is at the end of the loop.
9609         continue_target["continue_target"] = "%cont";
9610         fragments["testfun"] = multiBlock.specialize(continue_target);
9611         createTestsForAllStages("multi_block_loop_construct", defaultColors, defaultColors, fragments, testGroup.get());
9612
9613         // A loop with continue statement.
9614         fragments["testfun"] =
9615                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9616                 "%param1 = OpFunctionParameter %v4f32\n"
9617
9618                 "%entry = OpLabel\n"
9619                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9620                 "OpBranch %loop\n"
9621
9622                 ";adds 4, 3, and 1 to %val0 (skips 2)\n"
9623                 "%loop = OpLabel\n"
9624                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9625                 "%val1 = OpPhi %f32 %val0 %entry %val %continue\n"
9626                 "OpLoopMerge %exit %continue None\n"
9627                 "OpBranch %if\n"
9628
9629                 "%if = OpLabel\n"
9630                 ";skip if %count==2\n"
9631                 "%eq2 = OpIEqual %bool %count %c_i32_2\n"
9632                 "OpBranchConditional %eq2 %continue %body\n"
9633
9634                 "%body = OpLabel\n"
9635                 "%fcount = OpConvertSToF %f32 %count\n"
9636                 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9637                 "OpBranch %continue\n"
9638
9639                 "%continue = OpLabel\n"
9640                 "%val = OpPhi %f32 %val2 %body %val1 %if\n"
9641                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9642                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9643                 "OpBranchConditional %again %loop %exit\n"
9644
9645                 "%exit = OpLabel\n"
9646                 "%same = OpFSub %f32 %val %c_f32_8\n"
9647                 "%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9648                 "OpReturnValue %result\n"
9649                 "OpFunctionEnd\n";
9650         createTestsForAllStages("continue", defaultColors, defaultColors, fragments, testGroup.get());
9651
9652         // A loop with break.
9653         fragments["testfun"] =
9654                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9655                 "%param1 = OpFunctionParameter %v4f32\n"
9656
9657                 "%entry = OpLabel\n"
9658                 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9659                 "%dot = OpDot %f32 %param1 %param1\n"
9660                 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9661                 "%zero = OpConvertFToU %u32 %div\n"
9662                 "%two = OpIAdd %i32 %zero %c_i32_2\n"
9663                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9664                 "OpBranch %loop\n"
9665
9666                 ";adds 4 and 3 to %val0 (exits early)\n"
9667                 "%loop = OpLabel\n"
9668                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9669                 "%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9670                 "OpLoopMerge %exit %continue None\n"
9671                 "OpBranch %if\n"
9672
9673                 "%if = OpLabel\n"
9674                 ";end loop if %count==%two\n"
9675                 "%above2 = OpSGreaterThan %bool %count %two\n"
9676                 "OpBranchConditional %above2 %body %exit\n"
9677
9678                 "%body = OpLabel\n"
9679                 "%fcount = OpConvertSToF %f32 %count\n"
9680                 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9681                 "OpBranch %continue\n"
9682
9683                 "%continue = OpLabel\n"
9684                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9685                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9686                 "OpBranchConditional %again %loop %exit\n"
9687
9688                 "%exit = OpLabel\n"
9689                 "%val_post = OpPhi %f32 %val2 %continue %val1 %if\n"
9690                 "%same = OpFSub %f32 %val_post %c_f32_7\n"
9691                 "%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9692                 "OpReturnValue %result\n"
9693                 "OpFunctionEnd\n";
9694         createTestsForAllStages("break", defaultColors, defaultColors, fragments, testGroup.get());
9695
9696         // A loop with return.
9697         fragments["testfun"] =
9698                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9699                 "%param1 = OpFunctionParameter %v4f32\n"
9700
9701                 "%entry = OpLabel\n"
9702                 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9703                 "%dot = OpDot %f32 %param1 %param1\n"
9704                 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9705                 "%zero = OpConvertFToU %u32 %div\n"
9706                 "%two = OpIAdd %i32 %zero %c_i32_2\n"
9707                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9708                 "OpBranch %loop\n"
9709
9710                 ";returns early without modifying %param1\n"
9711                 "%loop = OpLabel\n"
9712                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9713                 "%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9714                 "OpLoopMerge %exit %continue None\n"
9715                 "OpBranch %if\n"
9716
9717                 "%if = OpLabel\n"
9718                 ";return if %count==%two\n"
9719                 "%above2 = OpSGreaterThan %bool %count %two\n"
9720                 "OpSelectionMerge %body DontFlatten\n"
9721                 "OpBranchConditional %above2 %body %early_exit\n"
9722
9723                 "%early_exit = OpLabel\n"
9724                 "OpReturnValue %param1\n"
9725
9726                 "%body = OpLabel\n"
9727                 "%fcount = OpConvertSToF %f32 %count\n"
9728                 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9729                 "OpBranch %continue\n"
9730
9731                 "%continue = OpLabel\n"
9732                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9733                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9734                 "OpBranchConditional %again %loop %exit\n"
9735
9736                 "%exit = OpLabel\n"
9737                 ";should never get here, so return an incorrect result\n"
9738                 "%result = OpVectorInsertDynamic %v4f32 %param1 %val2 %c_i32_0\n"
9739                 "OpReturnValue %result\n"
9740                 "OpFunctionEnd\n";
9741         createTestsForAllStages("return", defaultColors, defaultColors, fragments, testGroup.get());
9742
9743         // Continue inside a switch block to break to enclosing loop's merge block.
9744         // Matches roughly the following GLSL code:
9745         // for (; keep_going; keep_going = false)
9746         // {
9747         //     switch (int(param1.x))
9748         //     {
9749         //         case 0: continue;
9750         //         case 1: continue;
9751         //         default: continue;
9752         //     }
9753         //     dead code: modify return value to invalid result.
9754         // }
9755         fragments["pre_main"] =
9756                 "%fp_bool = OpTypePointer Function %bool\n"
9757                 "%true = OpConstantTrue %bool\n"
9758                 "%false = OpConstantFalse %bool\n";
9759
9760         fragments["testfun"] =
9761                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9762                 "%param1 = OpFunctionParameter %v4f32\n"
9763
9764                 "%entry = OpLabel\n"
9765                 "%keep_going = OpVariable %fp_bool Function\n"
9766                 "%val_ptr = OpVariable %fp_f32 Function\n"
9767                 "%param1_x = OpCompositeExtract %f32 %param1 0\n"
9768                 "OpStore %keep_going %true\n"
9769                 "OpBranch %forloop_begin\n"
9770
9771                 "%forloop_begin = OpLabel\n"
9772                 "OpLoopMerge %forloop_merge %forloop_continue None\n"
9773                 "OpBranch %forloop\n"
9774
9775                 "%forloop = OpLabel\n"
9776                 "%for_condition = OpLoad %bool %keep_going\n"
9777                 "OpBranchConditional %for_condition %forloop_body %forloop_merge\n"
9778
9779                 "%forloop_body = OpLabel\n"
9780                 "OpStore %val_ptr %param1_x\n"
9781                 "%param1_x_int = OpConvertFToS %i32 %param1_x\n"
9782
9783                 "OpSelectionMerge %switch_merge None\n"
9784                 "OpSwitch %param1_x_int %default 0 %case_0 1 %case_1\n"
9785                 "%case_0 = OpLabel\n"
9786                 "OpBranch %forloop_continue\n"
9787                 "%case_1 = OpLabel\n"
9788                 "OpBranch %forloop_continue\n"
9789                 "%default = OpLabel\n"
9790                 "OpBranch %forloop_continue\n"
9791                 "%switch_merge = OpLabel\n"
9792                 ";should never get here, so change the return value to invalid result\n"
9793                 "OpStore %val_ptr %c_f32_1\n"
9794                 "OpBranch %forloop_continue\n"
9795
9796                 "%forloop_continue = OpLabel\n"
9797                 "OpStore %keep_going %false\n"
9798                 "OpBranch %forloop_begin\n"
9799                 "%forloop_merge = OpLabel\n"
9800
9801                 "%val = OpLoad %f32 %val_ptr\n"
9802                 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9803                 "OpReturnValue %result\n"
9804                 "OpFunctionEnd\n";
9805         createTestsForAllStages("switch_continue", defaultColors, defaultColors, fragments, testGroup.get());
9806
9807         return testGroup.release();
9808 }
9809
9810 // A collection of tests putting OpControlBarrier in places GLSL forbids but SPIR-V allows.
9811 tcu::TestCaseGroup* createBarrierTests(tcu::TestContext& testCtx)
9812 {
9813         de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "barrier", "OpControlBarrier"));
9814         map<string, string> fragments;
9815
9816         // A barrier inside a function body.
9817         fragments["pre_main"] =
9818                 "%Workgroup = OpConstant %i32 2\n"
9819                 "%Invocation = OpConstant %i32 4\n"
9820                 "%MemorySemanticsNone = OpConstant %i32 0\n";
9821         fragments["testfun"] =
9822                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9823                 "%param1 = OpFunctionParameter %v4f32\n"
9824                 "%label_testfun = OpLabel\n"
9825                 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9826                 "OpReturnValue %param1\n"
9827                 "OpFunctionEnd\n";
9828         addTessCtrlTest(testGroup.get(), "in_function", fragments);
9829
9830         // Common setup code for the following tests.
9831         fragments["pre_main"] =
9832                 "%Workgroup = OpConstant %i32 2\n"
9833                 "%Invocation = OpConstant %i32 4\n"
9834                 "%MemorySemanticsNone = OpConstant %i32 0\n"
9835                 "%c_f32_5 = OpConstant %f32 5.\n";
9836         const string setupPercentZero =  // Begins %test_code function with code that sets %zero to 0u but cannot be optimized away.
9837                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9838                 "%param1 = OpFunctionParameter %v4f32\n"
9839                 "%entry = OpLabel\n"
9840                 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9841                 "%dot = OpDot %f32 %param1 %param1\n"
9842                 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9843                 "%zero = OpConvertFToU %u32 %div\n";
9844
9845         // Barriers inside OpSwitch branches.
9846         fragments["testfun"] =
9847                 setupPercentZero +
9848                 "OpSelectionMerge %switch_exit None\n"
9849                 "OpSwitch %zero %switch_default 0 %case0 1 %case1 ;should always go to %case0\n"
9850
9851                 "%case1 = OpLabel\n"
9852                 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9853                 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9854                 "%wrong_branch_alert1 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9855                 "OpBranch %switch_exit\n"
9856
9857                 "%switch_default = OpLabel\n"
9858                 "%wrong_branch_alert2 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9859                 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9860                 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9861                 "OpBranch %switch_exit\n"
9862
9863                 "%case0 = OpLabel\n"
9864                 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9865                 "OpBranch %switch_exit\n"
9866
9867                 "%switch_exit = OpLabel\n"
9868                 "%ret = OpPhi %v4f32 %param1 %case0 %wrong_branch_alert1 %case1 %wrong_branch_alert2 %switch_default\n"
9869                 "OpReturnValue %ret\n"
9870                 "OpFunctionEnd\n";
9871         addTessCtrlTest(testGroup.get(), "in_switch", fragments);
9872
9873         // Barriers inside if-then-else.
9874         fragments["testfun"] =
9875                 setupPercentZero +
9876                 "%eq0 = OpIEqual %bool %zero %c_u32_0\n"
9877                 "OpSelectionMerge %exit DontFlatten\n"
9878                 "OpBranchConditional %eq0 %then %else\n"
9879
9880                 "%else = OpLabel\n"
9881                 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9882                 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9883                 "%wrong_branch_alert = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9884                 "OpBranch %exit\n"
9885
9886                 "%then = OpLabel\n"
9887                 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9888                 "OpBranch %exit\n"
9889                 "%exit = OpLabel\n"
9890                 "%ret = OpPhi %v4f32 %param1 %then %wrong_branch_alert %else\n"
9891                 "OpReturnValue %ret\n"
9892                 "OpFunctionEnd\n";
9893         addTessCtrlTest(testGroup.get(), "in_if", fragments);
9894
9895         // A barrier after control-flow reconvergence, tempting the compiler to attempt something like this:
9896         // http://lists.llvm.org/pipermail/llvm-dev/2009-October/026317.html.
9897         fragments["testfun"] =
9898                 setupPercentZero +
9899                 "%thread_id = OpLoad %i32 %BP_gl_InvocationID\n"
9900                 "%thread0 = OpIEqual %bool %thread_id %c_i32_0\n"
9901                 "OpSelectionMerge %exit DontFlatten\n"
9902                 "OpBranchConditional %thread0 %then %else\n"
9903
9904                 "%else = OpLabel\n"
9905                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9906                 "OpBranch %exit\n"
9907
9908                 "%then = OpLabel\n"
9909                 "%val1 = OpVectorExtractDynamic %f32 %param1 %zero\n"
9910                 "OpBranch %exit\n"
9911
9912                 "%exit = OpLabel\n"
9913                 "%val = OpPhi %f32 %val0 %else %val1 %then\n"
9914                 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9915                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %val %zero\n"
9916                 "OpReturnValue %ret\n"
9917                 "OpFunctionEnd\n";
9918         addTessCtrlTest(testGroup.get(), "after_divergent_if", fragments);
9919
9920         // A barrier inside a loop.
9921         fragments["pre_main"] =
9922                 "%Workgroup = OpConstant %i32 2\n"
9923                 "%Invocation = OpConstant %i32 4\n"
9924                 "%MemorySemanticsNone = OpConstant %i32 0\n"
9925                 "%c_f32_10 = OpConstant %f32 10.\n";
9926         fragments["testfun"] =
9927                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9928                 "%param1 = OpFunctionParameter %v4f32\n"
9929                 "%entry = OpLabel\n"
9930                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9931                 "OpBranch %loop\n"
9932
9933                 ";adds 4, 3, 2, and 1 to %val0\n"
9934                 "%loop = OpLabel\n"
9935                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9936                 "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9937                 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9938                 "%fcount = OpConvertSToF %f32 %count\n"
9939                 "%val = OpFAdd %f32 %val1 %fcount\n"
9940                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9941                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9942                 "OpLoopMerge %exit %loop None\n"
9943                 "OpBranchConditional %again %loop %exit\n"
9944
9945                 "%exit = OpLabel\n"
9946                 "%same = OpFSub %f32 %val %c_f32_10\n"
9947                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9948                 "OpReturnValue %ret\n"
9949                 "OpFunctionEnd\n";
9950         addTessCtrlTest(testGroup.get(), "in_loop", fragments);
9951
9952         return testGroup.release();
9953 }
9954
9955 // Test for the OpFRem instruction.
9956 tcu::TestCaseGroup* createFRemTests(tcu::TestContext& testCtx)
9957 {
9958         de::MovePtr<tcu::TestCaseGroup>         testGroup(new tcu::TestCaseGroup(testCtx, "frem", "OpFRem"));
9959         map<string, string>                                     fragments;
9960         RGBA                                                            inputColors[4];
9961         RGBA                                                            outputColors[4];
9962
9963         fragments["pre_main"]                            =
9964                 "%c_f32_3 = OpConstant %f32 3.0\n"
9965                 "%c_f32_n3 = OpConstant %f32 -3.0\n"
9966                 "%c_f32_4 = OpConstant %f32 4.0\n"
9967                 "%c_f32_p75 = OpConstant %f32 0.75\n"
9968                 "%c_v4f32_p75_p75_p75_p75 = OpConstantComposite %v4f32 %c_f32_p75 %c_f32_p75 %c_f32_p75 %c_f32_p75 \n"
9969                 "%c_v4f32_4_4_4_4 = OpConstantComposite %v4f32 %c_f32_4 %c_f32_4 %c_f32_4 %c_f32_4\n"
9970                 "%c_v4f32_3_n3_3_n3 = OpConstantComposite %v4f32 %c_f32_3 %c_f32_n3 %c_f32_3 %c_f32_n3\n";
9971
9972         // The test does the following.
9973         // vec4 result = (param1 * 8.0) - 4.0;
9974         // return (frem(result.x,3) + 0.75, frem(result.y, -3) + 0.75, 0, 1)
9975         fragments["testfun"]                             =
9976                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9977                 "%param1 = OpFunctionParameter %v4f32\n"
9978                 "%label_testfun = OpLabel\n"
9979                 "%v_times_8 = OpVectorTimesScalar %v4f32 %param1 %c_f32_8\n"
9980                 "%minus_4 = OpFSub %v4f32 %v_times_8 %c_v4f32_4_4_4_4\n"
9981                 "%frem = OpFRem %v4f32 %minus_4 %c_v4f32_3_n3_3_n3\n"
9982                 "%added = OpFAdd %v4f32 %frem %c_v4f32_p75_p75_p75_p75\n"
9983                 "%xyz_1 = OpVectorInsertDynamic %v4f32 %added %c_f32_1 %c_i32_3\n"
9984                 "%xy_0_1 = OpVectorInsertDynamic %v4f32 %xyz_1 %c_f32_0 %c_i32_2\n"
9985                 "OpReturnValue %xy_0_1\n"
9986                 "OpFunctionEnd\n";
9987
9988
9989         inputColors[0]          = RGBA(16,      16,             0, 255);
9990         inputColors[1]          = RGBA(232, 232,        0, 255);
9991         inputColors[2]          = RGBA(232, 16,         0, 255);
9992         inputColors[3]          = RGBA(16,      232,    0, 255);
9993
9994         outputColors[0]         = RGBA(64,      64,             0, 255);
9995         outputColors[1]         = RGBA(255, 255,        0, 255);
9996         outputColors[2]         = RGBA(255, 64,         0, 255);
9997         outputColors[3]         = RGBA(64,      255,    0, 255);
9998
9999         createTestsForAllStages("frem", inputColors, outputColors, fragments, testGroup.get());
10000         return testGroup.release();
10001 }
10002
10003 // Test for the OpSRem instruction.
10004 tcu::TestCaseGroup* createOpSRemGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
10005 {
10006         de::MovePtr<tcu::TestCaseGroup>         testGroup(new tcu::TestCaseGroup(testCtx, "srem", "OpSRem"));
10007         map<string, string>                                     fragments;
10008
10009         fragments["pre_main"]                            =
10010                 "%c_f32_255 = OpConstant %f32 255.0\n"
10011                 "%c_i32_128 = OpConstant %i32 128\n"
10012                 "%c_i32_255 = OpConstant %i32 255\n"
10013                 "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
10014                 "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10015                 "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10016
10017         // The test does the following.
10018         // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10019         // ivec4 result = ivec4(srem(ints.x, ints.y), srem(ints.y, ints.z), srem(ints.z, ints.x), 255);
10020         // return float(result + 128) / 255.0;
10021         fragments["testfun"]                             =
10022                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10023                 "%param1 = OpFunctionParameter %v4f32\n"
10024                 "%label_testfun = OpLabel\n"
10025                 "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10026                 "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10027                 "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10028                 "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10029                 "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10030                 "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10031                 "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10032                 "%x_out = OpSRem %i32 %x_in %y_in\n"
10033                 "%y_out = OpSRem %i32 %y_in %z_in\n"
10034                 "%z_out = OpSRem %i32 %z_in %x_in\n"
10035                 "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10036                 "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10037                 "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10038                 "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10039                 "OpReturnValue %float_out\n"
10040                 "OpFunctionEnd\n";
10041
10042         const struct CaseParams
10043         {
10044                 const char*             name;
10045                 const char*             failMessageTemplate;    // customized status message
10046                 qpTestResult    failResult;                             // override status on failure
10047                 int                             operands[4][3];                 // four (x, y, z) vectors of operands
10048                 int                             results[4][3];                  // four (x, y, z) vectors of results
10049         } cases[] =
10050         {
10051                 {
10052                         "positive",
10053                         "${reason}",
10054                         QP_TEST_RESULT_FAIL,
10055                         { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } },                 // operands
10056                         { { 5, 12,  2 }, { 0, 5, 2 }, {  3, 8,  6 }, { 25, 60,   0 } },                 // results
10057                 },
10058                 {
10059                         "all",
10060                         "Inconsistent results, but within specification: ${reason}",
10061                         negFailResult,                                                                                                                  // negative operands, not required by the spec
10062                         { { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } },    // operands
10063                         { { 5, 12,  -2 }, {  0, -5, 2 }, {  3, 8,  -6 }, { 25, -60,   0 } },    // results
10064                 },
10065         };
10066         // If either operand is negative the result is undefined. Some implementations may still return correct values.
10067
10068         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10069         {
10070                 const CaseParams&       params                  = cases[caseNdx];
10071                 RGBA                            inputColors[4];
10072                 RGBA                            outputColors[4];
10073
10074                 for (int i = 0; i < 4; ++i)
10075                 {
10076                         inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10077                         outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
10078                 }
10079
10080                 createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
10081         }
10082
10083         return testGroup.release();
10084 }
10085
10086 // Test for the OpSMod instruction.
10087 tcu::TestCaseGroup* createOpSModGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
10088 {
10089         de::MovePtr<tcu::TestCaseGroup>         testGroup(new tcu::TestCaseGroup(testCtx, "smod", "OpSMod"));
10090         map<string, string>                                     fragments;
10091
10092         fragments["pre_main"]                            =
10093                 "%c_f32_255 = OpConstant %f32 255.0\n"
10094                 "%c_i32_128 = OpConstant %i32 128\n"
10095                 "%c_i32_255 = OpConstant %i32 255\n"
10096                 "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
10097                 "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10098                 "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10099
10100         // The test does the following.
10101         // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10102         // ivec4 result = ivec4(smod(ints.x, ints.y), smod(ints.y, ints.z), smod(ints.z, ints.x), 255);
10103         // return float(result + 128) / 255.0;
10104         fragments["testfun"]                             =
10105                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10106                 "%param1 = OpFunctionParameter %v4f32\n"
10107                 "%label_testfun = OpLabel\n"
10108                 "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10109                 "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10110                 "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10111                 "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10112                 "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10113                 "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10114                 "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10115                 "%x_out = OpSMod %i32 %x_in %y_in\n"
10116                 "%y_out = OpSMod %i32 %y_in %z_in\n"
10117                 "%z_out = OpSMod %i32 %z_in %x_in\n"
10118                 "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10119                 "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10120                 "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10121                 "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10122                 "OpReturnValue %float_out\n"
10123                 "OpFunctionEnd\n";
10124
10125         const struct CaseParams
10126         {
10127                 const char*             name;
10128                 const char*             failMessageTemplate;    // customized status message
10129                 qpTestResult    failResult;                             // override status on failure
10130                 int                             operands[4][3];                 // four (x, y, z) vectors of operands
10131                 int                             results[4][3];                  // four (x, y, z) vectors of results
10132         } cases[] =
10133         {
10134                 {
10135                         "positive",
10136                         "${reason}",
10137                         QP_TEST_RESULT_FAIL,
10138                         { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } },                         // operands
10139                         { { 5, 12,  2 }, { 0, 5, 2 }, {  3, 8,  6 }, { 25, 60,   0 } },                         // results
10140                 },
10141                 {
10142                         "all",
10143                         "Inconsistent results, but within specification: ${reason}",
10144                         negFailResult,                                                                                                                          // negative operands, not required by the spec
10145                         { { 5, 12, -17 }, { -5, -5,  7 }, { 75,   8, -81 }, {  25, -60, 100 } },        // operands
10146                         { { 5, -5,   3 }, {  0,  2, -3 }, {  3, -73,  69 }, { -35,  40,   0 } },        // results
10147                 },
10148         };
10149         // If either operand is negative the result is undefined. Some implementations may still return correct values.
10150
10151         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10152         {
10153                 const CaseParams&       params                  = cases[caseNdx];
10154                 RGBA                            inputColors[4];
10155                 RGBA                            outputColors[4];
10156
10157                 for (int i = 0; i < 4; ++i)
10158                 {
10159                         inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10160                         outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
10161                 }
10162
10163                 createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
10164         }
10165         return testGroup.release();
10166 }
10167
// Data types that can appear as the source or destination of a conversion test.
enum ConversionDataType
{
	// Signed integers.
	DATA_TYPE_SIGNED_8 = 0,
	DATA_TYPE_SIGNED_16,
	DATA_TYPE_SIGNED_32,
	DATA_TYPE_SIGNED_64,

	// Unsigned integers.
	DATA_TYPE_UNSIGNED_8,
	DATA_TYPE_UNSIGNED_16,
	DATA_TYPE_UNSIGNED_32,
	DATA_TYPE_UNSIGNED_64,

	// Floating point.
	DATA_TYPE_FLOAT_16,
	DATA_TYPE_FLOAT_32,
	DATA_TYPE_FLOAT_64,

	// Two-component signed integer vectors.
	DATA_TYPE_VEC2_SIGNED_16,
	DATA_TYPE_VEC2_SIGNED_32
};
10184
10185 const string getBitWidthStr (ConversionDataType type)
10186 {
10187         switch (type)
10188         {
10189                 case DATA_TYPE_SIGNED_8:
10190                 case DATA_TYPE_UNSIGNED_8:
10191                         return "8";
10192
10193                 case DATA_TYPE_SIGNED_16:
10194                 case DATA_TYPE_UNSIGNED_16:
10195                 case DATA_TYPE_FLOAT_16:
10196                         return "16";
10197
10198                 case DATA_TYPE_SIGNED_32:
10199                 case DATA_TYPE_UNSIGNED_32:
10200                 case DATA_TYPE_FLOAT_32:
10201                 case DATA_TYPE_VEC2_SIGNED_16:
10202                         return "32";
10203
10204                 case DATA_TYPE_SIGNED_64:
10205                 case DATA_TYPE_UNSIGNED_64:
10206                 case DATA_TYPE_FLOAT_64:
10207                 case DATA_TYPE_VEC2_SIGNED_32:
10208                         return "64";
10209
10210                 default:
10211                         DE_ASSERT(false);
10212         }
10213         return "";
10214 }
10215
10216 const string getByteWidthStr (ConversionDataType type)
10217 {
10218         switch (type)
10219         {
10220                 case DATA_TYPE_SIGNED_8:
10221                 case DATA_TYPE_UNSIGNED_8:
10222                         return "1";
10223
10224                 case DATA_TYPE_SIGNED_16:
10225                 case DATA_TYPE_UNSIGNED_16:
10226                 case DATA_TYPE_FLOAT_16:
10227                         return "2";
10228
10229                 case DATA_TYPE_SIGNED_32:
10230                 case DATA_TYPE_UNSIGNED_32:
10231                 case DATA_TYPE_FLOAT_32:
10232                 case DATA_TYPE_VEC2_SIGNED_16:
10233                         return "4";
10234
10235                 case DATA_TYPE_SIGNED_64:
10236                 case DATA_TYPE_UNSIGNED_64:
10237                 case DATA_TYPE_FLOAT_64:
10238                 case DATA_TYPE_VEC2_SIGNED_32:
10239                         return "8";
10240
10241                 default:
10242                         DE_ASSERT(false);
10243         }
10244         return "";
10245 }
10246
10247 bool isSigned (ConversionDataType type)
10248 {
10249         switch (type)
10250         {
10251                 case DATA_TYPE_SIGNED_8:
10252                 case DATA_TYPE_SIGNED_16:
10253                 case DATA_TYPE_SIGNED_32:
10254                 case DATA_TYPE_SIGNED_64:
10255                 case DATA_TYPE_FLOAT_16:
10256                 case DATA_TYPE_FLOAT_32:
10257                 case DATA_TYPE_FLOAT_64:
10258                 case DATA_TYPE_VEC2_SIGNED_16:
10259                 case DATA_TYPE_VEC2_SIGNED_32:
10260                         return true;
10261
10262                 case DATA_TYPE_UNSIGNED_8:
10263                 case DATA_TYPE_UNSIGNED_16:
10264                 case DATA_TYPE_UNSIGNED_32:
10265                 case DATA_TYPE_UNSIGNED_64:
10266                         return false;
10267
10268                 default:
10269                         DE_ASSERT(false);
10270         }
10271         return false;
10272 }
10273
10274 bool isInt (ConversionDataType type)
10275 {
10276         switch (type)
10277         {
10278                 case DATA_TYPE_SIGNED_8:
10279                 case DATA_TYPE_SIGNED_16:
10280                 case DATA_TYPE_SIGNED_32:
10281                 case DATA_TYPE_SIGNED_64:
10282                 case DATA_TYPE_UNSIGNED_8:
10283                 case DATA_TYPE_UNSIGNED_16:
10284                 case DATA_TYPE_UNSIGNED_32:
10285                 case DATA_TYPE_UNSIGNED_64:
10286                         return true;
10287
10288                 case DATA_TYPE_FLOAT_16:
10289                 case DATA_TYPE_FLOAT_32:
10290                 case DATA_TYPE_FLOAT_64:
10291                 case DATA_TYPE_VEC2_SIGNED_16:
10292                 case DATA_TYPE_VEC2_SIGNED_32:
10293                         return false;
10294
10295                 default:
10296                         DE_ASSERT(false);
10297         }
10298         return false;
10299 }
10300
10301 bool isFloat (ConversionDataType type)
10302 {
10303         switch (type)
10304         {
10305                 case DATA_TYPE_SIGNED_8:
10306                 case DATA_TYPE_SIGNED_16:
10307                 case DATA_TYPE_SIGNED_32:
10308                 case DATA_TYPE_SIGNED_64:
10309                 case DATA_TYPE_UNSIGNED_8:
10310                 case DATA_TYPE_UNSIGNED_16:
10311                 case DATA_TYPE_UNSIGNED_32:
10312                 case DATA_TYPE_UNSIGNED_64:
10313                 case DATA_TYPE_VEC2_SIGNED_16:
10314                 case DATA_TYPE_VEC2_SIGNED_32:
10315                         return false;
10316
10317                 case DATA_TYPE_FLOAT_16:
10318                 case DATA_TYPE_FLOAT_32:
10319                 case DATA_TYPE_FLOAT_64:
10320                         return true;
10321
10322                 default:
10323                         DE_ASSERT(false);
10324         }
10325         return false;
10326 }
10327
10328 const string getTypeName (ConversionDataType type)
10329 {
10330         string prefix = isSigned(type) ? "" : "u";
10331
10332         if              (isInt(type))                                           return prefix + "int"   + getBitWidthStr(type);
10333         else if (isFloat(type))                                         return prefix + "float" + getBitWidthStr(type);
10334         else if (type == DATA_TYPE_VEC2_SIGNED_16)      return "i16vec2";
10335         else if (type == DATA_TYPE_VEC2_SIGNED_32)      return "i32vec2";
10336         else                                                                            DE_ASSERT(false);
10337
10338         return "";
10339 }
10340
10341 const string getTestName (ConversionDataType from, ConversionDataType to, const char* suffix)
10342 {
10343         const string fullSuffix(suffix == DE_NULL ? "" : string("_") + string(suffix));
10344
10345         return getTypeName(from) + "_to_" + getTypeName(to) + fullSuffix;
10346 }
10347
10348 const string getAsmTypeName (ConversionDataType type, deUint32 elements = 1)
10349 {
10350         string prefix;
10351
10352         if              (isInt(type))                                           prefix = isSigned(type) ? "i" : "u";
10353         else if (isFloat(type))                                         prefix = "f";
10354         else if (type == DATA_TYPE_VEC2_SIGNED_16)      return "i16vec2";
10355         else if (type == DATA_TYPE_VEC2_SIGNED_32)      return "v2i32";
10356         else                                                                            DE_ASSERT(false);
10357         if ((isInt(type) || isFloat(type)) && elements == 2)
10358         {
10359                 prefix = "v2" + prefix;
10360         }
10361
10362         return prefix + getBitWidthStr(type);
10363 }
10364
10365 template<typename T>
10366 BufferSp getSpecializedBuffer (deInt64 number, deUint32 elements = 1)
10367 {
10368         return BufferSp(new Buffer<T>(vector<T>(elements, (T)number)));
10369 }
10370
10371 BufferSp getBuffer (ConversionDataType type, deInt64 number, deUint32 elements = 1)
10372 {
10373         switch (type)
10374         {
10375                 case DATA_TYPE_SIGNED_8:                return getSpecializedBuffer<deInt8>(number, elements);
10376                 case DATA_TYPE_SIGNED_16:               return getSpecializedBuffer<deInt16>(number, elements);
10377                 case DATA_TYPE_SIGNED_32:               return getSpecializedBuffer<deInt32>(number, elements);
10378                 case DATA_TYPE_SIGNED_64:               return getSpecializedBuffer<deInt64>(number, elements);
10379                 case DATA_TYPE_UNSIGNED_8:              return getSpecializedBuffer<deUint8>(number, elements);
10380                 case DATA_TYPE_UNSIGNED_16:             return getSpecializedBuffer<deUint16>(number, elements);
10381                 case DATA_TYPE_UNSIGNED_32:             return getSpecializedBuffer<deUint32>(number, elements);
10382                 case DATA_TYPE_UNSIGNED_64:             return getSpecializedBuffer<deUint64>(number, elements);
10383                 case DATA_TYPE_FLOAT_16:                return getSpecializedBuffer<deUint16>(number, elements);
10384                 case DATA_TYPE_FLOAT_32:                return getSpecializedBuffer<deUint32>(number, elements);
10385                 case DATA_TYPE_FLOAT_64:                return getSpecializedBuffer<deUint64>(number, elements);
10386                 case DATA_TYPE_VEC2_SIGNED_16:  return getSpecializedBuffer<deUint32>(number, elements);
10387                 case DATA_TYPE_VEC2_SIGNED_32:  return getSpecializedBuffer<deUint64>(number, elements);
10388
10389                 default:                                                TCU_THROW(InternalError, "Unimplemented type passed");
10390         }
10391 }
10392
10393 bool usesInt8 (ConversionDataType from, ConversionDataType to)
10394 {
10395         return (from == DATA_TYPE_SIGNED_8 || to == DATA_TYPE_SIGNED_8 ||
10396                         from == DATA_TYPE_UNSIGNED_8 || to == DATA_TYPE_UNSIGNED_8);
10397 }
10398
10399 bool usesInt16 (ConversionDataType from, ConversionDataType to)
10400 {
10401         return (from == DATA_TYPE_SIGNED_16 || to == DATA_TYPE_SIGNED_16 ||
10402                         from == DATA_TYPE_UNSIGNED_16 || to == DATA_TYPE_UNSIGNED_16 ||
10403                         from == DATA_TYPE_VEC2_SIGNED_16 || to == DATA_TYPE_VEC2_SIGNED_16);
10404 }
10405
10406 bool usesInt32 (ConversionDataType from, ConversionDataType to)
10407 {
10408         return (from == DATA_TYPE_SIGNED_32 || to == DATA_TYPE_SIGNED_32 ||
10409                         from == DATA_TYPE_UNSIGNED_32 || to == DATA_TYPE_UNSIGNED_32 ||
10410                         from == DATA_TYPE_VEC2_SIGNED_32|| to == DATA_TYPE_VEC2_SIGNED_32);
10411 }
10412
10413 bool usesInt64 (ConversionDataType from, ConversionDataType to)
10414 {
10415         return (from == DATA_TYPE_SIGNED_64 || to == DATA_TYPE_SIGNED_64 ||
10416                         from == DATA_TYPE_UNSIGNED_64 || to == DATA_TYPE_UNSIGNED_64);
10417 }
10418
10419 bool usesFloat16 (ConversionDataType from, ConversionDataType to)
10420 {
10421         return (from == DATA_TYPE_FLOAT_16 || to == DATA_TYPE_FLOAT_16);
10422 }
10423
10424 bool usesFloat32 (ConversionDataType from, ConversionDataType to)
10425 {
10426         return (from == DATA_TYPE_FLOAT_32 || to == DATA_TYPE_FLOAT_32);
10427 }
10428
10429 bool usesFloat64 (ConversionDataType from, ConversionDataType to)
10430 {
10431         return (from == DATA_TYPE_FLOAT_64 || to == DATA_TYPE_FLOAT_64);
10432 }
10433
10434 void getVulkanFeaturesAndExtensions (ConversionDataType from, ConversionDataType to, bool useStorageExt, VulkanFeatures& vulkanFeatures, vector<string>& extensions)
10435 {
10436         if (usesInt16(from, to) && !usesInt32(from, to))
10437                 vulkanFeatures.coreFeatures.shaderInt16 = DE_TRUE;
10438
10439         if (usesInt64(from, to))
10440                 vulkanFeatures.coreFeatures.shaderInt64 = DE_TRUE;
10441
10442         if (usesFloat64(from, to))
10443                 vulkanFeatures.coreFeatures.shaderFloat64 = DE_TRUE;
10444
10445         if ((usesInt16(from, to) || usesFloat16(from, to)) && useStorageExt)
10446         {
10447                 extensions.push_back("VK_KHR_16bit_storage");
10448                 vulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
10449         }
10450
10451         if (usesFloat16(from, to) || usesInt8(from, to))
10452         {
10453                 extensions.push_back("VK_KHR_shader_float16_int8");
10454
10455                 if (usesFloat16(from, to))
10456                 {
10457                         vulkanFeatures.extFloat16Int8.shaderFloat16 = true;
10458                 }
10459
10460                 if (usesInt8(from, to))
10461                 {
10462                         vulkanFeatures.extFloat16Int8.shaderInt8 = true;
10463
10464                         extensions.push_back("VK_KHR_8bit_storage");
10465                         vulkanFeatures.ext8BitStorage.storageBuffer8BitAccess = true;
10466                 }
10467         }
10468 }
10469
10470 struct ConvertCase
10471 {
10472         ConvertCase (const string& instruction, ConversionDataType from, ConversionDataType to, deInt64 number, bool separateOutput = false, deInt64 outputNumber = 0, const char* suffix = DE_NULL, bool useStorageExt = true)
10473         : m_fromType            (from)
10474         , m_toType                      (to)
10475         , m_elements            (1)
10476         , m_useStorageExt       (useStorageExt)
10477         , m_name                        (getTestName(from, to, suffix))
10478         {
10479                 string caps;
10480                 string decl;
10481                 string exts;
10482
10483                 m_asmTypes["inStorageType"]     = getAsmTypeName(from);
10484                 m_asmTypes["outStorageType"] = getAsmTypeName(to);
10485                 m_asmTypes["inCast"] = "OpCopyObject";
10486                 m_asmTypes["outCast"] = "OpCopyObject";
10487                 // If the storage extensions are being avoided, tests instead uses
10488                 // vectors so that they are easily convertible to 32-bit integers.
10489                 // |m_elements| indicates the size of the vector. It modifies how many
10490                 // items added to the buffers and converted in the tests.
10491                 //
10492                 // Currently only supports 1 (default) or 2 elements.
10493                 if (!m_useStorageExt)
10494                 {
10495                         bool in_change = false;
10496                         bool out_change = false;
10497                         if (usesFloat16(from, from) || usesInt16(from, from))
10498                         {
10499                                 m_asmTypes["inStorageType"] = "u32";
10500                                 m_asmTypes["inCast"] = "OpBitcast";
10501                                 m_elements = 2;
10502                                 in_change = true;
10503                         }
10504                         if (usesFloat16(to, to) || usesInt16(to, to))
10505                         {
10506                                 m_asmTypes["outStorageType"] = "u32";
10507                                 m_asmTypes["outCast"] = "OpBitcast";
10508                                 m_elements = 2;
10509                                 out_change = true;
10510                         }
10511                         if (in_change && !out_change)
10512                         {
10513                                 m_asmTypes["outStorageType"] = getAsmTypeName(to, m_elements);
10514                         }
10515                         if (!in_change && out_change)
10516                         {
10517                                 m_asmTypes["inStorageType"] = getAsmTypeName(from, m_elements);
10518                         }
10519                 }
10520
10521                 // Safety check for implementation.
10522                 if (m_elements < 1 || m_elements > 2)
10523                         TCU_THROW(InternalError, "Unsupported number of elements");
10524
10525                 m_asmTypes["inputType"]         = getAsmTypeName(from, m_elements);
10526                 m_asmTypes["outputType"]        = getAsmTypeName(to, m_elements);
10527
10528                 m_inputBuffer = getBuffer(from, number, m_elements);
10529                 if (separateOutput)
10530                         m_outputBuffer = getBuffer(to, outputNumber, m_elements);
10531                 else
10532                         m_outputBuffer = getBuffer(to, number, m_elements);
10533
10534                 if (usesInt8(from, to))
10535                 {
10536                         bool requiresInt8Capability = true;
10537                         if (instruction == "OpUConvert" || instruction == "OpSConvert")
10538                         {
10539                                 // Conversions between 8 and 32 bit are provided by SPV_KHR_8bit_storage. The rest requires explicit Int8
10540                                 if (usesInt32(from, to))
10541                                         requiresInt8Capability = false;
10542                         }
10543
10544                         caps += "OpCapability StorageBuffer8BitAccess\n";
10545                         if (requiresInt8Capability)
10546                                 caps += "OpCapability Int8\n";
10547
10548                         decl += "%i8         = OpTypeInt 8 1\n"
10549                                         "%u8         = OpTypeInt 8 0\n";
10550
10551                         if (m_elements == 2)
10552                         {
10553                                 decl += "%v2i8       = OpTypeVector %i8 2\n"
10554                                                 "%v2u8       = OpTypeVector %u8 2\n";
10555                         }
10556                         exts += "OpExtension \"SPV_KHR_8bit_storage\"\n";
10557                 }
10558
10559                 if (usesInt16(from, to))
10560                 {
10561                         bool requiresInt16Capability = true;
10562
10563                         if (instruction == "OpUConvert" || instruction == "OpSConvert" || instruction == "OpFConvert")
10564                         {
10565                                 // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10566                                 if (usesInt32(from, to) || usesFloat32(from, to))
10567                                         requiresInt16Capability = false;
10568                         }
10569
10570                         decl += "%i16        = OpTypeInt 16 1\n"
10571                                         "%u16        = OpTypeInt 16 0\n";
10572                         if (m_elements == 2)
10573                         {
10574                                 decl += "%v2i16      = OpTypeVector %i16 2\n"
10575                                                 "%v2u16      = OpTypeVector %u16 2\n";
10576                         }
10577                         else
10578                         {
10579                                 decl += "%i16vec2    = OpTypeVector %i16 2\n";
10580                         }
10581
10582                         // Conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10583                         if (requiresInt16Capability || !m_useStorageExt)
10584                                 caps += "OpCapability Int16\n";
10585                 }
10586
10587                 if (usesFloat16(from, to))
10588                 {
10589                         decl += "%f16        = OpTypeFloat 16\n";
10590                         if (m_elements == 2)
10591                         {
10592                                 decl += "%v2f16      = OpTypeVector %f16 2\n";
10593                         }
10594
10595                         // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Float16
10596                         if (!usesFloat32(from, to) || !m_useStorageExt)
10597                                 caps += "OpCapability Float16\n";
10598                 }
10599
10600                 if ((usesInt16(from, to) || usesFloat16(from, to)) && m_useStorageExt)
10601                 {
10602                         caps += "OpCapability StorageUniformBufferBlock16\n";
10603                         exts += "OpExtension \"SPV_KHR_16bit_storage\"\n";
10604                 }
10605
10606                 if (usesInt64(from, to))
10607                 {
10608                         caps += "OpCapability Int64\n";
10609                         decl += "%i64        = OpTypeInt 64 1\n"
10610                                         "%u64        = OpTypeInt 64 0\n";
10611                         if (m_elements == 2)
10612                         {
10613                                 decl += "%v2i64      = OpTypeVector %i64 2\n"
10614                                                 "%v2u64      = OpTypeVector %u64 2\n";
10615                         }
10616                 }
10617
10618                 if (usesFloat64(from, to))
10619                 {
10620                         caps += "OpCapability Float64\n";
10621                         decl += "%f64        = OpTypeFloat 64\n";
10622                         if (m_elements == 2)
10623                         {
10624                                 decl += "%v2f64        = OpTypeVector %f64 2\n";
10625                         }
10626                 }
10627
10628                 m_asmTypes["datatype_capabilities"]             = caps;
10629                 m_asmTypes["datatype_additional_decl"]  = decl;
10630                 m_asmTypes["datatype_extensions"]               = exts;
10631         }
10632
10633         ConversionDataType              m_fromType;
10634         ConversionDataType              m_toType;
10635         deUint32                                m_elements;
10636         bool                                    m_useStorageExt;
10637         string                                  m_name;
10638         map<string, string>             m_asmTypes;
10639         BufferSp                                m_inputBuffer;
10640         BufferSp                                m_outputBuffer;
10641 };
10642
10643 const string getConvertCaseShaderStr (const string& instruction, const ConvertCase& convertCase, bool addVectors = false)
10644 {
10645         map<string, string> params = convertCase.m_asmTypes;
10646
10647         params["instruction"]   = instruction;
10648         params["inDecorator"]   = getByteWidthStr(convertCase.m_fromType);
10649         params["outDecorator"]  = getByteWidthStr(convertCase.m_toType);
10650
10651         std::string shader (
10652                 "OpCapability Shader\n"
10653                 "${datatype_capabilities}"
10654                 "${datatype_extensions:opt}"
10655                 "OpMemoryModel Logical GLSL450\n"
10656                 "OpEntryPoint GLCompute %main \"main\"\n"
10657                 "OpExecutionMode %main LocalSize 1 1 1\n"
10658                 "OpSource GLSL 430\n"
10659                 "OpName %main           \"main\"\n"
10660                 // Decorators
10661                 "OpDecorate %indata DescriptorSet 0\n"
10662                 "OpDecorate %indata Binding 0\n"
10663                 "OpDecorate %outdata DescriptorSet 0\n"
10664                 "OpDecorate %outdata Binding 1\n"
10665                 "OpDecorate %in_buf BufferBlock\n"
10666                 "OpDecorate %out_buf BufferBlock\n"
10667                 "OpMemberDecorate %in_buf 0 Offset 0\n"
10668                 "OpMemberDecorate %out_buf 0 Offset 0\n"
10669                 // Base types
10670                 "%void       = OpTypeVoid\n"
10671                 "%voidf      = OpTypeFunction %void\n"
10672                 "%u32        = OpTypeInt 32 0\n"
10673                 "%i32        = OpTypeInt 32 1\n"
10674                 "%f32        = OpTypeFloat 32\n"
10675                 "%v2i32      = OpTypeVector %i32 2\n"
10676                 "${datatype_additional_decl}"
10677         );
10678         if (addVectors)
10679         {
10680                 shader += "%v2u32 = OpTypeVector %u32 2\n"
10681                                         "%v2f32 = OpTypeVector %f32 2\n";
10682         }
10683         shader +=
10684                 "%uvec3      = OpTypeVector %u32 3\n"
10685                 // Derived types
10686                 "%in_ptr     = OpTypePointer Uniform %${inStorageType}\n"
10687                 "%out_ptr    = OpTypePointer Uniform %${outStorageType}\n"
10688                 "%in_buf     = OpTypeStruct %${inStorageType}\n"
10689                 "%out_buf    = OpTypeStruct %${outStorageType}\n"
10690                 "%in_bufptr  = OpTypePointer Uniform %in_buf\n"
10691                 "%out_bufptr = OpTypePointer Uniform %out_buf\n"
10692                 "%indata     = OpVariable %in_bufptr Uniform\n"
10693                 "%outdata    = OpVariable %out_bufptr Uniform\n"
10694                 // Constants
10695                 "%zero       = OpConstant %i32 0\n"
10696                 // Main function
10697                 "%main       = OpFunction %void None %voidf\n"
10698                 "%label      = OpLabel\n"
10699                 "%inloc      = OpAccessChain %in_ptr %indata %zero\n"
10700                 "%outloc     = OpAccessChain %out_ptr %outdata %zero\n"
10701                 "%inval      = OpLoad %${inStorageType} %inloc\n"
10702                 "%in_cast    = ${inCast} %${inputType} %inval\n"
10703                 "%conv       = ${instruction} %${outputType} %in_cast\n"
10704                 "%out_cast   = ${outCast} %${outStorageType} %conv\n"
10705                 "              OpStore %outloc %out_cast\n"
10706                 "              OpReturn\n"
10707                 "              OpFunctionEnd\n"
10708         ;
10709
10710         return StringTemplate(shader).specialize(params);
10711 }
10712
10713 void createConvertCases (vector<ConvertCase>& testCases, const string& instruction)
10714 {
10715         if (instruction == "OpUConvert")
10716         {
10717                 // Convert unsigned int to unsigned int
10718                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_UNSIGNED_16,          42));
10719                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_UNSIGNED_32,          73));
10720                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_UNSIGNED_64,          121));
10721
10722                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_UNSIGNED_8,           33));
10723                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_UNSIGNED_32,          60653));
10724                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_UNSIGNED_64,          17991));
10725
10726                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_UNSIGNED_64,          904256275));
10727                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_UNSIGNED_16,          6275));
10728                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_UNSIGNED_8,           17));
10729
10730                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_UNSIGNED_32,          701256243));
10731                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_UNSIGNED_16,          4741));
10732                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_UNSIGNED_8,           65));
10733
10734                 // Zero extension for int->uint
10735                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_16,          56));
10736                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_32,          -47,                                                            true,   209));
10737                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_64,          -5,                                                                     true,   251));
10738                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_32,          14669));
10739                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_64,          -3341,                                                          true,   62195));
10740                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_64,          973610259));
10741
10742                 // Truncate for int->uint
10743                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_8,           -25711,                                                         true,   145));
10744                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_8,           103));
10745                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_8,           -1067742499291926803ll,                         true,   237));
10746                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_16,          12382));
10747                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_32,          -972812359,                                                     true,   3322154937u));
10748                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_16,          -1067742499291926803ll,                         true,   61165));
10749         }
10750         else if (instruction == "OpSConvert")
10751         {
10752                 // Sign extension int->int
10753                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_SIGNED_16,            -30));
10754                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_SIGNED_32,            55));
10755                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_SIGNED_64,            -3));
10756                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_SIGNED_32,            14669));
10757                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_SIGNED_64,            -3341));
10758                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_SIGNED_64,            973610259));
10759
10760                 // Truncate for int->int
10761                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_SIGNED_8,                     81));
10762                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_SIGNED_8,                     -93));
10763                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_SIGNED_8,                     3182748172687672ll,                                     true,   56));
10764                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_SIGNED_16,            12382));
10765                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_SIGNED_32,            -972812359));
10766                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_SIGNED_16,            -1067742499291926803ll,                         true,   -4371));
10767
10768                 // Sign extension for int->uint
10769                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_16,          56));
10770                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_32,          -47,                                                            true,   4294967249u));
10771                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_64,          -5,                                                                     true,   18446744073709551611ull));
10772                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_32,          14669));
10773                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_64,          -3341,                                                          true,   18446744073709548275ull));
10774                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_64,          973610259));
10775
10776                 // Truncate for int->uint
10777                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_8,           -25711,                                                         true,   145));
10778                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_8,           103));
10779                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_8,           -1067742499291926803ll,                         true,   237));
10780                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_16,          12382));
10781                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_32,          -972812359,                                                     true,   3322154937u));
10782                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_16,          -1067742499291926803ll,                         true,   61165));
10783
10784                 // Sign extension for uint->int
10785                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_SIGNED_16,            71));
10786                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_SIGNED_32,            201,                                                            true,   -55));
10787                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_SIGNED_64,            188,                                                            true,   -68));
10788                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_SIGNED_32,            14669));
10789                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_SIGNED_64,            62195,                                                          true,   -3341));
10790                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_SIGNED_64,            973610259));
10791
10792                 // Truncate for uint->int
10793                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_SIGNED_8,                     67));
10794                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_SIGNED_8,                     133,                                                            true,   -123));
10795                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_SIGNED_8,                     836927654193256494ull,                          true,   46));
10796                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_SIGNED_16,            12382));
10797                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_SIGNED_32,            18446744072736739257ull,                        true,   -972812359));
10798                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_SIGNED_16,            17379001574417624813ull,                        true,   -4371));
10799
10800                 // Convert i16vec2 to i32vec2 and vice versa
10801                 // Unsigned values are used here to represent negative signed values and to allow defined shifting behaviour.
10802                 // The actual signed value -32123 is used here as uint16 value 33413 and uint32 value 4294935173
10803                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_VEC2_SIGNED_16,       DATA_TYPE_VEC2_SIGNED_32,       (33413u << 16)                  | 27593,        true,   (4294935173ull << 32)   | 27593));
10804                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_VEC2_SIGNED_32,       DATA_TYPE_VEC2_SIGNED_16,       (4294935173ull << 32)   | 27593,        true,   (33413u << 16)                  | 27593));
10805         }
10806         else if (instruction == "OpFConvert")
10807         {
10808                 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10809                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_FLOAT_64,                     0x449a4000,                                                     true,   0x4093480000000000));
10810                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_FLOAT_32,                     0x4093480000000000,                                     true,   0x449a4000));
10811
10812                 // Conversions between 16-bit and 32-bit floats are supported by
10813                 // both 16-bit storage and Float16. The tests are duplicated to
10814                 // exercise both cases.
10815                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_FLOAT_16,                     0x449a4000,                                                     true,   0x64D2));
10816                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_FLOAT_32,                     0x64D2,                                                         true,   0x449a4000));
10817                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_FLOAT_16,                     0x449a4000,                                                     true,   0x64D2,                                 "no_storage",   false));
10818                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_FLOAT_32,                     0x64D2,                                                         true,   0x449a4000,                             "no_storage",   false));
10819
10820                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_FLOAT_64,                     0x64D2,                                                         true,   0x4093480000000000));
10821                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_FLOAT_16,                     0x4093480000000000,                                     true,   0x64D2));
10822                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_FLOAT_64,                     0x64D2,                                                         true,   0x4093480000000000,             "no_storage",   false));
10823             testCases.push_back(ConvertCase(instruction,        DATA_TYPE_FLOAT_64,                     DATA_TYPE_FLOAT_16,                     0x4093480000000000,                                     true,   0x64D2,                                 "no_storage",   false));
10824
10825         }
10826         else if (instruction == "OpConvertFToU")
10827         {
10828                 // Normal numbers from uint8 range
10829                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_8,           0x5020,                                                         true,   33,                                                                     "33",   false));
10830                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_8,           0x42280000,                                                     true,   42,                                                                     "42"));
10831                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_8,           0x4067800000000000ull,                          true,   188,                                                            "188"));
10832
10833                 // Maximum uint8 value
10834                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_8,           0x5BF8,                                                         true,   255,                                                            "max",  false));
10835                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_8,           0x437F0000,                                                     true,   255,                                                            "max"));
10836                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_8,           0x406FE00000000000ull,                          true,   255,                                                            "max"));
10837
10838                 // +0
10839                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_8,           0x0000,                                                         true,   0,                                                                      "p0",   false));
10840                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_8,           0x00000000,                                                     true,   0,                                                                      "p0"));
10841                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_8,           0x0000000000000000ull,                          true,   0,                                                                      "p0"));
10842
10843                 // -0
10844                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_8,           0x8000,                                                         true,   0,                                                                      "m0",   false));
10845                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_8,           0x80000000,                                                     true,   0,                                                                      "m0"));
10846                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_8,           0x8000000000000000ull,                          true,   0,                                                                      "m0"));
10847
10848                 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10849                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_16,          0x64D2,                                                         true,   1234,                                                           "1234", false));
10850                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_32,          0x64D2,                                                         true,   1234,                                                           "1234", false));
10851                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_64,          0x64D2,                                                         true,   1234,                                                           "1234", false));
10852
10853                 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10854                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_16,          0x7BFF,                                                         true,   65504,                                                          "max",  false));
10855                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_32,          0x7BFF,                                                         true,   65504,                                                          "max",  false));
10856                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_64,          0x7BFF,                                                         true,   65504,                                                          "max",  false));
10857
10858                 // +0
10859                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_32,          0x0000,                                                         true,   0,                                                                      "p0",   false));
10860                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_16,          0x0000,                                                         true,   0,                                                                      "p0",   false));
10861                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_64,          0x0000,                                                         true,   0,                                                                      "p0",   false));
10862
10863                 // -0
10864                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_16,          0x8000,                                                         true,   0,                                                                      "m0",   false));
10865                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_32,          0x8000,                                                         true,   0,                                                                      "m0",   false));
10866                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_64,          0x8000,                                                         true,   0,                                                                      "m0",   false));
10867
10868                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_16,          0x449a4000,                                                     true,   1234));
10869                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_32,          0x449a4000,                                                     true,   1234));
10870                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_64,          0x449a4000,                                                     true,   1234));
10871                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_16,          0x4093480000000000,                                     true,   1234));
10872                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_32,          0x4093480000000000,                                     true,   1234));
10873                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_64,          0x4093480000000000,                                     true,   1234));
10874         }
10875         else if (instruction == "OpConvertUToF")
10876         {
10877                 // Normal numbers from uint8 range
10878                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_16,                     116,                                                            true,   0x5740,                                                         "116",  false));
10879                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_32,                     232,                                                            true,   0x43680000,                                                     "232"));
10880                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_64,                     164,                                                            true,   0x4064800000000000ull,                          "164"));
10881
10882                 // Maximum uint8 value
10883                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_16,                     255,                                                            true,   0x5BF8,                                                         "max",  false));
10884                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_32,                     255,                                                            true,   0x437F0000,                                                     "max"));
10885                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_64,                     255,                                                            true,   0x406FE00000000000ull,                          "max"));
10886
10887                 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10888                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_FLOAT_16,                     1234,                                                           true,   0x64D2,                                                         "1234", false));
10889                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_FLOAT_16,                     1234,                                                           true,   0x64D2,                                                         "1234", false));
10890                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_16,                     1234,                                                           true,   0x64D2,                                                         "1234", false));
10891
10892                 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10893                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_FLOAT_16,                     65504,                                                          true,   0x7BFF,                                                         "max",  false));
10894                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_FLOAT_16,                     65504,                                                          true,   0x7BFF,                                                         "max",  false));
10895                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_16,                     65504,                                                          true,   0x7BFF,                                                         "max",  false));
10896
10897                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_32,                     4294967296ll,                                           true,   0x4f800000,                                                     "4294967296",   false));
10898                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_64,                     4294967296ll,                                           true,   0x41f0000000000000,                                     "4294967296",   false));
10899
10900                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_32,                     0xffffff0000000000,                                     true,   0x5f7fffff,                                                     "max",  false));
10901
10902                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_FLOAT_32,                     1234,                                                           true,   0x449a4000));
10903                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_FLOAT_64,                     1234,                                                           true,   0x4093480000000000));
10904                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_FLOAT_32,                     1234,                                                           true,   0x449a4000));
10905                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_FLOAT_64,                     1234,                                                           true,   0x4093480000000000));
10906                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_32,                     1234,                                                           true,   0x449a4000));
10907                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_64,                     1234,                                                           true,   0x4093480000000000));
10908         }
10909         else if (instruction == "OpConvertFToS")
10910         {
10911                 // Normal numbers from int8 range
10912                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0xC980,                                                         true,   -11,                                                            "m11",  false));
10913                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0xC2140000,                                                     true,   -37,                                                            "m37"));
10914                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0xC050800000000000ull,                          true,   -66,                                                            "m66"));
10915
10916                 // Minimum int8 value
10917                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0xD800,                                                         true,   -128,                                                           "min",  false));
10918                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0xC3000000,                                                     true,   -128,                                                           "min"));
10919                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0xC060000000000000ull,                          true,   -128,                                                           "min"));
10920
10921                 // Maximum int8 value
10922                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0x57F0,                                                         true,   127,                                                            "max",  false));
10923                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0x42FE0000,                                                     true,   127,                                                            "max"));
10924                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0x405FC00000000000ull,                          true,   127,                                                            "max"));
10925
10926                 // +0
10927                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0x0000,                                                         true,   0,                                                                      "p0",   false));
10928                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0x00000000,                                                     true,   0,                                                                      "p0"));
10929                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0x0000000000000000ull,                          true,   0,                                                                      "p0"));
10930
10931                 // -0
10932                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0x8000,                                                         true,   0,                                                                      "m0",   false));
10933                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0x80000000,                                                     true,   0,                                                                      "m0"));
10934                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0x8000000000000000ull,                          true,   0,                                                                      "m0"));
10935
10936                 // All hexadecimal values below represent -1234.0 as 16/32/64-bit IEEE 754 float
10937                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0xE4D2,                                                         true,   -1234,                                                          "m1234",        false));
10938                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0xE4D2,                                                         true,   -1234,                                                          "m1234",        false));
10939                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0xE4D2,                                                         true,   -1234,                                                          "m1234",        false));
10940
10941                 // 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
10942                 // 0xFBFF = 1111 1011 1111 1111 = 1 11110 1111111111 = -65504
10943                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0xF800,                                                         true,   -32768,                                                         "min",  false));
10944                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0xFBFF,                                                         true,   -65504,                                                         "min",  false));
10945                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0xFBFF,                                                         true,   -65504,                                                         "min",  false));
10946
10947                 // 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
10948                 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10949                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0x77FF,                                                         true,   32752,                                                          "max",  false));
10950                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0x7BFF,                                                         true,   65504,                                                          "max",  false));
10951                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0x7BFF,                                                         true,   65504,                                                          "max",  false));
10952
10953                 // +0
10954                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0x0000,                                                         true,   0,                                                                      "p0",   false));
10955                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0x0000,                                                         true,   0,                                                                      "p0",   false));
10956                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0x0000,                                                         true,   0,                                                                      "p0",   false));
10957
10958                 // -0
10959                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0x8000,                                                         true,   0,                                                                      "m0",   false));
10960                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0x8000,                                                         true,   0,                                                                      "m0",   false));
10961                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0x8000,                                                         true,   0,                                                                      "m0",   false));
10962
10963                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_16,            0xc49a4000,                                                     true,   -1234));
10964                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_32,            0xc49a4000,                                                     true,   -1234));
10965                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_64,            0xc49a4000,                                                     true,   -1234));
10966                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_16,            0xc093480000000000,                                     true,   -1234));
10967                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_32,            0xc093480000000000,                                     true,   -1234));
10968                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_64,            0xc093480000000000,                                     true,   -1234));
10969                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_16,            0x453b9000,                                                     true,    3001,                                                          "p3001"));
10970                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_16,            0xc53b9000,                                                     true,   -3001,                                                          "m3001"));
10971         }
10972         else if (instruction == "OpConvertSToF")
10973         {
10974                 // Normal numbers from int8 range
10975                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_16,                     -12,                                                            true,   0xCA00,                                                         "m21",  false));
10976                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_32,                     -21,                                                            true,   0xC1A80000,                                                     "m21"));
10977                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_64,                     -99,                                                            true,   0xC058C00000000000ull,                          "m99"));
10978
10979                 // Minimum int8 value
10980                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_16,                     -128,                                                           true,   0xD800,                                                         "min",  false));
10981                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_32,                     -128,                                                           true,   0xC3000000,                                                     "min"));
10982                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_64,                     -128,                                                           true,   0xC060000000000000ull,                          "min"));
10983
10984                 // Maximum int8 value
10985                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_16,                     127,                                                            true,   0x57F0,                                                         "max",  false));
10986                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_32,                     127,                                                            true,   0x42FE0000,                                                     "max"));
10987                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_64,                     127,                                                            true,   0x405FC00000000000ull,                          "max"));
10988
10989                 // All hexadecimal values below represent -1234.0 as 16-bit IEEE 754 float
10990                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_16,                     -1234,                                                          true,   0xE4D2,                                                         "m1234",        false));
10991                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_16,                     -1234,                                                          true,   0xE4D2,                                                         "m1234",        false));
10992                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_16,                     -1234,                                                          true,   0xE4D2,                                                         "m1234",        false));
10993
10994                 // 0x7800 = 0111 1000 0000 0000 = 0 11110 0000000000 = 32768
10995                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_16,                     32768,                                                          true,   0x7800,                                                         "p32768",       false));
10996                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_16,                     32768,                                                          true,   0x7800,                                                         "p32768",       false));
10997
10998                 // 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
10999                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_16,                     -32768,                                                         true,   0xF800,                                                         "m32768",       false));
11000                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_16,                     -32768,                                                         true,   0xF800,                                                         "m32768",       false));
11001
11002                 // 0xFBFF = 1111 1011 1111 1111 = 1 11110 1111111111 = -65504
11003                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_16,                     -32768,                                                         true,   0xF800,                                                         "min",  false));
11004                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_16,                     -65504,                                                         true,   0xFBFF,                                                         "min",  false));
11005                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_16,                     -65504,                                                         true,   0xFBFF,                                                         "min",  false));
11006
11007                 // 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
11008                 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
11009                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_16,                     32752,                                                          true,   0x77FF,                                                         "max",  false));
11010                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_16,                     65504,                                                          true,   0x7BFF,                                                         "max",  false));
11011                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_16,                     65504,                                                          true,   0x7BFF,                                                         "max",  false));
11012
11013                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_32,                     4294967296ll,                                           true,   0x4f800000,                                                     "p4294967296",  false));
11014                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_64,                     4294967296ll,                                           true,   0x41f0000000000000,                                     "p4294967296",  false));
11015                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_32,                     -4294967296ll,                                          true,   0xcf800000,                                                     "m4294967296",  false));
11016                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_64,                     -4294967296ll,                                          true,   0xc1f0000000000000,                                     "m4294967296",  false));
11017
11018                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_32,                     0x7fffff8000000000,                                     true,   0x5effffff,                                                     "max",  false));
11019                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_32,                     -0x7fffff8000000000,                            true,   0xdeffffff,                                                     "min",  false));
11020
11021                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_32,                     -1234,                                                          true,   0xc49a4000));
11022                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_64,                     -1234,                                                          true,   0xc093480000000000));
11023                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_32,                     -1234,                                                          true,   0xc49a4000));
11024                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_64,                     -1234,                                                          true,   0xc093480000000000));
11025                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_32,                     -1234,                                                          true,   0xc49a4000));
11026                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_64,                     -1234,                                                          true,   0xc093480000000000));
11027         }
11028         else
11029                 DE_FATAL("Unknown instruction");
11030 }
11031
11032 const map<string, string> getConvertCaseFragments (string instruction, const ConvertCase& convertCase)
11033 {
11034         map<string, string> params = convertCase.m_asmTypes;
11035         map<string, string> fragments;
11036
11037         params["instruction"] = instruction;
11038         params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
11039
11040         const StringTemplate decoration (
11041                 "      OpDecorate %SSBOi DescriptorSet 0\n"
11042                 "      OpDecorate %SSBOo DescriptorSet 0\n"
11043                 "      OpDecorate %SSBOi Binding 0\n"
11044                 "      OpDecorate %SSBOo Binding 1\n"
11045                 "      OpDecorate %s_SSBOi Block\n"
11046                 "      OpDecorate %s_SSBOo Block\n"
11047                 "OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11048                 "OpMemberDecorate %s_SSBOo 0 Offset 0\n");
11049
11050         const StringTemplate pre_main (
11051                 "${datatype_additional_decl:opt}"
11052                 "    %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11053                 "   %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11054                 "   %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11055                 "   %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11056                 " %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11057                 " %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11058                 "     %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11059                 "     %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
11060
11061         const StringTemplate testfun (
11062                 "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11063                 "%param      = OpFunctionParameter %v4f32\n"
11064                 "%label      = OpLabel\n"
11065                 "%iLoc       = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11066                 "%oLoc       = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11067                 "%valIn      = OpLoad %${inStorageType} %iLoc\n"
11068                 "%valInCast  = ${inCast} %${inputType} %valIn\n"
11069                 "%conv       = ${instruction} %${outputType} %valInCast\n"
11070                 "%valOutCast = ${outCast} %${outStorageType} %conv\n"
11071                 "              OpStore %oLoc %valOutCast\n"
11072                 "              OpReturnValue %param\n"
11073                 "              OpFunctionEnd\n");
11074
11075         params["datatype_extensions"] =
11076                 params["datatype_extensions"] +
11077                 "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
11078
11079         fragments["capability"] = params["datatype_capabilities"];
11080         fragments["extension"]  = params["datatype_extensions"];
11081         fragments["decoration"] = decoration.specialize(params);
11082         fragments["pre_main"]   = pre_main.specialize(params);
11083         fragments["testfun"]    = testfun.specialize(params);
11084
11085         return fragments;
11086 }
11087
11088 const map<string, string> getConvertCaseFragmentsNoStorage(string instruction, const ConvertCase& convertCase)
11089 {
11090         map<string, string> params = convertCase.m_asmTypes;
11091         map<string, string> fragments;
11092
11093         params["instruction"] = instruction;
11094         params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
11095
11096         const StringTemplate decoration(
11097                 "      OpDecorate %SSBOi DescriptorSet 0\n"
11098                 "      OpDecorate %SSBOo DescriptorSet 0\n"
11099                 "      OpDecorate %SSBOi Binding 0\n"
11100                 "      OpDecorate %SSBOo Binding 1\n"
11101                 "      OpDecorate %s_SSBOi Block\n"
11102                 "      OpDecorate %s_SSBOo Block\n"
11103                 "OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11104                 "OpMemberDecorate %s_SSBOo 0 Offset 0\n");
11105
11106         const StringTemplate pre_main(
11107                 "${datatype_additional_decl:opt}"
11108                 "    %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11109                 "   %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11110                 "   %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11111                 "   %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11112                 " %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11113                 " %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11114                 "     %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11115                 "     %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
11116
11117         const StringTemplate testfun(
11118                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11119                 "%param     = OpFunctionParameter %v4f32\n"
11120                 "%label     = OpLabel\n"
11121                 "%iLoc      = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11122                 "%oLoc      = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11123                 "%inval      = OpLoad %${inStorageType} %iLoc\n"
11124                 "%in_cast    = ${inCast} %${inputType} %inval\n"
11125                 "%conv       = ${instruction} %${outputType} %in_cast\n"
11126                 "%out_cast   = ${outCast} %${outStorageType} %conv\n"
11127                 "              OpStore %oLoc %out_cast\n"
11128                 "              OpReturnValue %param\n"
11129                 "              OpFunctionEnd\n");
11130
11131         params["datatype_extensions"] =
11132                 params["datatype_extensions"] +
11133                 "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
11134
11135         fragments["capability"] = params["datatype_capabilities"];
11136         fragments["extension"] = params["datatype_extensions"];
11137         fragments["decoration"] = decoration.specialize(params);
11138         fragments["pre_main"] = pre_main.specialize(params);
11139         fragments["testfun"] = testfun.specialize(params);
11140         return fragments;
11141 }
11142
11143 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in compute shaders
11144 tcu::TestCaseGroup* createConvertComputeTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
11145 {
11146         de::MovePtr<tcu::TestCaseGroup>         group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
11147         vector<ConvertCase>                                     testCases;
11148         createConvertCases(testCases, instruction);
11149
11150         for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11151         {
11152                 ComputeShaderSpec spec;
11153                 spec.assembly                   = getConvertCaseShaderStr(instruction, *test, true);
11154                 spec.numWorkGroups              = IVec3(1, 1, 1);
11155                 spec.inputs.push_back   (test->m_inputBuffer);
11156                 spec.outputs.push_back  (test->m_outputBuffer);
11157
11158                 getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, spec.requestedVulkanFeatures, spec.extensions);
11159
11160                 group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "", spec));
11161         }
11162         return group.release();
11163 }
11164
11165 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in graphics shaders
11166 tcu::TestCaseGroup* createConvertGraphicsTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
11167 {
11168         de::MovePtr<tcu::TestCaseGroup>         group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
11169         vector<ConvertCase>                                     testCases;
11170         createConvertCases(testCases, instruction);
11171
11172         for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11173         {
11174                 map<string, string>     fragments               = (test->m_useStorageExt) ? getConvertCaseFragments(instruction, *test) : getConvertCaseFragmentsNoStorage(instruction,*test);
11175                 VulkanFeatures          vulkanFeatures;
11176                 GraphicsResources       resources;
11177                 vector<string>          extensions;
11178                 SpecConstants           noSpecConstants;
11179                 PushConstants           noPushConstants;
11180                 GraphicsInterfaces      noInterfaces;
11181                 tcu::RGBA                       defaultColors[4];
11182
11183                 getDefaultColors                        (defaultColors);
11184                 resources.inputs.push_back      (Resource(test->m_inputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11185                 resources.outputs.push_back     (Resource(test->m_outputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11186                 extensions.push_back            ("VK_KHR_storage_buffer_storage_class");
11187
11188                 getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, vulkanFeatures, extensions);
11189
11190                 vulkanFeatures.coreFeatures.vertexPipelineStoresAndAtomics      = true;
11191                 vulkanFeatures.coreFeatures.fragmentStoresAndAtomics            = true;
11192
11193                 createTestsForAllStages(
11194                         test->m_name, defaultColors, defaultColors, fragments, noSpecConstants,
11195                         noPushConstants, resources, noInterfaces, extensions, vulkanFeatures, group.get());
11196         }
11197         return group.release();
11198 }
11199
11200 // Constant-Creation Instructions: OpConstant, OpConstantComposite
11201 tcu::TestCaseGroup* createOpConstantFloat16Tests(tcu::TestContext& testCtx)
11202 {
11203         de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests                (new tcu::TestCaseGroup(testCtx, "opconstant", "OpConstant and OpConstantComposite instruction"));
11204         RGBA                                                    inputColors[4];
11205         RGBA                                                    outputColors[4];
11206         vector<string>                                  extensions;
11207         GraphicsResources                               resources;
11208         VulkanFeatures                                  features;
11209
11210         const char                                              functionStart[]  =
11211                 "%test_code             = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11212                 "%param1                = OpFunctionParameter %v4f32\n"
11213                 "%lbl                   = OpLabel\n";
11214
11215         const char                                              functionEnd[]           =
11216                 "%transformed_param_32  = OpFConvert %v4f32 %transformed_param\n"
11217                 "                         OpReturnValue %transformed_param_32\n"
11218                 "                         OpFunctionEnd\n";
11219
11220         struct NameConstantsCode
11221         {
11222                 string name;
11223                 string constants;
11224                 string code;
11225         };
11226
11227 #define FLOAT_16_COMMON_TYPES_AND_CONSTS \
11228                         "%f16                  = OpTypeFloat 16\n"                                                 \
11229                         "%c_f16_0              = OpConstant %f16 0.0\n"                                            \
11230                         "%c_f16_0_5            = OpConstant %f16 0.5\n"                                            \
11231                         "%c_f16_1              = OpConstant %f16 1.0\n"                                            \
11232                         "%v4f16                = OpTypeVector %f16 4\n"                                            \
11233                         "%fp_f16               = OpTypePointer Function %f16\n"                                    \
11234                         "%fp_v4f16             = OpTypePointer Function %v4f16\n"                                  \
11235                         "%c_v4f16_1_1_1_1      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n" \
11236                         "%a4f16                = OpTypeArray %f16 %c_u32_4\n"                                      \
11237
11238         NameConstantsCode                               tests[] =
11239         {
11240                 {
11241                         "vec4",
11242
11243                         FLOAT_16_COMMON_TYPES_AND_CONSTS
11244                         "%cval                 = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_0\n",
11245                         "%param1_16            = OpFConvert %v4f16 %param1\n"
11246                         "%transformed_param    = OpFAdd %v4f16 %param1_16 %cval\n"
11247                 },
11248                 {
11249                         "struct",
11250
11251                         FLOAT_16_COMMON_TYPES_AND_CONSTS
11252                         "%stype                = OpTypeStruct %v4f16 %f16\n"
11253                         "%fp_stype             = OpTypePointer Function %stype\n"
11254                         "%f16_n_1              = OpConstant %f16 -1.0\n"
11255                         "%f16_1_5              = OpConstant %f16 !0x3e00\n" // +1.5
11256                         "%cvec                 = OpConstantComposite %v4f16 %f16_1_5 %f16_1_5 %f16_1_5 %c_f16_1\n"
11257                         "%cval                 = OpConstantComposite %stype %cvec %f16_n_1\n",
11258
11259                         "%v                    = OpVariable %fp_stype Function %cval\n"
11260                         "%vec_ptr              = OpAccessChain %fp_v4f16 %v %c_u32_0\n"
11261                         "%f16_ptr              = OpAccessChain %fp_f16 %v %c_u32_1\n"
11262                         "%vec_val              = OpLoad %v4f16 %vec_ptr\n"
11263                         "%f16_val              = OpLoad %f16 %f16_ptr\n"
11264                         "%tmp1                 = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_1 %f16_val\n" // vec4(-1)
11265                         "%param1_16            = OpFConvert %v4f16 %param1\n"
11266                         "%tmp2                 = OpFAdd %v4f16 %tmp1 %param1_16\n" // param1 + vec4(-1)
11267                         "%transformed_param    = OpFAdd %v4f16 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
11268                 },
11269                 {
11270                         // [1|0|0|0.5] [x] = x + 0.5
11271                         // [0|1|0|0.5] [y] = y + 0.5
11272                         // [0|0|1|0.5] [z] = z + 0.5
11273                         // [0|0|0|1  ] [1] = 1
11274                         "matrix",
11275
11276                         FLOAT_16_COMMON_TYPES_AND_CONSTS
11277                         "%mat4x4_f16           = OpTypeMatrix %v4f16 4\n"
11278                         "%v4f16_1_0_0_0        = OpConstantComposite %v4f16 %c_f16_1 %c_f16_0 %c_f16_0 %c_f16_0\n"
11279                         "%v4f16_0_1_0_0        = OpConstantComposite %v4f16 %c_f16_0 %c_f16_1 %c_f16_0 %c_f16_0\n"
11280                         "%v4f16_0_0_1_0        = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_1 %c_f16_0\n"
11281                         "%v4f16_0_5_0_5_0_5_1  = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_1\n"
11282                         "%cval                 = OpConstantComposite %mat4x4_f16 %v4f16_1_0_0_0 %v4f16_0_1_0_0 %v4f16_0_0_1_0 %v4f16_0_5_0_5_0_5_1\n",
11283
11284                         "%param1_16            = OpFConvert %v4f16 %param1\n"
11285                         "%transformed_param    = OpMatrixTimesVector %v4f16 %cval %param1_16\n"
11286                 },
11287                 {
11288                         "array",
11289
11290                         FLOAT_16_COMMON_TYPES_AND_CONSTS
11291                         "%c_v4f16_1_1_1_0      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
11292                         "%fp_a4f16             = OpTypePointer Function %a4f16\n"
11293                         "%f16_n_1              = OpConstant %f16 -1.0\n"
11294                         "%f16_1_5              = OpConstant %f16 !0x3e00\n" // +1.5
11295                         "%carr                 = OpConstantComposite %a4f16 %c_f16_0 %f16_n_1 %f16_1_5 %c_f16_0\n",
11296
11297                         "%v                    = OpVariable %fp_a4f16 Function %carr\n"
11298                         "%f                    = OpAccessChain %fp_f16 %v %c_u32_0\n"
11299                         "%f1                   = OpAccessChain %fp_f16 %v %c_u32_1\n"
11300                         "%f2                   = OpAccessChain %fp_f16 %v %c_u32_2\n"
11301                         "%f3                   = OpAccessChain %fp_f16 %v %c_u32_3\n"
11302                         "%f_val                = OpLoad %f16 %f\n"
11303                         "%f1_val               = OpLoad %f16 %f1\n"
11304                         "%f2_val               = OpLoad %f16 %f2\n"
11305                         "%f3_val               = OpLoad %f16 %f3\n"
11306                         "%ftot1                = OpFAdd %f16 %f_val %f1_val\n"
11307                         "%ftot2                = OpFAdd %f16 %ftot1 %f2_val\n"
11308                         "%ftot3                = OpFAdd %f16 %ftot2 %f3_val\n"  // 0 - 1 + 1.5 + 0
11309                         "%add_vec              = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %ftot3\n"
11310                         "%param1_16            = OpFConvert %v4f16 %param1\n"
11311                         "%transformed_param    = OpFAdd %v4f16 %param1_16 %add_vec\n"
11312                 },
11313                 {
11314                         //
11315                         // [
11316                         //   {
11317                         //      0.0,
11318                         //      [ 1.0, 1.0, 1.0, 1.0]
11319                         //   },
11320                         //   {
11321                         //      1.0,
11322                         //      [ 0.0, 0.5, 0.0, 0.0]
11323                         //   }, //     ^^^
11324                         //   {
11325                         //      0.0,
11326                         //      [ 1.0, 1.0, 1.0, 1.0]
11327                         //   }
11328                         // ]
11329                         "array_of_struct_of_array",
11330
11331                         FLOAT_16_COMMON_TYPES_AND_CONSTS
11332                         "%c_v4f16_1_1_1_0      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
11333                         "%fp_a4f16             = OpTypePointer Function %a4f16\n"
11334                         "%stype                = OpTypeStruct %f16 %a4f16\n"
11335                         "%a3stype              = OpTypeArray %stype %c_u32_3\n"
11336                         "%fp_a3stype           = OpTypePointer Function %a3stype\n"
11337                         "%ca4f16_0             = OpConstantComposite %a4f16 %c_f16_0 %c_f16_0_5 %c_f16_0 %c_f16_0\n"
11338                         "%ca4f16_1             = OpConstantComposite %a4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
11339                         "%cstype1              = OpConstantComposite %stype %c_f16_0 %ca4f16_1\n"
11340                         "%cstype2              = OpConstantComposite %stype %c_f16_1 %ca4f16_0\n"
11341                         "%carr                 = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
11342
11343                         "%v                    = OpVariable %fp_a3stype Function %carr\n"
11344                         "%f                    = OpAccessChain %fp_f16 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
11345                         "%f_l                  = OpLoad %f16 %f\n"
11346                         "%add_vec              = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %f_l\n"
11347                         "%param1_16            = OpFConvert %v4f16 %param1\n"
11348                         "%transformed_param    = OpFAdd %v4f16 %param1_16 %add_vec\n"
11349                 }
11350         };
11351
11352         getHalfColorsFullAlpha(inputColors);
11353         outputColors[0] = RGBA(255, 255, 255, 255);
11354         outputColors[1] = RGBA(255, 127, 127, 255);
11355         outputColors[2] = RGBA(127, 255, 127, 255);
11356         outputColors[3] = RGBA(127, 127, 255, 255);
11357
11358         extensions.push_back("VK_KHR_shader_float16_int8");
11359         features.extFloat16Int8.shaderFloat16 = true;
11360
11361         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
11362         {
11363                 map<string, string> fragments;
11364
11365                 fragments["capability"] = "OpCapability Float16\n";
11366                 fragments["pre_main"]   = tests[testNdx].constants;
11367                 fragments["testfun"]    = string(functionStart) + tests[testNdx].code + functionEnd;
11368
11369                 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, resources, extensions, opConstantCompositeTests.get(), features);
11370         }
11371         return opConstantCompositeTests.release();
11372 }
11373
11374 template<typename T>
11375 void finalizeTestsCreation (T&                                                  specResource,
11376                                                         const map<string, string>&      fragments,
11377                                                         tcu::TestContext&                       testCtx,
11378                                                         tcu::TestCaseGroup&                     testGroup,
11379                                                         const std::string&                      testName,
11380                                                         const VulkanFeatures&           vulkanFeatures,
11381                                                         const vector<string>&           extensions,
11382                                                         const IVec3&                            numWorkGroups,
11383                                                         const bool                                      splitRenderArea = false);
11384
11385 template<>
11386 void finalizeTestsCreation (GraphicsResources&                  specResource,
11387                                                         const map<string, string>&      fragments,
11388                                                         tcu::TestContext&                       ,
11389                                                         tcu::TestCaseGroup&                     testGroup,
11390                                                         const std::string&                      testName,
11391                                                         const VulkanFeatures&           vulkanFeatures,
11392                                                         const vector<string>&           extensions,
11393                                                         const IVec3&                            ,
11394                                                         const bool                                      splitRenderArea)
11395 {
11396         RGBA defaultColors[4];
11397         getDefaultColors(defaultColors);
11398
11399         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, specResource, extensions, &testGroup, vulkanFeatures, QP_TEST_RESULT_FAIL, std::string(), splitRenderArea);
11400 }
11401
11402 template<>
11403 void finalizeTestsCreation (ComputeShaderSpec&                  specResource,
11404                                                         const map<string, string>&      fragments,
11405                                                         tcu::TestContext&                       testCtx,
11406                                                         tcu::TestCaseGroup&                     testGroup,
11407                                                         const std::string&                      testName,
11408                                                         const VulkanFeatures&           vulkanFeatures,
11409                                                         const vector<string>&           extensions,
11410                                                         const IVec3&                            numWorkGroups,
11411                                                         bool)
11412 {
11413         specResource.numWorkGroups = numWorkGroups;
11414         specResource.requestedVulkanFeatures = vulkanFeatures;
11415         specResource.extensions = extensions;
11416
11417         specResource.assembly = makeComputeShaderAssembly(fragments);
11418
11419         testGroup.addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", specResource));
11420 }
11421
11422 template<class SpecResource>
11423 tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bool nanSupported)
11424 {
11425         const string                                            nan                                     = nanSupported ? "_nan" : "";
11426         const string                                            groupName                       = "logical" + nan;
11427         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Float 16 logical tests"));
11428
11429         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
11430         const string                                            spvCapabilities         = string("OpCapability Float16\n") + (nanSupported ? "OpCapability SignedZeroInfNanPreserve\n" : "");
11431         const string                                            spvExtensions           = (nanSupported ? "OpExtension \"SPV_KHR_float_controls\"\n" : "");
11432         const string                                            spvExecutionMode        = nanSupported ? "OpExecutionMode %BP_main SignedZeroInfNanPreserve 16\n" : "";
11433         const deUint32                                          numDataPointsScalar     = 16;
11434         const deUint32                                          numDataPointsVector     = 14;
11435         const vector<deFloat16>                         float16DataScalar       = getFloat16s(rnd, numDataPointsScalar);
11436         const vector<deFloat16>                         float16DataVector       = getFloat16s(rnd, numDataPointsVector);
11437         const vector<deFloat16>                         float16Data1            = squarize(float16DataScalar, 0);                       // Total Size: square(sizeof(float16DataScalar))
11438         const vector<deFloat16>                         float16Data2            = squarize(float16DataScalar, 1);
11439         const vector<deFloat16>                         float16DataVec1         = squarizeVector(float16DataVector, 0);         // Total Size: 2 * (square(square(sizeof(float16DataVector))))
11440         const vector<deFloat16>                         float16DataVec2         = squarizeVector(float16DataVector, 1);
11441         const vector<deFloat16>                         float16OutUnused        (float16Data1.size(), 0);
11442         const vector<deFloat16>                         float16OutVecUnused     (float16DataVec1.size(), 0);
11443
11444         struct TestOp
11445         {
11446                 const char*             opCode;
11447                 VerifyIOFunc    verifyFuncNan;
11448                 VerifyIOFunc    verifyFuncNonNan;
11449                 const deUint32  argCount;
11450         };
11451
11452         const TestOp    testOps[]       =
11453         {
11454                 { "OpIsNan"                                             ,       compareFP16Logical<fp16isNan,                           true,  false, true>,    compareFP16Logical<fp16isNan,                           true,  false, false>,   1       },
11455                 { "OpIsInf"                                             ,       compareFP16Logical<fp16isInf,                           true,  false, true>,    compareFP16Logical<fp16isInf,                           true,  false, false>,   1       },
11456                 { "OpFOrdEqual"                                 ,       compareFP16Logical<fp16isEqual,                         false, true,  true>,    compareFP16Logical<fp16isEqual,                         false, true,  false>,   2       },
11457                 { "OpFUnordEqual"                               ,       compareFP16Logical<fp16isEqual,                         false, false, true>,    compareFP16Logical<fp16isEqual,                         false, false, false>,   2       },
11458                 { "OpFOrdNotEqual"                              ,       compareFP16Logical<fp16isUnequal,                       false, true,  true>,    compareFP16Logical<fp16isUnequal,                       false, true,  false>,   2       },
11459                 { "OpFUnordNotEqual"                    ,       compareFP16Logical<fp16isUnequal,                       false, false, true>,    compareFP16Logical<fp16isUnequal,                       false, false, false>,   2       },
11460                 { "OpFOrdLessThan"                              ,       compareFP16Logical<fp16isLess,                          false, true,  true>,    compareFP16Logical<fp16isLess,                          false, true,  false>,   2       },
11461                 { "OpFUnordLessThan"                    ,       compareFP16Logical<fp16isLess,                          false, false, true>,    compareFP16Logical<fp16isLess,                          false, false, false>,   2       },
11462                 { "OpFOrdGreaterThan"                   ,       compareFP16Logical<fp16isGreater,                       false, true,  true>,    compareFP16Logical<fp16isGreater,                       false, true,  false>,   2       },
11463                 { "OpFUnordGreaterThan"                 ,       compareFP16Logical<fp16isGreater,                       false, false, true>,    compareFP16Logical<fp16isGreater,                       false, false, false>,   2       },
11464                 { "OpFOrdLessThanEqual"                 ,       compareFP16Logical<fp16isLessOrEqual,           false, true,  true>,    compareFP16Logical<fp16isLessOrEqual,           false, true,  false>,   2       },
11465                 { "OpFUnordLessThanEqual"               ,       compareFP16Logical<fp16isLessOrEqual,           false, false, true>,    compareFP16Logical<fp16isLessOrEqual,           false, false, false>,   2       },
11466                 { "OpFOrdGreaterThanEqual"              ,       compareFP16Logical<fp16isGreaterOrEqual,        false, true,  true>,    compareFP16Logical<fp16isGreaterOrEqual,        false, true,  false>,   2       },
11467                 { "OpFUnordGreaterThanEqual"    ,       compareFP16Logical<fp16isGreaterOrEqual,        false, false, true>,    compareFP16Logical<fp16isGreaterOrEqual,        false, false, false>,   2       },
11468         };
11469
11470         { // scalar cases
11471                 const StringTemplate preMain
11472                 (
11473                         "      %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11474                         "     %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11475                         "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11476                         " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
11477                         "            %f16 = OpTypeFloat 16\n"
11478                         "          %v2f16 = OpTypeVector %f16 2\n"
11479                         "        %c_f16_0 = OpConstant %f16 0.0\n"
11480                         "        %c_f16_1 = OpConstant %f16 1.0\n"
11481                         "         %up_u32 = OpTypePointer Uniform %u32\n"
11482                         "         %ra_u32 = OpTypeArray %u32 %c_i32_hndp\n"
11483                         "         %SSBO16 = OpTypeStruct %ra_u32\n"
11484                         "      %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11485                         "     %f16_i32_fn = OpTypeFunction %f16 %i32\n"
11486                         "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11487                         "      %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11488                         "      %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11489                         "       %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11490                 );
11491
11492                 const StringTemplate decoration
11493                 (
11494                         "OpDecorate %ra_u32 ArrayStride 4\n"
11495                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
11496                         "OpDecorate %SSBO16 BufferBlock\n"
11497                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
11498                         "OpDecorate %ssbo_src0 Binding 0\n"
11499                         "OpDecorate %ssbo_src1 DescriptorSet 0\n"
11500                         "OpDecorate %ssbo_src1 Binding 1\n"
11501                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
11502                         "OpDecorate %ssbo_dst Binding 2\n"
11503                 );
11504
11505                 const StringTemplate testFun
11506                 (
11507                         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11508                         "    %param = OpFunctionParameter %v4f32\n"
11509
11510                         "    %entry = OpLabel\n"
11511                         "        %i = OpVariable %fp_i32 Function\n"
11512                         "             OpStore %i %c_i32_0\n"
11513                         "             OpBranch %loop\n"
11514
11515                         "     %loop = OpLabel\n"
11516                         "    %i_cmp = OpLoad %i32 %i\n"
11517                         "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11518                         "             OpLoopMerge %merge %next None\n"
11519                         "             OpBranchConditional %lt %write %merge\n"
11520
11521                         "    %write = OpLabel\n"
11522                         "      %ndx = OpLoad %i32 %i\n"
11523
11524                         " %val_src0 = OpFunctionCall %f16 %ld_arg_ssbo_src0 %ndx\n"
11525
11526                         "${op_arg1_calc}"
11527
11528                         " %val_bdst = ${op_code} %bool %val_src0 ${op_arg1}\n"
11529                         "  %val_dst = OpSelect %f16 %val_bdst %c_f16_1 %c_f16_0\n"
11530                         "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11531                         "             OpBranch %next\n"
11532
11533                         "     %next = OpLabel\n"
11534                         "    %i_cur = OpLoad %i32 %i\n"
11535                         "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11536                         "             OpStore %i %i_new\n"
11537                         "             OpBranch %loop\n"
11538
11539                         "    %merge = OpLabel\n"
11540                         "             OpReturnValue %param\n"
11541
11542                         "             OpFunctionEnd\n"
11543                 );
11544
11545                 const StringTemplate arg1Calc
11546                 (
11547                         " %val_src1 = OpFunctionCall %f16 %ld_arg_ssbo_src1 %ndx\n"
11548                 );
11549
11550                 for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11551                 {
11552                         const size_t            iterations              = float16Data1.size();
11553                         const TestOp&           testOp                  = testOps[testOpsIdx];
11554                         const string            testName                = de::toLower(string(testOp.opCode)) + "_scalar";
11555                         SpecResource            specResource;
11556                         map<string, string>     specs;
11557                         VulkanFeatures          features;
11558                         map<string, string>     fragments;
11559                         vector<string>          extensions;
11560
11561                         specs["num_data_points"]        = de::toString(iterations);
11562                         specs["op_code"]                        = testOp.opCode;
11563                         specs["op_arg1"]                        = (testOp.argCount == 1) ? "" : "%val_src1";
11564                         specs["op_arg1_calc"]           = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11565
11566                         fragments["extension"]          = spvExtensions;
11567                         fragments["capability"]         = spvCapabilities;
11568                         fragments["execution_mode"]     = spvExecutionMode;
11569                         fragments["decoration"]         = decoration.specialize(specs);
11570                         fragments["pre_main"]           = preMain.specialize(specs);
11571                         fragments["testfun"]            = testFun.specialize(specs);
11572                         fragments["testfun"]            += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src0"}});
11573                         if (testOp.argCount > 1)
11574                         {
11575                                 fragments["testfun"]    += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src1"}});
11576                         }
11577                         fragments["testfun"]            += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
11578
11579                         specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11580                         specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11581                         specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11582                         specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11583
11584                         extensions.push_back("VK_KHR_shader_float16_int8");
11585
11586                         if (nanSupported)
11587                         {
11588                                 extensions.push_back("VK_KHR_shader_float_controls");
11589
11590                                 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11591                         }
11592
11593                         features.extFloat16Int8.shaderFloat16 = true;
11594
11595                         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11596                 }
11597         }
11598         { // vector cases
11599                 const StringTemplate preMain
11600                 (
11601                         "        %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11602                         "           %v2bool = OpTypeVector %bool 2\n"
11603                         "              %f16 = OpTypeFloat 16\n"
11604                         "          %c_f16_0 = OpConstant %f16 0.0\n"
11605                         "          %c_f16_1 = OpConstant %f16 1.0\n"
11606                         "            %v2f16 = OpTypeVector %f16 2\n"
11607                         "      %c_v2f16_0_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11608                         "      %c_v2f16_1_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
11609                         "           %up_u32 = OpTypePointer Uniform %u32\n"
11610                         "           %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
11611                         "           %SSBO16 = OpTypeStruct %ra_u32\n"
11612                         "        %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11613                         "     %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11614                         "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
11615                         "        %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11616                         "        %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11617                         "         %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11618                 );
11619
11620                 const StringTemplate decoration
11621                 (
11622                         "OpDecorate %ra_u32 ArrayStride 4\n"
11623                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
11624                         "OpDecorate %SSBO16 BufferBlock\n"
11625                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
11626                         "OpDecorate %ssbo_src0 Binding 0\n"
11627                         "OpDecorate %ssbo_src1 DescriptorSet 0\n"
11628                         "OpDecorate %ssbo_src1 Binding 1\n"
11629                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
11630                         "OpDecorate %ssbo_dst Binding 2\n"
11631                 );
11632
11633                 const StringTemplate testFun
11634                 (
11635                         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11636                         "    %param = OpFunctionParameter %v4f32\n"
11637
11638                         "    %entry = OpLabel\n"
11639                         "        %i = OpVariable %fp_i32 Function\n"
11640                         "             OpStore %i %c_i32_0\n"
11641                         "             OpBranch %loop\n"
11642
11643                         "     %loop = OpLabel\n"
11644                         "    %i_cmp = OpLoad %i32 %i\n"
11645                         "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11646                         "             OpLoopMerge %merge %next None\n"
11647                         "             OpBranchConditional %lt %write %merge\n"
11648
11649                         "    %write = OpLabel\n"
11650                         "      %ndx = OpLoad %i32 %i\n"
11651
11652                         " %val_src0 = OpFunctionCall %v2f16 %ld_arg_ssbo_src0 %ndx\n"
11653
11654                         "${op_arg1_calc}"
11655
11656                         " %val_bdst = ${op_code} %v2bool %val_src0 ${op_arg1}\n"
11657                         "  %val_dst = OpSelect %v2f16 %val_bdst %c_v2f16_1_1 %c_v2f16_0_0\n"
11658                         "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11659                         "             OpBranch %next\n"
11660
11661                         "     %next = OpLabel\n"
11662                         "    %i_cur = OpLoad %i32 %i\n"
11663                         "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11664                         "             OpStore %i %i_new\n"
11665                         "             OpBranch %loop\n"
11666
11667                         "    %merge = OpLabel\n"
11668                         "             OpReturnValue %param\n"
11669
11670                         "             OpFunctionEnd\n"
11671                 );
11672
11673                 const StringTemplate arg1Calc
11674                 (
11675                         " %val_src1 = OpFunctionCall %v2f16 %ld_arg_ssbo_src1 %ndx\n"
11676                 );
11677
11678                 for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11679                 {
11680                         const deUint32          itemsPerVec     = 2;
11681                         const size_t            iterations      = float16DataVec1.size() / itemsPerVec;
11682                         const TestOp&           testOp          = testOps[testOpsIdx];
11683                         const string            testName        = de::toLower(string(testOp.opCode)) + "_vector";
11684                         SpecResource            specResource;
11685                         map<string, string>     specs;
11686                         vector<string>          extensions;
11687                         VulkanFeatures          features;
11688                         map<string, string>     fragments;
11689
11690                         specs["num_data_points"]        = de::toString(iterations);
11691                         specs["op_code"]                        = testOp.opCode;
11692                         specs["op_arg1"]                        = (testOp.argCount == 1) ? "" : "%val_src1";
11693                         specs["op_arg1_calc"]           = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11694
11695                         fragments["extension"]          = spvExtensions;
11696                         fragments["capability"]         = spvCapabilities;
11697                         fragments["execution_mode"]     = spvExecutionMode;
11698                         fragments["decoration"]         = decoration.specialize(specs);
11699                         fragments["pre_main"]           = preMain.specialize(specs);
11700                         fragments["testfun"]            = testFun.specialize(specs);
11701                         fragments["testfun"]            += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src0"}});
11702                         if (testOp.argCount > 1)
11703                         {
11704                                 fragments["testfun"]    += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src1"}});
11705                         }
11706                         fragments["testfun"]            += StringTemplate(storeV2F16AsUint).specialize({{"var", "ssbo_dst"}});
11707
11708                         specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11709                         specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11710                         specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutVecUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11711                         specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11712
11713                         extensions.push_back("VK_KHR_shader_float16_int8");
11714
11715                         if (nanSupported)
11716                         {
11717                                 extensions.push_back("VK_KHR_shader_float_controls");
11718
11719                                 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11720                         }
11721
11722                         features.extFloat16Int8.shaderFloat16 = true;
11723
11724                         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1), true);
11725                 }
11726         }
11727
11728         return testGroup.release();
11729 }
11730
11731 bool compareFP16FunctionSetFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11732 {
11733         if (inputs.size() != 1 || outputAllocs.size() != 1)
11734                 return false;
11735
11736         vector<deUint8> input1Bytes;
11737
11738         inputs[0].getBytes(input1Bytes);
11739
11740         const deUint16* const   input1AsFP16    = (const deUint16*)&input1Bytes[0];
11741         const deUint16* const   outputAsFP16    = (const deUint16*)outputAllocs[0]->getHostPtr();
11742         std::string                             error;
11743
11744         for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deUint16); ++idx)
11745         {
11746                 if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
11747                 {
11748                         log << TestLog::Message << error << TestLog::EndMessage;
11749
11750                         return false;
11751                 }
11752         }
11753
11754         return true;
11755 }
11756
11757 template<class SpecResource>
11758 tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
11759 {
11760         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "function", "Float 16 function call related tests"));
11761
11762         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
11763         const StringTemplate                            capabilities            ("OpCapability Float16\n");
11764         const deUint32                                          numDataPoints           = 256;
11765         const vector<deFloat16>                         float16InputData        = getFloat16s(rnd, numDataPoints);
11766         const vector<deFloat16>                         float16OutputUnused     (float16InputData.size(), 0);
11767         map<string, string>                                     fragments;
11768
11769         struct TestType
11770         {
11771                 const deUint32  typeComponents;
11772                 const char*             typeName;
11773                 const char*             typeDecls;
11774                 const char*             typeStorage;
11775                 const string            loadFunc;
11776                 const string            storeFunc;
11777         };
11778
11779         const TestType  testTypes[]     =
11780         {
11781                 {
11782                         1,
11783                         "f16",
11784                         "      %v2f16 = OpTypeVector %f16 2\n"
11785                         "%f16_i32_fn = OpTypeFunction %f16 %i32\n"
11786                         "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11787                         "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11788                         " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
11789                         "u32_hndp",
11790                         loadScalarF16FromUint,
11791                         storeScalarF16AsUint
11792                 },
11793                 {
11794                         2,
11795                         "v2f16",
11796                         "      %v2f16 = OpTypeVector %f16 2\n"
11797                         "  %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11798                         "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11799                         "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
11800                         "u32_ndp",
11801                         loadV2F16FromUint,
11802                         storeV2F16AsUint
11803                 },
11804                 {
11805                         4,
11806                         "v4f16",
11807                         "      %v2f16 = OpTypeVector %f16 2\n"
11808                         "      %v4f16 = OpTypeVector %f16 4\n"
11809                         "  %c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
11810                         "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
11811                         "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
11812                         "ra_u32_2",
11813                         loadV4F16FromUints,
11814                         storeV4F16AsUints
11815                 },
11816         };
11817
11818         const StringTemplate preMain
11819         (
11820                 "  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11821                 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11822                 "     %v2bool = OpTypeVector %bool 2\n"
11823                 "        %f16 = OpTypeFloat 16\n"
11824                 "    %c_f16_0 = OpConstant %f16 0.0\n"
11825
11826                 "${type_decls}"
11827
11828                 "  %${tt}_fun = OpTypeFunction %${tt} %${tt}\n"
11829                 "   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
11830                 "%ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
11831                 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
11832                 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
11833                 "         %up_u32 = OpTypePointer Uniform %u32\n"
11834                 "     %SSBO16 = OpTypeStruct %ra_${ts}\n"
11835                 "  %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11836                 "   %ssbo_src = OpVariable %up_SSBO16 Uniform\n"
11837                 "   %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11838         );
11839
11840         const StringTemplate decoration
11841         (
11842                 "OpDecorate %ra_u32_2 ArrayStride 4\n"
11843                 "OpDecorate %ra_u32_hndp ArrayStride 4\n"
11844                 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
11845                 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
11846                 "OpMemberDecorate %SSBO16 0 Offset 0\n"
11847                 "OpDecorate %SSBO16 BufferBlock\n"
11848                 "OpDecorate %ssbo_src DescriptorSet 0\n"
11849                 "OpDecorate %ssbo_src Binding 0\n"
11850                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11851                 "OpDecorate %ssbo_dst Binding 1\n"
11852         );
11853
11854         const StringTemplate testFun
11855         (
11856                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11857                 "    %param = OpFunctionParameter %v4f32\n"
11858                 "    %entry = OpLabel\n"
11859
11860                 "        %i = OpVariable %fp_i32 Function\n"
11861                 "             OpStore %i %c_i32_0\n"
11862                 "             OpBranch %loop\n"
11863
11864                 "     %loop = OpLabel\n"
11865                 "    %i_cmp = OpLoad %i32 %i\n"
11866                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11867                 "             OpLoopMerge %merge %next None\n"
11868                 "             OpBranchConditional %lt %write %merge\n"
11869
11870                 "    %write = OpLabel\n"
11871                 "      %ndx = OpLoad %i32 %i\n"
11872
11873                 "  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
11874                 "  %val_dst = OpFunctionCall %${tt} %pass_fun %val_src\n"
11875                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11876                 "             OpBranch %next\n"
11877
11878                 "     %next = OpLabel\n"
11879                 "    %i_cur = OpLoad %i32 %i\n"
11880                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11881                 "             OpStore %i %i_new\n"
11882                 "             OpBranch %loop\n"
11883
11884                 "    %merge = OpLabel\n"
11885                 "             OpReturnValue %param\n"
11886
11887                 "             OpFunctionEnd\n"
11888
11889                 " %pass_fun = OpFunction %${tt} None %${tt}_fun\n"
11890                 "   %param0 = OpFunctionParameter %${tt}\n"
11891                 " %entry_pf = OpLabel\n"
11892                 "     %res0 = OpFAdd %${tt} %param0 %c_${tt}_0\n"
11893                 "             OpReturnValue %res0\n"
11894                 "             OpFunctionEnd\n"
11895         );
11896
11897         for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
11898         {
11899                 const TestType&         testType                = testTypes[testTypeIdx];
11900                 const string            testName                = testType.typeName;
11901                 const deUint32          itemsPerType    = testType.typeComponents;
11902                 const size_t            iterations              = float16InputData.size() / itemsPerType;
11903                 const size_t            typeStride              = itemsPerType * sizeof(deFloat16);
11904                 SpecResource            specResource;
11905                 map<string, string>     specs;
11906                 VulkanFeatures          features;
11907                 vector<string>          extensions;
11908
11909                 specs["num_data_points"]        = de::toString(iterations);
11910                 specs["tt"]                                     = testType.typeName;
11911                 specs["ts"]                                     = testType.typeStorage;
11912                 specs["tt_stride"]                      = de::toString(typeStride);
11913                 specs["type_decls"]                     = testType.typeDecls;
11914
11915                 fragments["capability"]         = capabilities.specialize(specs);
11916                 fragments["decoration"]         = decoration.specialize(specs);
11917                 fragments["pre_main"]           = preMain.specialize(specs);
11918                 fragments["testfun"]            = testFun.specialize(specs);
11919                 fragments["testfun"]            += StringTemplate(testType.loadFunc).specialize({{"var", "ssbo_src"}});
11920                 fragments["testfun"]            += StringTemplate(testType.storeFunc).specialize({{"var", "ssbo_dst"}});
11921
11922                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11923                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11924                 specResource.verifyIO = compareFP16FunctionSetFunc;
11925
11926                 extensions.push_back("VK_KHR_shader_float16_int8");
11927
11928                 features.extFloat16Int8.shaderFloat16 = true;
11929
11930                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11931         }
11932
11933         return testGroup.release();
11934 }
11935
11936 bool compareFP16VectorExtractFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11937 {
11938         if (inputs.size() != 2 || outputAllocs.size() != 1)
11939                 return false;
11940
11941         vector<deUint8> input1Bytes;
11942         vector<deUint8> input2Bytes;
11943
11944         inputs[0].getBytes(input1Bytes);
11945         inputs[1].getBytes(input2Bytes);
11946
11947         DE_ASSERT(input1Bytes.size() > 0);
11948         DE_ASSERT(input2Bytes.size() > 0);
11949         DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
11950
11951         const size_t                    iterations              = input2Bytes.size() / sizeof(deUint32);
11952         const size_t                    components              = input1Bytes.size() / (sizeof(deFloat16) * iterations);
11953         const deFloat16* const  input1AsFP16    = (const deFloat16*)&input1Bytes[0];
11954         const deUint32* const   inputIndices    = (const deUint32*)&input2Bytes[0];
11955         const deFloat16* const  outputAsFP16    = (const deFloat16*)outputAllocs[0]->getHostPtr();
11956         std::string                             error;
11957
11958         DE_ASSERT(components == 2 || components == 4);
11959         DE_ASSERT(input1Bytes.size() == iterations * components * sizeof(deFloat16));
11960
11961         for (size_t idx = 0; idx < iterations; ++idx)
11962         {
11963                 const deUint32  componentNdx    = inputIndices[idx];
11964
11965                 DE_ASSERT(componentNdx < components);
11966
11967                 const deFloat16 expected                = input1AsFP16[components * idx + componentNdx];
11968
11969                 if (!compare16BitFloat(expected, outputAsFP16[idx], error))
11970                 {
11971                         log << TestLog::Message << "At " << idx << error << TestLog::EndMessage;
11972
11973                         return false;
11974                 }
11975         }
11976
11977         return true;
11978 }
11979
11980 template<class SpecResource>
11981 tcu::TestCaseGroup* createFloat16VectorExtractSet (tcu::TestContext& testCtx)
11982 {
11983         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "opvectorextractdynamic", "OpVectorExtractDynamic tests"));
11984
11985         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
11986         const deUint32                                          numDataPoints           = 256;
11987         const vector<deFloat16>                         float16InputData        = getFloat16s(rnd, numDataPoints);
11988         const vector<deFloat16>                         float16OutputUnused     (float16InputData.size(), 0);
11989
11990         struct TestType
11991         {
11992                 const deUint32  typeComponents;
11993                 const size_t    typeStride;
11994                 const char*             typeName;
11995                 const char*             typeDecls;
11996                 const char*             typeStorage;
11997                 const string            loadFunction;
11998                 const string            storeFunction;
11999         };
12000
12001         const TestType  testTypes[]     =
12002         {
12003                 {
12004                         2,
12005                         2 * sizeof(deFloat16),
12006                         "v2f16",
12007                         "      %v2f16 = OpTypeVector %f16 2\n"
12008                         "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12009                         "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12010                         "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12011                         " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12012                         "u32",
12013                         loadV2F16FromUint,
12014                         storeScalarF16AsUint
12015                 },
12016                 {
12017                         3,
12018                         4 * sizeof(deFloat16),
12019                         "v3f16",
12020                         "      %v2f16 = OpTypeVector %f16 2\n"
12021                         "      %v3f16 = OpTypeVector %f16 3\n"
12022                         "%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12023                         "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12024                         "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12025                         " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12026                         "ra_u32_2",
12027                         loadV3F16FromUints,
12028                         storeScalarF16AsUint
12029                 },
12030                 {
12031                         4,
12032                         4 * sizeof(deFloat16),
12033                         "v4f16",
12034                         "      %v2f16 = OpTypeVector %f16 2\n"
12035                         "      %v4f16 = OpTypeVector %f16 4\n"
12036                         "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12037                         "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12038                         "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12039                         " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12040                         "ra_u32_2",
12041                         loadV4F16FromUints,
12042                         storeScalarF16AsUint
12043                 },
12044         };
12045
12046         const StringTemplate preMain
12047         (
12048                 "  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12049                 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
12050                 "        %f16 = OpTypeFloat 16\n"
12051
12052                 "${type_decl}"
12053
12054                 "     %up_u32 = OpTypePointer Uniform %u32\n"
12055                 "     %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12056                 "   %SSBO_IDX = OpTypeStruct %ra_u32\n"
12057                 "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12058
12059                 "   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12060                 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12061                 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12062                 "   %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12063                 "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12064
12065                 " %ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
12066                 "   %SSBO_DST = OpTypeStruct %ra_u32_hndp\n"
12067                 "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12068
12069                 "   %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12070                 "   %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12071                 "   %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
12072         );
12073
12074         const StringTemplate decoration
12075         (
12076                 "OpDecorate %ra_u32_2 ArrayStride 4\n"
12077                 "OpDecorate %ra_u32_hndp ArrayStride 4\n"
12078                 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12079                 "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12080                 "OpDecorate %SSBO_SRC BufferBlock\n"
12081                 "OpDecorate %ssbo_src DescriptorSet 0\n"
12082                 "OpDecorate %ssbo_src Binding 0\n"
12083
12084                 "OpDecorate %ra_u32 ArrayStride 4\n"
12085                 "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12086                 "OpDecorate %SSBO_IDX BufferBlock\n"
12087                 "OpDecorate %ssbo_idx DescriptorSet 0\n"
12088                 "OpDecorate %ssbo_idx Binding 1\n"
12089
12090                 "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12091                 "OpDecorate %SSBO_DST BufferBlock\n"
12092                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12093                 "OpDecorate %ssbo_dst Binding 2\n"
12094         );
12095
12096         const StringTemplate testFun
12097         (
12098                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12099                 "    %param = OpFunctionParameter %v4f32\n"
12100                 "    %entry = OpLabel\n"
12101
12102                 "        %i = OpVariable %fp_i32 Function\n"
12103                 "             OpStore %i %c_i32_0\n"
12104
12105                 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12106                 "             OpSelectionMerge %end_if None\n"
12107                 "             OpBranchConditional %will_run %run_test %end_if\n"
12108
12109                 " %run_test = OpLabel\n"
12110                 "             OpBranch %loop\n"
12111
12112                 "     %loop = OpLabel\n"
12113                 "    %i_cmp = OpLoad %i32 %i\n"
12114                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12115                 "             OpLoopMerge %merge %next None\n"
12116                 "             OpBranchConditional %lt %write %merge\n"
12117
12118                 "    %write = OpLabel\n"
12119                 "      %ndx = OpLoad %i32 %i\n"
12120
12121                 "  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12122
12123                 "  %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12124                 "  %val_idx = OpLoad %u32 %src_idx\n"
12125
12126                 "  %val_dst = OpVectorExtractDynamic %f16 %val_src %val_idx\n"
12127                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12128
12129                 "             OpBranch %next\n"
12130
12131                 "     %next = OpLabel\n"
12132                 "    %i_cur = OpLoad %i32 %i\n"
12133                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12134                 "             OpStore %i %i_new\n"
12135                 "             OpBranch %loop\n"
12136
12137                 "    %merge = OpLabel\n"
12138                 "             OpBranch %end_if\n"
12139                 "   %end_if = OpLabel\n"
12140                 "             OpReturnValue %param\n"
12141
12142                 "             OpFunctionEnd\n"
12143         );
12144
12145         for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12146         {
12147                 const TestType&         testType                = testTypes[testTypeIdx];
12148                 const string            testName                = testType.typeName;
12149                 const size_t            itemsPerType    = testType.typeStride / sizeof(deFloat16);
12150                 const size_t            iterations              = float16InputData.size() / itemsPerType;
12151                 SpecResource            specResource;
12152                 map<string, string>     specs;
12153                 VulkanFeatures          features;
12154                 vector<deUint32>        inputDataNdx;
12155                 map<string, string>     fragments;
12156                 vector<string>          extensions;
12157
12158                 for (deUint32 ndx = 0; ndx < iterations; ++ndx)
12159                         inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12160
12161                 specs["num_data_points"]        = de::toString(iterations);
12162                 specs["tt"]                                     = testType.typeName;
12163                 specs["ts"]                                     = testType.typeStorage;
12164                 specs["tt_stride"]                      = de::toString(testType.typeStride);
12165                 specs["type_decl"]                      = testType.typeDecls;
12166
12167                 fragments["capability"]         = "OpCapability Float16\n";
12168                 fragments["decoration"]         = decoration.specialize(specs);
12169                 fragments["pre_main"]           = preMain.specialize(specs);
12170                 fragments["testfun"]            = testFun.specialize(specs);
12171                 fragments["testfun"]            += StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12172                 fragments["testfun"]            += StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12173
12174                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12175                 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12176                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12177                 specResource.verifyIO = compareFP16VectorExtractFunc;
12178
12179                 extensions.push_back("VK_KHR_shader_float16_int8");
12180
12181                 features.extFloat16Int8.shaderFloat16 = true;
12182
12183                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12184         }
12185
12186         return testGroup.release();
12187 }
12188
12189 template<deUint32 COMPONENTS_COUNT, deUint32 REPLACEMENT>
12190 bool compareFP16VectorInsertFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12191 {
12192         if (inputs.size() != 2 || outputAllocs.size() != 1)
12193                 return false;
12194
12195         vector<deUint8> input1Bytes;
12196         vector<deUint8> input2Bytes;
12197
12198         inputs[0].getBytes(input1Bytes);
12199         inputs[1].getBytes(input2Bytes);
12200
12201         DE_ASSERT(input1Bytes.size() > 0);
12202         DE_ASSERT(input2Bytes.size() > 0);
12203         DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
12204
12205         const size_t                    iterations                      = input2Bytes.size() / sizeof(deUint32);
12206         const size_t                    componentsStride        = input1Bytes.size() / (sizeof(deFloat16) * iterations);
12207         const deFloat16* const  input1AsFP16            = (const deFloat16*)&input1Bytes[0];
12208         const deUint32* const   inputIndices            = (const deUint32*)&input2Bytes[0];
12209         const deFloat16* const  outputAsFP16            = (const deFloat16*)outputAllocs[0]->getHostPtr();
12210         const deFloat16                 magic                           = tcu::Float16(float(REPLACEMENT)).bits();
12211         std::string                             error;
12212
12213         DE_ASSERT(componentsStride == 2 || componentsStride == 4);
12214         DE_ASSERT(input1Bytes.size() == iterations * componentsStride * sizeof(deFloat16));
12215
12216         for (size_t idx = 0; idx < iterations; ++idx)
12217         {
12218                 const deFloat16*        inputVec                = &input1AsFP16[componentsStride * idx];
12219                 const deFloat16*        outputVec               = &outputAsFP16[componentsStride * idx];
12220                 const deUint32          replacedCompNdx = inputIndices[idx];
12221
12222                 DE_ASSERT(replacedCompNdx < COMPONENTS_COUNT);
12223
12224                 for (size_t compNdx = 0; compNdx < COMPONENTS_COUNT; ++compNdx)
12225                 {
12226                         const deFloat16 expected        = (compNdx == replacedCompNdx) ? magic : inputVec[compNdx];
12227
12228                         if (!compare16BitFloat(expected, outputVec[compNdx], error))
12229                         {
12230                                 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12231
12232                                 return false;
12233                         }
12234                 }
12235         }
12236
12237         return true;
12238 }
12239
12240 template<class SpecResource>
12241 tcu::TestCaseGroup* createFloat16VectorInsertSet (tcu::TestContext& testCtx)
12242 {
12243         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "opvectorinsertdynamic", "OpVectorInsertDynamic tests"));
12244
12245         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
12246         const deUint32                                          replacement                     = 42;
12247         const deUint32                                          numDataPoints           = 256;
12248         const vector<deFloat16>                         float16InputData        = getFloat16s(rnd, numDataPoints);
12249         const vector<deFloat16>                         float16OutputUnused     (float16InputData.size(), 0);
12250
12251         struct TestType
12252         {
12253                 const deUint32  typeComponents;
12254                 const size_t    typeStride;
12255                 const char*             typeName;
12256                 const char*             typeDecls;
12257                 VerifyIOFunc    verifyIOFunc;
12258                 const char*             typeStorage;
12259                 const string            loadFunction;
12260                 const string            storeFunction;
12261         };
12262
12263         const TestType  testTypes[]     =
12264         {
12265                 {
12266                         2,
12267                         2 * sizeof(deFloat16),
12268                         "v2f16",
12269                         "      %v2f16 = OpTypeVector %f16 2\n"
12270                         "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12271                         "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
12272                         compareFP16VectorInsertFunc<2, replacement>,
12273                         "u32",
12274                         loadV2F16FromUint,
12275                         storeV2F16AsUint
12276                 },
12277                 {
12278                         3,
12279                         4 * sizeof(deFloat16),
12280                         "v3f16",
12281                         "      %v2f16 = OpTypeVector %f16 2\n"
12282                         "      %v3f16 = OpTypeVector %f16 3\n"
12283                         "%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12284                         "%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n",
12285                         compareFP16VectorInsertFunc<3, replacement>,
12286                         "ra_u32_2",
12287                         loadV3F16FromUints,
12288                         storeV3F16AsUints
12289                 },
12290                 {
12291                         4,
12292                         4 * sizeof(deFloat16),
12293                         "v4f16",
12294                         "      %v2f16 = OpTypeVector %f16 2\n"
12295                         "      %v4f16 = OpTypeVector %f16 4\n"
12296                         "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12297                         "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
12298                         compareFP16VectorInsertFunc<4, replacement>,
12299                         "ra_u32_2",
12300                         loadV4F16FromUints,
12301                         storeV4F16AsUints
12302                 },
12303         };
12304
12305         const StringTemplate preMain
12306         (
12307                 "  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12308                 "        %f16 = OpTypeFloat 16\n"
12309                 "  %c_f16_ins = OpConstant %f16 ${replacement}\n"
12310
12311                 "${type_decl}"
12312
12313                 "     %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12314                 "         %up_u32 = OpTypePointer Uniform %u32\n"
12315                 "   %SSBO_IDX = OpTypeStruct %ra_u32\n"
12316                 "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12317
12318                 "   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12319                 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12320                 "   %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12321                 "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12322
12323                 "   %SSBO_DST = OpTypeStruct %ra_${ts}\n"
12324                 "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12325
12326                 "   %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12327                 "   %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12328                 "   %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
12329         );
12330
12331         const StringTemplate decoration
12332         (
12333                 "OpDecorate %ra_u32_2 ArrayStride 4\n"
12334                 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12335                 "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12336                 "OpDecorate %SSBO_SRC BufferBlock\n"
12337                 "OpDecorate %ssbo_src DescriptorSet 0\n"
12338                 "OpDecorate %ssbo_src Binding 0\n"
12339
12340                 "OpDecorate %ra_u32 ArrayStride 4\n"
12341                 "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12342                 "OpDecorate %SSBO_IDX BufferBlock\n"
12343                 "OpDecorate %ssbo_idx DescriptorSet 0\n"
12344                 "OpDecorate %ssbo_idx Binding 1\n"
12345
12346                 "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12347                 "OpDecorate %SSBO_DST BufferBlock\n"
12348                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12349                 "OpDecorate %ssbo_dst Binding 2\n"
12350         );
12351
12352         const StringTemplate testFun
12353         (
12354                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12355                 "    %param = OpFunctionParameter %v4f32\n"
12356                 "    %entry = OpLabel\n"
12357
12358                 "        %i = OpVariable %fp_i32 Function\n"
12359                 "             OpStore %i %c_i32_0\n"
12360
12361                 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12362                 "             OpSelectionMerge %end_if None\n"
12363                 "             OpBranchConditional %will_run %run_test %end_if\n"
12364
12365                 " %run_test = OpLabel\n"
12366                 "             OpBranch %loop\n"
12367
12368                 "     %loop = OpLabel\n"
12369                 "    %i_cmp = OpLoad %i32 %i\n"
12370                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12371                 "             OpLoopMerge %merge %next None\n"
12372                 "             OpBranchConditional %lt %write %merge\n"
12373
12374                 "    %write = OpLabel\n"
12375                 "      %ndx = OpLoad %i32 %i\n"
12376
12377                 "  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12378
12379                 "  %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12380                 "  %val_idx = OpLoad %u32 %src_idx\n"
12381
12382                 "  %val_dst = OpVectorInsertDynamic %${tt} %val_src %c_f16_ins %val_idx\n"
12383                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12384
12385                 "             OpBranch %next\n"
12386
12387                 "     %next = OpLabel\n"
12388                 "    %i_cur = OpLoad %i32 %i\n"
12389                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12390                 "             OpStore %i %i_new\n"
12391                 "             OpBranch %loop\n"
12392
12393                 "    %merge = OpLabel\n"
12394                 "             OpBranch %end_if\n"
12395                 "   %end_if = OpLabel\n"
12396                 "             OpReturnValue %param\n"
12397
12398                 "             OpFunctionEnd\n"
12399         );
12400
12401         for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12402         {
12403                 const TestType&         testType                = testTypes[testTypeIdx];
12404                 const string            testName                = testType.typeName;
12405                 const size_t            itemsPerType    = testType.typeStride / sizeof(deFloat16);
12406                 const size_t            iterations              = float16InputData.size() / itemsPerType;
12407                 SpecResource            specResource;
12408                 map<string, string>     specs;
12409                 VulkanFeatures          features;
12410                 vector<deUint32>        inputDataNdx;
12411                 map<string, string>     fragments;
12412                 vector<string>          extensions;
12413
12414                 for (deUint32 ndx = 0; ndx < iterations; ++ndx)
12415                         inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12416
12417                 specs["num_data_points"]        = de::toString(iterations);
12418                 specs["tt"]                                     = testType.typeName;
12419                 specs["ts"]                                     = testType.typeStorage;
12420                 specs["tt_stride"]                      = de::toString(testType.typeStride);
12421                 specs["type_decl"]                      = testType.typeDecls;
12422                 specs["replacement"]            = de::toString(replacement);
12423
12424                 fragments["capability"]         = "OpCapability Float16\n";
12425                 fragments["decoration"]         = decoration.specialize(specs);
12426                 fragments["pre_main"]           = preMain.specialize(specs);
12427                 fragments["testfun"]            = testFun.specialize(specs);
12428                 fragments["testfun"]            += StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12429                 fragments["testfun"]            += StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12430
12431                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12432                 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12433                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12434                 specResource.verifyIO = testType.verifyIOFunc;
12435
12436                 extensions.push_back("VK_KHR_shader_float16_int8");
12437
12438                 features.extFloat16Int8.shaderFloat16 = true;
12439
12440                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12441         }
12442
12443         return testGroup.release();
12444 }
12445
12446 inline deFloat16 getShuffledComponent (const size_t iteration, const size_t componentNdx, const deFloat16* input1Vec, const deFloat16* input2Vec, size_t vec1Len, size_t vec2Len, bool& validate)
12447 {
12448         const size_t    compNdxCount    = (vec1Len + vec2Len + 1);
12449         const size_t    compNdxLimited  = iteration % (compNdxCount * compNdxCount);
12450         size_t                  comp;
12451
12452         switch (componentNdx)
12453         {
12454                 case 0: comp = compNdxLimited / compNdxCount; break;
12455                 case 1: comp = compNdxLimited % compNdxCount; break;
12456                 case 2: comp = 0; break;
12457                 case 3: comp = 1; break;
12458                 default: TCU_THROW(InternalError, "Impossible");
12459         }
12460
12461         if (comp >= vec1Len + vec2Len)
12462         {
12463                 validate = false;
12464                 return 0;
12465         }
12466         else
12467         {
12468                 validate = true;
12469                 return (comp < vec1Len) ? input1Vec[comp] : input2Vec[comp - vec1Len];
12470         }
12471 }
12472
12473 template<deUint32 DST_COMPONENTS_COUNT, deUint32 SRC0_COMPONENTS_COUNT, deUint32 SRC1_COMPONENTS_COUNT>
12474 bool compareFP16VectorShuffleFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12475 {
12476         DE_STATIC_ASSERT(DST_COMPONENTS_COUNT == 2 || DST_COMPONENTS_COUNT == 3 || DST_COMPONENTS_COUNT == 4);
12477         DE_STATIC_ASSERT(SRC0_COMPONENTS_COUNT == 2 || SRC0_COMPONENTS_COUNT == 3 || SRC0_COMPONENTS_COUNT == 4);
12478         DE_STATIC_ASSERT(SRC1_COMPONENTS_COUNT == 2 || SRC1_COMPONENTS_COUNT == 3 || SRC1_COMPONENTS_COUNT == 4);
12479
12480         if (inputs.size() != 2 || outputAllocs.size() != 1)
12481                 return false;
12482
12483         vector<deUint8> input1Bytes;
12484         vector<deUint8> input2Bytes;
12485
12486         inputs[0].getBytes(input1Bytes);
12487         inputs[1].getBytes(input2Bytes);
12488
12489         DE_ASSERT(input1Bytes.size() > 0);
12490         DE_ASSERT(input2Bytes.size() > 0);
12491         DE_ASSERT(input2Bytes.size() % sizeof(deFloat16) == 0);
12492
12493         const size_t                    componentsStrideDst             = (DST_COMPONENTS_COUNT == 3) ? 4 : DST_COMPONENTS_COUNT;
12494         const size_t                    componentsStrideSrc0    = (SRC0_COMPONENTS_COUNT == 3) ? 4 : SRC0_COMPONENTS_COUNT;
12495         const size_t                    componentsStrideSrc1    = (SRC1_COMPONENTS_COUNT == 3) ? 4 : SRC1_COMPONENTS_COUNT;
12496         const size_t                    iterations                              = input1Bytes.size() / (componentsStrideSrc0 * sizeof(deFloat16));
12497         const deFloat16* const  input1AsFP16                    = (const deFloat16*)&input1Bytes[0];
12498         const deFloat16* const  input2AsFP16                    = (const deFloat16*)&input2Bytes[0];
12499         const deFloat16* const  outputAsFP16                    = (const deFloat16*)outputAllocs[0]->getHostPtr();
12500         std::string                             error;
12501
12502         DE_ASSERT(input1Bytes.size() == iterations * componentsStrideSrc0 * sizeof(deFloat16));
12503         DE_ASSERT(input2Bytes.size() == iterations * componentsStrideSrc1 * sizeof(deFloat16));
12504
12505         for (size_t idx = 0; idx < iterations; ++idx)
12506         {
12507                 const deFloat16*        input1Vec       = &input1AsFP16[componentsStrideSrc0 * idx];
12508                 const deFloat16*        input2Vec       = &input2AsFP16[componentsStrideSrc1 * idx];
12509                 const deFloat16*        outputVec       = &outputAsFP16[componentsStrideDst * idx];
12510
12511                 for (size_t compNdx = 0; compNdx < DST_COMPONENTS_COUNT; ++compNdx)
12512                 {
12513                         bool            validate        = true;
12514                         deFloat16       expected        = getShuffledComponent(idx, compNdx, input1Vec, input2Vec, SRC0_COMPONENTS_COUNT, SRC1_COMPONENTS_COUNT, validate);
12515
12516                         if (validate && !compare16BitFloat(expected, outputVec[compNdx], error))
12517                         {
12518                                 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12519
12520                                 return false;
12521                         }
12522                 }
12523         }
12524
12525         return true;
12526 }
12527
12528 VerifyIOFunc getFloat16VectorShuffleVerifyIOFunc (deUint32 dstComponentsCount, deUint32 src0ComponentsCount, deUint32 src1ComponentsCount)
12529 {
12530         DE_ASSERT(dstComponentsCount <= 4);
12531         DE_ASSERT(src0ComponentsCount <= 4);
12532         DE_ASSERT(src1ComponentsCount <= 4);
12533         deUint32 funcCode = 100 * dstComponentsCount + 10 * src0ComponentsCount + src1ComponentsCount;
12534
12535         switch (funcCode)
12536         {
12537                 case 222:return compareFP16VectorShuffleFunc<2, 2, 2>;
12538                 case 223:return compareFP16VectorShuffleFunc<2, 2, 3>;
12539                 case 224:return compareFP16VectorShuffleFunc<2, 2, 4>;
12540                 case 232:return compareFP16VectorShuffleFunc<2, 3, 2>;
12541                 case 233:return compareFP16VectorShuffleFunc<2, 3, 3>;
12542                 case 234:return compareFP16VectorShuffleFunc<2, 3, 4>;
12543                 case 242:return compareFP16VectorShuffleFunc<2, 4, 2>;
12544                 case 243:return compareFP16VectorShuffleFunc<2, 4, 3>;
12545                 case 244:return compareFP16VectorShuffleFunc<2, 4, 4>;
12546                 case 322:return compareFP16VectorShuffleFunc<3, 2, 2>;
12547                 case 323:return compareFP16VectorShuffleFunc<3, 2, 3>;
12548                 case 324:return compareFP16VectorShuffleFunc<3, 2, 4>;
12549                 case 332:return compareFP16VectorShuffleFunc<3, 3, 2>;
12550                 case 333:return compareFP16VectorShuffleFunc<3, 3, 3>;
12551                 case 334:return compareFP16VectorShuffleFunc<3, 3, 4>;
12552                 case 342:return compareFP16VectorShuffleFunc<3, 4, 2>;
12553                 case 343:return compareFP16VectorShuffleFunc<3, 4, 3>;
12554                 case 344:return compareFP16VectorShuffleFunc<3, 4, 4>;
12555                 case 422:return compareFP16VectorShuffleFunc<4, 2, 2>;
12556                 case 423:return compareFP16VectorShuffleFunc<4, 2, 3>;
12557                 case 424:return compareFP16VectorShuffleFunc<4, 2, 4>;
12558                 case 432:return compareFP16VectorShuffleFunc<4, 3, 2>;
12559                 case 433:return compareFP16VectorShuffleFunc<4, 3, 3>;
12560                 case 434:return compareFP16VectorShuffleFunc<4, 3, 4>;
12561                 case 442:return compareFP16VectorShuffleFunc<4, 4, 2>;
12562                 case 443:return compareFP16VectorShuffleFunc<4, 4, 3>;
12563                 case 444:return compareFP16VectorShuffleFunc<4, 4, 4>;
12564                 default: TCU_THROW(InternalError, "Invalid number of components specified.");
12565         }
12566 }
12567
12568 template<class SpecResource>
12569 tcu::TestCaseGroup* createFloat16VectorShuffleSet (tcu::TestContext& testCtx)
12570 {
12571         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "opvectorshuffle", "OpVectorShuffle tests"));
12572         const int                                                       testSpecificSeed        = deStringHash(testGroup->getName());
12573         const int                                                       seed                            = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
12574         de::Random                                                      rnd                                     (seed);
12575         const deUint32                                          numDataPoints           = 128;
12576         map<string, string>                                     fragments;
12577
12578         struct TestType
12579         {
12580                 const deUint32  typeComponents;
12581                 const char*             typeName;
12582                 const string    loadFunction;
12583                 const string    storeFunction;
12584         };
12585
12586         const TestType  testTypes[]     =
12587         {
12588                 {
12589                         2,
12590                         "v2f16",
12591                         loadV2F16FromUint,
12592                         storeV2F16AsUint
12593                 },
12594                 {
12595                         3,
12596                         "v3f16",
12597                         loadV3F16FromUints,
12598                         storeV3F16AsUints
12599                 },
12600                 {
12601                         4,
12602                         "v4f16",
12603                         loadV4F16FromUints,
12604                         storeV4F16AsUints
12605                 },
12606         };
12607
12608         const StringTemplate preMain
12609         (
12610                 "    %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12611                 "     %c_i32_cc = OpConstant %i32 ${case_count}\n"
12612                 "          %f16 = OpTypeFloat 16\n"
12613                 "        %v2f16 = OpTypeVector %f16 2\n"
12614                 "        %v3f16 = OpTypeVector %f16 3\n"
12615                 "        %v4f16 = OpTypeVector %f16 4\n"
12616
12617                 "     %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12618                 "     %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12619                 "     %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12620                 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
12621                 "%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
12622                 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
12623
12624                 "     %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12625                 "   %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12626                 "  %ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12627                 "       %up_u32 = OpTypePointer Uniform %u32\n"
12628                 "   %SSBO_v2f16 = OpTypeStruct %ra_u32_ndp\n"
12629                 "   %SSBO_v3f16 = OpTypeStruct %ra_ra_u32_2\n"
12630                 "   %SSBO_v4f16 = OpTypeStruct %ra_ra_u32_2\n"
12631
12632                 "%up_SSBO_v2f16 = OpTypePointer Uniform %SSBO_v2f16\n"
12633                 "%up_SSBO_v3f16 = OpTypePointer Uniform %SSBO_v3f16\n"
12634                 "%up_SSBO_v4f16 = OpTypePointer Uniform %SSBO_v4f16\n"
12635
12636                 "        %fun_t = OpTypeFunction %${tt_dst} %${tt_src0} %${tt_src1} %i32\n"
12637
12638                 "    %ssbo_src0 = OpVariable %up_SSBO_${tt_src0} Uniform\n"
12639                 "    %ssbo_src1 = OpVariable %up_SSBO_${tt_src1} Uniform\n"
12640                 "     %ssbo_dst = OpVariable %up_SSBO_${tt_dst} Uniform\n"
12641         );
12642
12643         const StringTemplate decoration
12644         (
12645                 "OpDecorate %ra_u32_2 ArrayStride 4\n"
12646                 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
12647                 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12648
12649                 "OpMemberDecorate %SSBO_v2f16 0 Offset 0\n"
12650                 "OpDecorate %SSBO_v2f16 BufferBlock\n"
12651
12652                 "OpMemberDecorate %SSBO_v3f16 0 Offset 0\n"
12653                 "OpDecorate %SSBO_v3f16 BufferBlock\n"
12654
12655                 "OpMemberDecorate %SSBO_v4f16 0 Offset 0\n"
12656                 "OpDecorate %SSBO_v4f16 BufferBlock\n"
12657
12658                 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
12659                 "OpDecorate %ssbo_src0 Binding 0\n"
12660                 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
12661                 "OpDecorate %ssbo_src1 Binding 1\n"
12662                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12663                 "OpDecorate %ssbo_dst Binding 2\n"
12664         );
12665
12666         const StringTemplate testFun
12667         (
12668                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12669                 "    %param = OpFunctionParameter %v4f32\n"
12670                 "    %entry = OpLabel\n"
12671
12672                 "        %i = OpVariable %fp_i32 Function\n"
12673                 "             OpStore %i %c_i32_0\n"
12674
12675                 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12676                 "             OpSelectionMerge %end_if None\n"
12677                 "             OpBranchConditional %will_run %run_test %end_if\n"
12678
12679                 " %run_test = OpLabel\n"
12680                 "             OpBranch %loop\n"
12681
12682                 "     %loop = OpLabel\n"
12683                 "    %i_cmp = OpLoad %i32 %i\n"
12684                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12685                 "             OpLoopMerge %merge %next None\n"
12686                 "             OpBranchConditional %lt %write %merge\n"
12687
12688                 "    %write = OpLabel\n"
12689                 "      %ndx = OpLoad %i32 %i\n"
12690                 " %val_src0 = OpFunctionCall %${tt_src0} %ld_arg_ssbo_src0 %ndx\n"
12691                 " %val_src1 = OpFunctionCall %${tt_src1} %ld_arg_ssbo_src1 %ndx\n"
12692                 "  %val_dst = OpFunctionCall %${tt_dst} %sw_fun %val_src0 %val_src1 %ndx\n"
12693                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12694                 "             OpBranch %next\n"
12695
12696                 "     %next = OpLabel\n"
12697                 "    %i_cur = OpLoad %i32 %i\n"
12698                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12699                 "             OpStore %i %i_new\n"
12700                 "             OpBranch %loop\n"
12701
12702                 "    %merge = OpLabel\n"
12703                 "             OpBranch %end_if\n"
12704                 "   %end_if = OpLabel\n"
12705                 "             OpReturnValue %param\n"
12706                 "             OpFunctionEnd\n"
12707                 "\n"
12708
12709                 "   %sw_fun = OpFunction %${tt_dst} None %fun_t\n"
12710                 "%sw_param0 = OpFunctionParameter %${tt_src0}\n"
12711                 "%sw_param1 = OpFunctionParameter %${tt_src1}\n"
12712                 "%sw_paramn = OpFunctionParameter %i32\n"
12713                 " %sw_entry = OpLabel\n"
12714                 "   %modulo = OpSMod %i32 %sw_paramn %c_i32_cc\n"
12715                 "             OpSelectionMerge %switch_e None\n"
12716                 "             OpSwitch %modulo %default ${case_list}\n"
12717                 "${case_bodies}"
12718                 "%default   = OpLabel\n"
12719                 "             OpUnreachable\n" // Unreachable default case for switch statement
12720                 "%switch_e  = OpLabel\n"
12721                 "             OpUnreachable\n" // Unreachable merge block for switch statement
12722                 "             OpFunctionEnd\n"
12723         );
12724
12725         const StringTemplate testCaseBody
12726         (
12727                 "%case_${case_ndx}    = OpLabel\n"
12728                 "%val_dst_${case_ndx} = OpVectorShuffle %${tt_dst} %sw_param0 %sw_param1 ${shuffle}\n"
12729                 "             OpReturnValue %val_dst_${case_ndx}\n"
12730         );
12731
12732         for (deUint32 dstTypeIdx = 0; dstTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++dstTypeIdx)
12733         {
12734                 const TestType& dstType                 = testTypes[dstTypeIdx];
12735
12736                 for (deUint32 comp0Idx = 0; comp0Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp0Idx)
12737                 {
12738                         const TestType& src0Type        = testTypes[comp0Idx];
12739
12740                         for (deUint32 comp1Idx = 0; comp1Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp1Idx)
12741                         {
12742                                 const TestType&                 src1Type                        = testTypes[comp1Idx];
12743                                 const deUint32                  input0Stride            = (src0Type.typeComponents == 3) ? 4 : src0Type.typeComponents;
12744                                 const deUint32                  input1Stride            = (src1Type.typeComponents == 3) ? 4 : src1Type.typeComponents;
12745                                 const deUint32                  outputStride            = (dstType.typeComponents == 3) ? 4 : dstType.typeComponents;
12746                                 const vector<deFloat16> float16Input0Data       = getFloat16s(rnd, input0Stride * numDataPoints);
12747                                 const vector<deFloat16> float16Input1Data       = getFloat16s(rnd, input1Stride * numDataPoints);
12748                                 const vector<deFloat16> float16OutputUnused     (outputStride * numDataPoints, 0);
12749                                 const string                    testName                        = de::toString(dstType.typeComponents) + de::toString(src0Type.typeComponents) + de::toString(src1Type.typeComponents);
12750                                 deUint32                                caseCount                       = 0;
12751                                 SpecResource                    specResource;
12752                                 map<string, string>             specs;
12753                                 vector<string>                  extensions;
12754                                 VulkanFeatures                  features;
12755                                 string                                  caseBodies;
12756                                 string                                  caseList;
12757
12758                                 // Generate case
12759                                 {
12760                                         vector<string>  componentList;
12761
12762                                         // Generate component possible indices for OpVectorShuffle for components 0 and 1 in output vector
12763                                         {
12764                                                 deUint32                caseNo          = 0;
12765
12766                                                 for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < src0Type.typeComponents; ++comp0IdxLocal)
12767                                                         componentList.push_back(de::toString(caseNo++));
12768                                                 for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < src1Type.typeComponents; ++comp1IdxLocal)
12769                                                         componentList.push_back(de::toString(caseNo++));
12770                                                 componentList.push_back("0xFFFFFFFF");
12771                                         }
12772
12773                                         for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < componentList.size(); ++comp0IdxLocal)
12774                                         {
12775                                                 for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < componentList.size(); ++comp1IdxLocal)
12776                                                 {
12777                                                         map<string, string>     specCase;
12778                                                         string                          shuffle         = componentList[comp0IdxLocal] + " " + componentList[comp1IdxLocal];
12779
12780                                                         for (deUint32 compIdx = 2; compIdx < dstType.typeComponents; ++compIdx)
12781                                                                 shuffle += " " + de::toString(compIdx - 2);
12782
12783                                                         specCase["case_ndx"]    = de::toString(caseCount);
12784                                                         specCase["shuffle"]             = shuffle;
12785                                                         specCase["tt_dst"]              = dstType.typeName;
12786
12787                                                         caseBodies      += testCaseBody.specialize(specCase);
12788                                                         caseList        += de::toString(caseCount) + " %case_" + de::toString(caseCount) + " ";
12789
12790                                                         caseCount++;
12791                                                 }
12792                                         }
12793                                 }
12794
12795                                 specs["num_data_points"]        = de::toString(numDataPoints);
12796                                 specs["tt_dst"]                         = dstType.typeName;
12797                                 specs["tt_src0"]                        = src0Type.typeName;
12798                                 specs["tt_src1"]                        = src1Type.typeName;
12799                                 specs["case_bodies"]            = caseBodies;
12800                                 specs["case_list"]                      = caseList;
12801                                 specs["case_count"]                     = de::toString(caseCount);
12802
12803                                 fragments["capability"]         = "OpCapability Float16\n";
12804                                 fragments["decoration"]         = decoration.specialize(specs);
12805                                 fragments["pre_main"]           = preMain.specialize(specs);
12806                                 fragments["testfun"]            = testFun.specialize(specs);
12807                                 fragments["testfun"]            += StringTemplate(src0Type.loadFunction).specialize({{"var", "ssbo_src0"}});
12808                                 fragments["testfun"]            += StringTemplate(src1Type.loadFunction).specialize({{"var", "ssbo_src1"}});
12809                                 fragments["testfun"]            += StringTemplate(dstType.storeFunction).specialize({{"var", "ssbo_dst"}});
12810
12811                                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input0Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12812                                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input1Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12813                                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12814                                 specResource.verifyIO = getFloat16VectorShuffleVerifyIOFunc(dstType.typeComponents, src0Type.typeComponents, src1Type.typeComponents);
12815
12816                                 extensions.push_back("VK_KHR_shader_float16_int8");
12817
12818                                 features.extFloat16Int8.shaderFloat16 = true;
12819
12820                                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12821                         }
12822                 }
12823         }
12824
12825         return testGroup.release();
12826 }
12827
12828 bool compareFP16CompositeFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12829 {
12830         if (inputs.size() != 1 || outputAllocs.size() != 1)
12831                 return false;
12832
12833         vector<deUint8> input1Bytes;
12834
12835         inputs[0].getBytes(input1Bytes);
12836
12837         DE_ASSERT(input1Bytes.size() > 0);
12838         DE_ASSERT(input1Bytes.size() % sizeof(deFloat16) == 0);
12839
12840         const size_t                    iterations              = input1Bytes.size() / sizeof(deFloat16);
12841         const deFloat16* const  input1AsFP16    = (const deFloat16*)&input1Bytes[0];
12842         const deFloat16* const  outputAsFP16    = (const deFloat16*)outputAllocs[0]->getHostPtr();
12843         const deFloat16                 exceptionValue  = tcu::Float16(-1.0).bits();
12844         std::string                             error;
12845
12846         for (size_t idx = 0; idx < iterations; ++idx)
12847         {
12848                 if (input1AsFP16[idx] == exceptionValue)
12849                         continue;
12850
12851                 if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
12852                 {
12853                         log << TestLog::Message << "At " << idx << ":" << error << TestLog::EndMessage;
12854
12855                         return false;
12856                 }
12857         }
12858
12859         return true;
12860 }
12861
12862 template<class SpecResource>
12863 tcu::TestCaseGroup* createFloat16CompositeConstructSet (tcu::TestContext& testCtx)
12864 {
12865         de::MovePtr<tcu::TestCaseGroup>         testGroup                               (new tcu::TestCaseGroup(testCtx, "opcompositeconstruct", "OpCompositeConstruct tests"));
12866         const deUint32                                          numElements                             = 8;
12867         const string                                            testName                                = "struct";
12868         const deUint32                                          structItemsCount                = 88;
12869         const deUint32                                          exceptionIndices[]              = { 1, 7, 15, 17, 25, 33, 51, 55, 59, 63, 67, 71, 84, 85, 86, 87 };
12870         const deFloat16                                         exceptionValue                  = tcu::Float16(-1.0).bits();
12871         const deUint32                                          fieldModifier                   = 2;
12872         const deUint32                                          fieldModifiedMulIndex   = 60;
12873         const deUint32                                          fieldModifiedAddIndex   = 66;
12874
12875         const StringTemplate preMain
12876         (
12877                 "    %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
12878                 "          %f16 = OpTypeFloat 16\n"
12879                 "        %v2f16 = OpTypeVector %f16 2\n"
12880                 "        %v3f16 = OpTypeVector %f16 3\n"
12881                 "        %v4f16 = OpTypeVector %f16 4\n"
12882                 "    %c_f16_mod = OpConstant %f16 ${field_modifier}\n"
12883
12884                 "${consts}"
12885
12886                 "     %c_f16_n1 = OpConstant %f16 -1.0\n"
12887                 "   %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_n1 %c_f16_n1\n"
12888                 "      %c_u32_5 = OpConstant %u32 5\n"
12889                 "      %c_u32_6 = OpConstant %u32 6\n"
12890                 "      %c_u32_7 = OpConstant %u32 7\n"
12891                 "      %c_u32_8 = OpConstant %u32 8\n"
12892                 "      %c_u32_9 = OpConstant %u32 9\n"
12893                 "     %c_u32_10 = OpConstant %u32 10\n"
12894                 "     %c_u32_11 = OpConstant %u32 11\n"
12895                 "     %c_u32_12 = OpConstant %u32 12\n"
12896                 "     %c_u32_13 = OpConstant %u32 13\n"
12897                 "     %c_u32_14 = OpConstant %u32 14\n"
12898                 "     %c_u32_15 = OpConstant %u32 15\n"
12899                 "     %c_u32_16 = OpConstant %u32 16\n"
12900                 "     %c_u32_17 = OpConstant %u32 17\n"
12901                 "     %c_u32_18 = OpConstant %u32 18\n"
12902                 "     %c_u32_19 = OpConstant %u32 19\n"
12903                 "     %c_u32_20 = OpConstant %u32 20\n"
12904                 "     %c_u32_21 = OpConstant %u32 21\n"
12905                 "     %c_u32_22 = OpConstant %u32 22\n"
12906                 "     %c_u32_23 = OpConstant %u32 23\n"
12907                 "     %c_u32_24 = OpConstant %u32 24\n"
12908                 "     %c_u32_25 = OpConstant %u32 25\n"
12909                 "     %c_u32_26 = OpConstant %u32 26\n"
12910                 "     %c_u32_27 = OpConstant %u32 27\n"
12911                 "     %c_u32_28 = OpConstant %u32 28\n"
12912                 "     %c_u32_29 = OpConstant %u32 29\n"
12913                 "     %c_u32_30 = OpConstant %u32 30\n"
12914                 "     %c_u32_31 = OpConstant %u32 31\n"
12915                 "     %c_u32_33 = OpConstant %u32 33\n"
12916                 "     %c_u32_34 = OpConstant %u32 34\n"
12917                 "     %c_u32_35 = OpConstant %u32 35\n"
12918                 "     %c_u32_36 = OpConstant %u32 36\n"
12919                 "     %c_u32_37 = OpConstant %u32 37\n"
12920                 "     %c_u32_38 = OpConstant %u32 38\n"
12921                 "     %c_u32_39 = OpConstant %u32 39\n"
12922                 "     %c_u32_40 = OpConstant %u32 40\n"
12923                 "     %c_u32_41 = OpConstant %u32 41\n"
12924                 "     %c_u32_44 = OpConstant %u32 44\n"
12925
12926                 " %f16arr3      = OpTypeArray %f16 %c_u32_3\n"
12927                 " %v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
12928                 " %v2f16arr5    = OpTypeArray %v2f16 %c_u32_5\n"
12929                 " %v3f16arr5    = OpTypeArray %v3f16 %c_u32_5\n"
12930                 " %v4f16arr3    = OpTypeArray %v4f16 %c_u32_3\n"
12931                 " %struct16     = OpTypeStruct %f16 %v2f16arr3\n"
12932                 " %struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
12933                 " %st_test      = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr3 %v2f16arr5 %f16 %v3f16arr5 %v4f16arr3\n"
12934
12935                 "       %up_u32 = OpTypePointer Uniform %u32\n"
12936                 "    %ra_u32_44 = OpTypeArray %u32 %c_u32_44\n"
12937                 "    %ra_ra_u32 = OpTypeArray %ra_u32_44 %c_i32_ndp\n"
12938                 "      %SSBO_st = OpTypeStruct %ra_ra_u32\n"
12939                 "   %up_SSBO_st = OpTypePointer Uniform %SSBO_st\n"
12940
12941                 "     %ssbo_dst = OpVariable %up_SSBO_st Uniform\n"
12942         );
12943
12944         const StringTemplate decoration
12945         (
12946                 "OpDecorate %SSBO_st BufferBlock\n"
12947                 "OpDecorate %ra_u32_44 ArrayStride 4\n"
12948                 "OpDecorate %ra_ra_u32 ArrayStride ${struct_item_size}\n"
12949                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12950                 "OpDecorate %ssbo_dst Binding 1\n"
12951
12952                 "OpMemberDecorate %SSBO_st 0 Offset 0\n"
12953
12954                 "OpDecorate %v2f16arr3 ArrayStride 4\n"
12955                 "OpMemberDecorate %struct16 0 Offset 0\n"
12956                 "OpMemberDecorate %struct16 1 Offset 4\n"
12957                 "OpDecorate %struct16arr3 ArrayStride 16\n"
12958                 "OpDecorate %f16arr3 ArrayStride 2\n"
12959                 "OpDecorate %v2f16arr5 ArrayStride 4\n"
12960                 "OpDecorate %v3f16arr5 ArrayStride 8\n"
12961                 "OpDecorate %v4f16arr3 ArrayStride 8\n"
12962
12963                 "OpMemberDecorate %st_test 0 Offset 0\n"
12964                 "OpMemberDecorate %st_test 1 Offset 4\n"
12965                 "OpMemberDecorate %st_test 2 Offset 8\n"
12966                 "OpMemberDecorate %st_test 3 Offset 16\n"
12967                 "OpMemberDecorate %st_test 4 Offset 24\n"
12968                 "OpMemberDecorate %st_test 5 Offset 32\n"
12969                 "OpMemberDecorate %st_test 6 Offset 80\n"
12970                 "OpMemberDecorate %st_test 7 Offset 100\n"
12971                 "OpMemberDecorate %st_test 8 Offset 104\n"
12972                 "OpMemberDecorate %st_test 9 Offset 144\n"
12973         );
12974
12975         const StringTemplate testFun
12976         (
12977                 " %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12978                 "     %param = OpFunctionParameter %v4f32\n"
12979                 "     %entry = OpLabel\n"
12980
12981                 "         %i = OpVariable %fp_i32 Function\n"
12982                 "              OpStore %i %c_i32_0\n"
12983
12984                 "  %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12985                 "              OpSelectionMerge %end_if None\n"
12986                 "              OpBranchConditional %will_run %run_test %end_if\n"
12987
12988                 "  %run_test = OpLabel\n"
12989                 "              OpBranch %loop\n"
12990
12991                 "      %loop = OpLabel\n"
12992                 "     %i_cmp = OpLoad %i32 %i\n"
12993                 "        %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12994                 "              OpLoopMerge %merge %next None\n"
12995                 "              OpBranchConditional %lt %write %merge\n"
12996
12997                 "     %write = OpLabel\n"
12998                 "       %ndx = OpLoad %i32 %i\n"
12999
13000                 "      %fld1 = OpCompositeConstruct %v2f16 %c_f16_2 %c_f16_3\n"
13001                 "      %fld2 = OpCompositeConstruct %v3f16 %c_f16_4 %c_f16_5 %c_f16_6\n"
13002                 "      %fld3 = OpCompositeConstruct %v4f16 %c_f16_8 %c_f16_9 %c_f16_10 %c_f16_11\n"
13003
13004                 "      %fld4 = OpCompositeConstruct %f16arr3 %c_f16_12 %c_f16_13 %c_f16_14\n"
13005
13006                 "%fld5_0_1_0 = OpCompositeConstruct %v2f16 %c_f16_18 %c_f16_19\n"
13007                 "%fld5_0_1_1 = OpCompositeConstruct %v2f16 %c_f16_20 %c_f16_21\n"
13008                 "%fld5_0_1_2 = OpCompositeConstruct %v2f16 %c_f16_22 %c_f16_23\n"
13009                 "  %fld5_0_1 = OpCompositeConstruct %v2f16arr3 %fld5_0_1_0 %fld5_0_1_1 %fld5_0_1_2\n"
13010                 "    %fld5_0 = OpCompositeConstruct %struct16 %c_f16_16 %fld5_0_1\n"
13011
13012                 "%fld5_1_1_0 = OpCompositeConstruct %v2f16 %c_f16_26 %c_f16_27\n"
13013                 "%fld5_1_1_1 = OpCompositeConstruct %v2f16 %c_f16_28 %c_f16_29\n"
13014                 "%fld5_1_1_2 = OpCompositeConstruct %v2f16 %c_f16_30 %c_f16_31\n"
13015                 "  %fld5_1_1 = OpCompositeConstruct %v2f16arr3 %fld5_1_1_0 %fld5_1_1_1 %fld5_1_1_2\n"
13016                 "    %fld5_1 = OpCompositeConstruct %struct16 %c_f16_24 %fld5_1_1\n"
13017
13018                 "%fld5_2_1_0 = OpCompositeConstruct %v2f16 %c_f16_34 %c_f16_35\n"
13019                 "%fld5_2_1_1 = OpCompositeConstruct %v2f16 %c_f16_36 %c_f16_37\n"
13020                 "%fld5_2_1_2 = OpCompositeConstruct %v2f16 %c_f16_38 %c_f16_39\n"
13021                 "  %fld5_2_1 = OpCompositeConstruct %v2f16arr3 %fld5_2_1_0 %fld5_2_1_1 %fld5_2_1_2\n"
13022                 "    %fld5_2 = OpCompositeConstruct %struct16 %c_f16_32 %fld5_2_1\n"
13023
13024                 "      %fld5 = OpCompositeConstruct %struct16arr3 %fld5_0 %fld5_1 %fld5_2\n"
13025
13026                 "    %fld6_0 = OpCompositeConstruct %v2f16 %c_f16_40 %c_f16_41\n"
13027                 "    %fld6_1 = OpCompositeConstruct %v2f16 %c_f16_42 %c_f16_43\n"
13028                 "    %fld6_2 = OpCompositeConstruct %v2f16 %c_f16_44 %c_f16_45\n"
13029                 "    %fld6_3 = OpCompositeConstruct %v2f16 %c_f16_46 %c_f16_47\n"
13030                 "    %fld6_4 = OpCompositeConstruct %v2f16 %c_f16_48 %c_f16_49\n"
13031                 "      %fld6 = OpCompositeConstruct %v2f16arr5 %fld6_0 %fld6_1 %fld6_2 %fld6_3 %fld6_4\n"
13032
13033                 "      %fndx = OpConvertSToF %f16 %ndx\n"
13034                 "  %fld8_2a0 = OpFMul %f16 %fndx %c_f16_mod\n"
13035                 "  %fld8_3b1 = OpFAdd %f16 %fndx %c_f16_mod\n"
13036
13037                 "   %fld8_2a = OpCompositeConstruct %v2f16 %fld8_2a0 %c_f16_61\n"
13038                 "   %fld8_3b = OpCompositeConstruct %v2f16 %c_f16_65 %fld8_3b1\n"
13039                 "    %fld8_0 = OpCompositeConstruct %v3f16 %c_f16_52 %c_f16_53 %c_f16_54\n"
13040                 "    %fld8_1 = OpCompositeConstruct %v3f16 %c_f16_56 %c_f16_57 %c_f16_58\n"
13041                 "    %fld8_2 = OpCompositeConstruct %v3f16 %fld8_2a %c_f16_62\n"
13042                 "    %fld8_3 = OpCompositeConstruct %v3f16 %c_f16_64 %fld8_3b\n"
13043                 "    %fld8_4 = OpCompositeConstruct %v3f16 %c_f16_68 %c_f16_69 %c_f16_70\n"
13044                 "      %fld8 = OpCompositeConstruct %v3f16arr5 %fld8_0 %fld8_1 %fld8_2 %fld8_3 %fld8_4\n"
13045
13046                 "    %fld9_0 = OpCompositeConstruct %v4f16 %c_f16_72 %c_f16_73 %c_f16_74 %c_f16_75\n"
13047                 "    %fld9_1 = OpCompositeConstruct %v4f16 %c_f16_76 %c_f16_77 %c_f16_78 %c_f16_79\n"
13048                 "    %fld9_2 = OpCompositeConstruct %v4f16 %c_f16_80 %c_f16_81 %c_f16_82 %c_f16_83\n"
13049                 "      %fld9 = OpCompositeConstruct %v4f16arr3 %fld9_0 %fld9_1 %fld9_2\n"
13050
13051                 "    %st_val = OpCompositeConstruct %st_test %c_f16_0 %fld1 %fld2 %fld3 %fld4 %fld5 %fld6 %c_f16_50 %fld8 %fld9\n"
13052
13053                 // Storage section: all elements that are not directly accessed should
13054                 // have the value of -1.0. This means for f16 and v3f16 stores the v2f16
13055                 // is constructed with one element from a constant -1.0.
13056                 // half offset 0
13057                 "      %ex_0 = OpCompositeExtract %f16 %st_val 0\n"
13058                 "     %vec_0 = OpCompositeConstruct %v2f16 %ex_0 %c_f16_n1\n"
13059                 "      %bc_0 = OpBitcast %u32 %vec_0\n"
13060                 "     %gep_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_0\n"
13061                 "              OpStore %gep_0 %bc_0\n"
13062
13063                 // <2 x half> offset 4
13064                 "      %ex_1 = OpCompositeExtract %v2f16 %st_val 1\n"
13065                 "      %bc_1 = OpBitcast %u32 %ex_1\n"
13066                 "     %gep_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_1\n"
13067                 "              OpStore %gep_1 %bc_1\n"
13068
13069                 // <3 x half> offset 8
13070                 "      %ex_2 = OpCompositeExtract %v3f16 %st_val 2\n"
13071                 "    %ex_2_0 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 0 1\n"
13072                 "    %ex_2_1 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 2 3\n"
13073                 "    %bc_2_0 = OpBitcast %u32 %ex_2_0\n"
13074                 "    %bc_2_1 = OpBitcast %u32 %ex_2_1\n"
13075                 "   %gep_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_2\n"
13076                 "   %gep_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_3\n"
13077                 "              OpStore %gep_2_0 %bc_2_0\n"
13078                 "              OpStore %gep_2_1 %bc_2_1\n"
13079
13080                 // <4 x half> offset 16
13081                 "      %ex_3 = OpCompositeExtract %v4f16 %st_val 3\n"
13082                 "    %ex_3_0 = OpVectorShuffle %v2f16 %ex_3 %ex_3 0 1\n"
13083                 "    %ex_3_1 = OpVectorShuffle %v2f16 %ex_3 %ex_3 2 3\n"
13084                 "    %bc_3_0 = OpBitcast %u32 %ex_3_0\n"
13085                 "    %bc_3_1 = OpBitcast %u32 %ex_3_1\n"
13086                 "   %gep_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_4\n"
13087                 "   %gep_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_5\n"
13088                 "              OpStore %gep_3_0 %bc_3_0\n"
13089                 "              OpStore %gep_3_1 %bc_3_1\n"
13090
13091                 // [3 x half] offset 24
13092                 "    %ex_4_0 = OpCompositeExtract %f16 %st_val 4 0\n"
13093                 "    %ex_4_1 = OpCompositeExtract %f16 %st_val 4 1\n"
13094                 "    %ex_4_2 = OpCompositeExtract %f16 %st_val 4 2\n"
13095                 "   %vec_4_0 = OpCompositeConstruct %v2f16 %ex_4_0 %ex_4_1\n"
13096                 "   %vec_4_1 = OpCompositeConstruct %v2f16 %ex_4_2 %c_f16_n1\n"
13097                 "    %bc_4_0 = OpBitcast %u32 %vec_4_0\n"
13098                 "    %bc_4_1 = OpBitcast %u32 %vec_4_1\n"
13099                 "   %gep_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_6\n"
13100                 "   %gep_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_7\n"
13101                 "              OpStore %gep_4_0 %bc_4_0\n"
13102                 "              OpStore %gep_4_1 %bc_4_1\n"
13103
13104                 // [3 x {half, [3 x <2 x half>]}] offset 32
13105                 "    %ex_5_0 = OpCompositeExtract %struct16 %st_val 5 0\n"
13106                 "    %ex_5_1 = OpCompositeExtract %struct16 %st_val 5 1\n"
13107                 "    %ex_5_2 = OpCompositeExtract %struct16 %st_val 5 2\n"
13108                 "  %ex_5_0_0 = OpCompositeExtract %f16 %ex_5_0 0\n"
13109                 "  %ex_5_1_0 = OpCompositeExtract %f16 %ex_5_1 0\n"
13110                 "  %ex_5_2_0 = OpCompositeExtract %f16 %ex_5_2 0\n"
13111                 "%ex_5_0_1_0 = OpCompositeExtract %v2f16 %ex_5_0 1 0\n"
13112                 "%ex_5_0_1_1 = OpCompositeExtract %v2f16 %ex_5_0 1 1\n"
13113                 "%ex_5_0_1_2 = OpCompositeExtract %v2f16 %ex_5_0 1 2\n"
13114                 "%ex_5_1_1_0 = OpCompositeExtract %v2f16 %ex_5_1 1 0\n"
13115                 "%ex_5_1_1_1 = OpCompositeExtract %v2f16 %ex_5_1 1 1\n"
13116                 "%ex_5_1_1_2 = OpCompositeExtract %v2f16 %ex_5_1 1 2\n"
13117                 "%ex_5_2_1_0 = OpCompositeExtract %v2f16 %ex_5_2 1 0\n"
13118                 "%ex_5_2_1_1 = OpCompositeExtract %v2f16 %ex_5_2 1 1\n"
13119                 "%ex_5_2_1_2 = OpCompositeExtract %v2f16 %ex_5_2 1 2\n"
13120                 " %vec_5_0_0 = OpCompositeConstruct %v2f16 %ex_5_0_0 %c_f16_n1\n"
13121                 " %vec_5_1_0 = OpCompositeConstruct %v2f16 %ex_5_1_0 %c_f16_n1\n"
13122                 " %vec_5_2_0 = OpCompositeConstruct %v2f16 %ex_5_2_0 %c_f16_n1\n"
13123                 "  %bc_5_0_0 = OpBitcast %u32 %vec_5_0_0\n"
13124                 "  %bc_5_1_0 = OpBitcast %u32 %vec_5_1_0\n"
13125                 "  %bc_5_2_0 = OpBitcast %u32 %vec_5_2_0\n"
13126                 "%bc_5_0_1_0 = OpBitcast %u32 %ex_5_0_1_0\n"
13127                 "%bc_5_0_1_1 = OpBitcast %u32 %ex_5_0_1_1\n"
13128                 "%bc_5_0_1_2 = OpBitcast %u32 %ex_5_0_1_2\n"
13129                 "%bc_5_1_1_0 = OpBitcast %u32 %ex_5_1_1_0\n"
13130                 "%bc_5_1_1_1 = OpBitcast %u32 %ex_5_1_1_1\n"
13131                 "%bc_5_1_1_2 = OpBitcast %u32 %ex_5_1_1_2\n"
13132                 "%bc_5_2_1_0 = OpBitcast %u32 %ex_5_2_1_0\n"
13133                 "%bc_5_2_1_1 = OpBitcast %u32 %ex_5_2_1_1\n"
13134                 "%bc_5_2_1_2 = OpBitcast %u32 %ex_5_2_1_2\n"
13135                 "  %gep_5_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_8\n"
13136                 "%gep_5_0_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_9\n"
13137                 "%gep_5_0_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_10\n"
13138                 "%gep_5_0_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_11\n"
13139                 "  %gep_5_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_12\n"
13140                 "%gep_5_1_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_13\n"
13141                 "%gep_5_1_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_14\n"
13142                 "%gep_5_1_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_15\n"
13143                 "  %gep_5_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_16\n"
13144                 "%gep_5_2_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_17\n"
13145                 "%gep_5_2_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_18\n"
13146                 "%gep_5_2_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_19\n"
13147                 "              OpStore %gep_5_0_0 %bc_5_0_0\n"
13148                 "              OpStore %gep_5_0_1_0 %bc_5_0_1_0\n"
13149                 "              OpStore %gep_5_0_1_1 %bc_5_0_1_1\n"
13150                 "              OpStore %gep_5_0_1_2 %bc_5_0_1_2\n"
13151                 "              OpStore %gep_5_1_0 %bc_5_1_0\n"
13152                 "              OpStore %gep_5_1_1_0 %bc_5_1_1_0\n"
13153                 "              OpStore %gep_5_1_1_1 %bc_5_1_1_1\n"
13154                 "              OpStore %gep_5_1_1_2 %bc_5_1_1_2\n"
13155                 "              OpStore %gep_5_2_0 %bc_5_2_0\n"
13156                 "              OpStore %gep_5_2_1_0 %bc_5_2_1_0\n"
13157                 "              OpStore %gep_5_2_1_1 %bc_5_2_1_1\n"
13158                 "              OpStore %gep_5_2_1_2 %bc_5_2_1_2\n"
13159
13160                 // [5 x <2 x half>] offset 80
13161                 "    %ex_6_0 = OpCompositeExtract %v2f16 %st_val 6 0\n"
13162                 "    %ex_6_1 = OpCompositeExtract %v2f16 %st_val 6 1\n"
13163                 "    %ex_6_2 = OpCompositeExtract %v2f16 %st_val 6 2\n"
13164                 "    %ex_6_3 = OpCompositeExtract %v2f16 %st_val 6 3\n"
13165                 "    %ex_6_4 = OpCompositeExtract %v2f16 %st_val 6 4\n"
13166                 "    %bc_6_0 = OpBitcast %u32 %ex_6_0\n"
13167                 "    %bc_6_1 = OpBitcast %u32 %ex_6_1\n"
13168                 "    %bc_6_2 = OpBitcast %u32 %ex_6_2\n"
13169                 "    %bc_6_3 = OpBitcast %u32 %ex_6_3\n"
13170                 "    %bc_6_4 = OpBitcast %u32 %ex_6_4\n"
13171                 "   %gep_6_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_20\n"
13172                 "   %gep_6_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_21\n"
13173                 "   %gep_6_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_22\n"
13174                 "   %gep_6_3 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_23\n"
13175                 "   %gep_6_4 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_24\n"
13176                 "              OpStore %gep_6_0 %bc_6_0\n"
13177                 "              OpStore %gep_6_1 %bc_6_1\n"
13178                 "              OpStore %gep_6_2 %bc_6_2\n"
13179                 "              OpStore %gep_6_3 %bc_6_3\n"
13180                 "              OpStore %gep_6_4 %bc_6_4\n"
13181
13182                 // half offset 100
13183                 "      %ex_7 = OpCompositeExtract %f16 %st_val 7\n"
13184                 "     %vec_7 = OpCompositeConstruct %v2f16 %ex_7 %c_f16_n1\n"
13185                 "      %bc_7 = OpBitcast %u32 %vec_7\n"
13186                 "     %gep_7 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_25\n"
13187                 "              OpStore %gep_7 %bc_7\n"
13188
13189                 // [5 x <3 x half>] offset 104
13190                 "    %ex_8_0 = OpCompositeExtract %v3f16 %st_val 8 0\n"
13191                 "    %ex_8_1 = OpCompositeExtract %v3f16 %st_val 8 1\n"
13192                 "    %ex_8_2 = OpCompositeExtract %v3f16 %st_val 8 2\n"
13193                 "    %ex_8_3 = OpCompositeExtract %v3f16 %st_val 8 3\n"
13194                 "    %ex_8_4 = OpCompositeExtract %v3f16 %st_val 8 4\n"
13195                 " %vec_8_0_0 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 0 1\n"
13196                 " %vec_8_0_1 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 2 3\n"
13197                 " %vec_8_1_0 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 0 1\n"
13198                 " %vec_8_1_1 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 2 3\n"
13199                 " %vec_8_2_0 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 0 1\n"
13200                 " %vec_8_2_1 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 2 3\n"
13201                 " %vec_8_3_0 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 0 1\n"
13202                 " %vec_8_3_1 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 2 3\n"
13203                 " %vec_8_4_0 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 0 1\n"
13204                 " %vec_8_4_1 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 2 3\n"
13205                 "  %bc_8_0_0 = OpBitcast %u32 %vec_8_0_0\n"
13206                 "  %bc_8_0_1 = OpBitcast %u32 %vec_8_0_1\n"
13207                 "  %bc_8_1_0 = OpBitcast %u32 %vec_8_1_0\n"
13208                 "  %bc_8_1_1 = OpBitcast %u32 %vec_8_1_1\n"
13209                 "  %bc_8_2_0 = OpBitcast %u32 %vec_8_2_0\n"
13210                 "  %bc_8_2_1 = OpBitcast %u32 %vec_8_2_1\n"
13211                 "  %bc_8_3_0 = OpBitcast %u32 %vec_8_3_0\n"
13212                 "  %bc_8_3_1 = OpBitcast %u32 %vec_8_3_1\n"
13213                 "  %bc_8_4_0 = OpBitcast %u32 %vec_8_4_0\n"
13214                 "  %bc_8_4_1 = OpBitcast %u32 %vec_8_4_1\n"
13215                 " %gep_8_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_26\n"
13216                 " %gep_8_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_27\n"
13217                 " %gep_8_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_28\n"
13218                 " %gep_8_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_29\n"
13219                 " %gep_8_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_30\n"
13220                 " %gep_8_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_31\n"
13221                 " %gep_8_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_32\n"
13222                 " %gep_8_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_33\n"
13223                 " %gep_8_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_34\n"
13224                 " %gep_8_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_35\n"
13225                 "              OpStore %gep_8_0_0 %bc_8_0_0\n"
13226                 "              OpStore %gep_8_0_1 %bc_8_0_1\n"
13227                 "              OpStore %gep_8_1_0 %bc_8_1_0\n"
13228                 "              OpStore %gep_8_1_1 %bc_8_1_1\n"
13229                 "              OpStore %gep_8_2_0 %bc_8_2_0\n"
13230                 "              OpStore %gep_8_2_1 %bc_8_2_1\n"
13231                 "              OpStore %gep_8_3_0 %bc_8_3_0\n"
13232                 "              OpStore %gep_8_3_1 %bc_8_3_1\n"
13233                 "              OpStore %gep_8_4_0 %bc_8_4_0\n"
13234                 "              OpStore %gep_8_4_1 %bc_8_4_1\n"
13235
13236                 // [3 x <4 x half>] offset 144
13237                 "    %ex_9_0 = OpCompositeExtract %v4f16 %st_val 9 0\n"
13238                 "    %ex_9_1 = OpCompositeExtract %v4f16 %st_val 9 1\n"
13239                 "    %ex_9_2 = OpCompositeExtract %v4f16 %st_val 9 2\n"
13240                 " %vec_9_0_0 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 0 1\n"
13241                 " %vec_9_0_1 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 2 3\n"
13242                 " %vec_9_1_0 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 0 1\n"
13243                 " %vec_9_1_1 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 2 3\n"
13244                 " %vec_9_2_0 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 0 1\n"
13245                 " %vec_9_2_1 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 2 3\n"
13246                 "  %bc_9_0_0 = OpBitcast %u32 %vec_9_0_0\n"
13247                 "  %bc_9_0_1 = OpBitcast %u32 %vec_9_0_1\n"
13248                 "  %bc_9_1_0 = OpBitcast %u32 %vec_9_1_0\n"
13249                 "  %bc_9_1_1 = OpBitcast %u32 %vec_9_1_1\n"
13250                 "  %bc_9_2_0 = OpBitcast %u32 %vec_9_2_0\n"
13251                 "  %bc_9_2_1 = OpBitcast %u32 %vec_9_2_1\n"
13252                 " %gep_9_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_36\n"
13253                 " %gep_9_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_37\n"
13254                 " %gep_9_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_38\n"
13255                 " %gep_9_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_39\n"
13256                 " %gep_9_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_40\n"
13257                 " %gep_9_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_41\n"
13258                 "              OpStore %gep_9_0_0 %bc_9_0_0\n"
13259                 "              OpStore %gep_9_0_1 %bc_9_0_1\n"
13260                 "              OpStore %gep_9_1_0 %bc_9_1_0\n"
13261                 "              OpStore %gep_9_1_1 %bc_9_1_1\n"
13262                 "              OpStore %gep_9_2_0 %bc_9_2_0\n"
13263                 "              OpStore %gep_9_2_1 %bc_9_2_1\n"
13264
13265                 "              OpBranch %next\n"
13266
13267                 "      %next = OpLabel\n"
13268                 "     %i_cur = OpLoad %i32 %i\n"
13269                 "     %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13270                 "              OpStore %i %i_new\n"
13271                 "              OpBranch %loop\n"
13272
13273                 "     %merge = OpLabel\n"
13274                 "              OpBranch %end_if\n"
13275                 "    %end_if = OpLabel\n"
13276                 "              OpReturnValue %param\n"
13277                 "              OpFunctionEnd\n"
13278         );
13279
13280         {
13281                 SpecResource            specResource;
13282                 map<string, string>     specs;
13283                 VulkanFeatures          features;
13284                 map<string, string>     fragments;
13285                 vector<string>          extensions;
13286                 vector<deFloat16>       expectedOutput;
13287                 string                          consts;
13288
13289                 for (deUint32 elementNdx = 0; elementNdx < numElements; ++elementNdx)
13290                 {
13291                         vector<deFloat16>       expectedIterationOutput;
13292
13293                         for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
13294                                 expectedIterationOutput.push_back(tcu::Float16(float(structItemNdx)).bits());
13295
13296                         for (deUint32 structItemNdx = 0; structItemNdx < DE_LENGTH_OF_ARRAY(exceptionIndices); ++structItemNdx)
13297                                 expectedIterationOutput[exceptionIndices[structItemNdx]] = exceptionValue;
13298
13299                         expectedIterationOutput[fieldModifiedMulIndex] = tcu::Float16(float(elementNdx * fieldModifier)).bits();
13300                         expectedIterationOutput[fieldModifiedAddIndex] = tcu::Float16(float(elementNdx + fieldModifier)).bits();
13301
13302                         expectedOutput.insert(expectedOutput.end(), expectedIterationOutput.begin(), expectedIterationOutput.end());
13303                 }
13304
13305                 for (deUint32 i = 0; i < structItemsCount; ++i)
13306                         consts += "     %c_f16_" + de::toString(i) + " = OpConstant %f16 "  + de::toString(i) + "\n";
13307
13308                 specs["num_elements"]           = de::toString(numElements);
13309                 specs["struct_item_size"]       = de::toString(structItemsCount * sizeof(deFloat16));
13310                 specs["field_modifier"]         = de::toString(fieldModifier);
13311                 specs["consts"]                         = consts;
13312
13313                 fragments["capability"]         = "OpCapability Float16\n";
13314                 fragments["decoration"]         = decoration.specialize(specs);
13315                 fragments["pre_main"]           = preMain.specialize(specs);
13316                 fragments["testfun"]            = testFun.specialize(specs);
13317
13318                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13319                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13320                 specResource.verifyIO = compareFP16CompositeFunc;
13321
13322                 extensions.push_back("VK_KHR_shader_float16_int8");
13323
13324                 features.extFloat16Int8.shaderFloat16 = true;
13325
13326                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
13327         }
13328
13329         return testGroup.release();
13330 }
13331
13332 template<class SpecResource>
13333 tcu::TestCaseGroup* createFloat16CompositeInsertExtractSet (tcu::TestContext& testCtx, const char* op)
13334 {
13335         de::MovePtr<tcu::TestCaseGroup>         testGroup               (new tcu::TestCaseGroup(testCtx, de::toLower(op).c_str(), op));
13336         const deFloat16                                         exceptionValue  = tcu::Float16(-1.0).bits();
13337         const string                                            opName                  (op);
13338         const deUint32                                          opIndex                 = (opName == "OpCompositeInsert") ? 0
13339                                                                                                                 : (opName == "OpCompositeExtract") ? 1
13340                                                                                                                 : std::numeric_limits<deUint32>::max();
13341
13342         const StringTemplate preMain
13343         (
13344                 "   %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
13345                 "  %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
13346                 "  %c_i32_size = OpConstant %i32 ${struct_u32s}\n"
13347                 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
13348                 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
13349                 "         %f16 = OpTypeFloat 16\n"
13350                 "       %v2f16 = OpTypeVector %f16 2\n"
13351                 "       %v3f16 = OpTypeVector %f16 3\n"
13352                 "       %v4f16 = OpTypeVector %f16 4\n"
13353                 "    %c_f16_na = OpConstant %f16 -1.0\n"
13354                 "  %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_na %c_f16_na\n"
13355                 "     %c_u32_5 = OpConstant %u32 5\n"
13356                 "     %c_i32_5 = OpConstant %i32 5\n"
13357                 "     %c_i32_6 = OpConstant %i32 6\n"
13358                 "     %c_i32_7 = OpConstant %i32 7\n"
13359                 "     %c_i32_8 = OpConstant %i32 8\n"
13360                 "     %c_i32_9 = OpConstant %i32 9\n"
13361                 "    %c_i32_10 = OpConstant %i32 10\n"
13362                 "    %c_i32_11 = OpConstant %i32 11\n"
13363
13364                 "%f16arr3      = OpTypeArray %f16 %c_u32_3\n"
13365                 "%v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
13366                 "%v2f16arr5    = OpTypeArray %v2f16 %c_u32_5\n"
13367                 "%v3f16arr5    = OpTypeArray %v3f16 %c_u32_5\n"
13368                 "%v4f16arr3    = OpTypeArray %v4f16 %c_u32_3\n"
13369                 "%struct16     = OpTypeStruct %f16 %v2f16arr3\n"
13370                 "%struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
13371                 "%st_test      = OpTypeStruct %${field_type}\n"
13372
13373                 "      %ra_f16 = OpTypeArray %u32 %c_i32_hndp\n"
13374                 "       %ra_st = OpTypeArray %u32 %c_i32_size\n"
13375                 "      %up_u32 = OpTypePointer Uniform %u32\n"
13376                 "     %st_test_i32_fn = OpTypeFunction %st_test %i32\n"
13377                 "%void_st_test_i32_fn = OpTypeFunction %void %st_test %i32\n"
13378                 "         %f16_i32_fn = OpTypeFunction %f16 %i32\n"
13379                 "    %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
13380                 "       %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
13381                 "  %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
13382
13383                 "${op_premain_decls}"
13384
13385                 " %up_SSBO_src = OpTypePointer Uniform %SSBO_src\n"
13386                 " %up_SSBO_dst = OpTypePointer Uniform %SSBO_dst\n"
13387
13388                 "    %ssbo_src = OpVariable %up_SSBO_src Uniform\n"
13389                 "    %ssbo_dst = OpVariable %up_SSBO_dst Uniform\n"
13390         );
13391
13392         const StringTemplate decoration
13393         (
13394                 "OpDecorate %SSBO_src BufferBlock\n"
13395                 "OpDecorate %SSBO_dst BufferBlock\n"
13396                 "OpDecorate %ra_f16 ArrayStride 4\n"
13397                 "OpDecorate %ra_st ArrayStride 4\n"
13398                 "OpDecorate %ssbo_src DescriptorSet 0\n"
13399                 "OpDecorate %ssbo_src Binding 0\n"
13400                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
13401                 "OpDecorate %ssbo_dst Binding 1\n"
13402
13403                 "OpMemberDecorate %SSBO_src 0 Offset 0\n"
13404                 "OpMemberDecorate %SSBO_dst 0 Offset 0\n"
13405
13406                 "OpDecorate %v2f16arr3 ArrayStride 4\n"
13407                 "OpMemberDecorate %struct16 0 Offset 0\n"
13408                 "OpMemberDecorate %struct16 1 Offset 4\n"
13409                 "OpDecorate %struct16arr3 ArrayStride 16\n"
13410                 "OpDecorate %f16arr3 ArrayStride 2\n"
13411                 "OpDecorate %v2f16arr5 ArrayStride 4\n"
13412                 "OpDecorate %v3f16arr5 ArrayStride 8\n"
13413                 "OpDecorate %v4f16arr3 ArrayStride 8\n"
13414
13415                 "OpMemberDecorate %st_test 0 Offset 0\n"
13416         );
13417
13418         const StringTemplate testFun
13419         (
13420                 " %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
13421                 "     %param = OpFunctionParameter %v4f32\n"
13422                 "     %entry = OpLabel\n"
13423
13424                 "         %i = OpVariable %fp_i32 Function\n"
13425                 "              OpStore %i %c_i32_0\n"
13426
13427                 "  %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
13428                 "              OpSelectionMerge %end_if None\n"
13429                 "              OpBranchConditional %will_run %run_test %end_if\n"
13430
13431                 "  %run_test = OpLabel\n"
13432                 "              OpBranch %loop\n"
13433
13434                 "      %loop = OpLabel\n"
13435                 "     %i_cmp = OpLoad %i32 %i\n"
13436                 "        %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
13437                 "              OpLoopMerge %merge %next None\n"
13438                 "              OpBranchConditional %lt %write %merge\n"
13439
13440                 "     %write = OpLabel\n"
13441                 "       %ndx = OpLoad %i32 %i\n"
13442
13443                 "${op_sw_fun_call}"
13444
13445                 "    %dst_st = OpFunctionCall %void %${st_call} %val_dst %${st_ndx}\n"
13446                 "              OpBranch %next\n"
13447
13448                 "      %next = OpLabel\n"
13449                 "     %i_cur = OpLoad %i32 %i\n"
13450                 "     %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13451                 "              OpStore %i %i_new\n"
13452                 "              OpBranch %loop\n"
13453
13454                 "     %merge = OpLabel\n"
13455                 "              OpBranch %end_if\n"
13456                 "    %end_if = OpLabel\n"
13457                 "              OpReturnValue %param\n"
13458                 "              OpFunctionEnd\n"
13459
13460                 "${op_sw_fun_header}"
13461                 " %sw_param = OpFunctionParameter %st_test\n"
13462                 "%sw_paramn = OpFunctionParameter %i32\n"
13463                 " %sw_entry = OpLabel\n"
13464                 "             OpSelectionMerge %switch_e None\n"
13465                 "             OpSwitch %sw_paramn %default ${case_list}\n"
13466
13467                 "${case_bodies}"
13468
13469                 "%default   = OpLabel\n"
13470                 "             OpReturnValue ${op_case_default_value}\n"
13471                 "%switch_e  = OpLabel\n"
13472                 "             OpUnreachable\n" // Unreachable merge block for switch statement
13473                 "             OpFunctionEnd\n"
13474         );
13475
13476         const StringTemplate testCaseBody
13477         (
13478                 "%case_${case_ndx}    = OpLabel\n"
13479                 "%val_ret_${case_ndx} = ${op_name} ${op_args_part} ${access_path}\n"
13480                 "             OpReturnValue %val_ret_${case_ndx}\n"
13481         );
13482
13483         const string loadF16
13484         (
13485                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13486                 "  %ld_${var}_param = OpFunctionParameter %i32\n"
13487                 "  %ld_${var}_entry = OpLabel\n"
13488                 "   %ld_${var}_call = OpFunctionCall %f16 %ld_arg_${var} %ld_${var}_param\n"
13489                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13490                 "                     OpReturnValue %ld_${var}_st_test\n"
13491                 "                     OpFunctionEnd\n" +
13492                 loadScalarF16FromUint
13493         );
13494
13495         const string loadV2F16
13496         (
13497                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13498                 "  %ld_${var}_param = OpFunctionParameter %i32\n"
13499                 "  %ld_${var}_entry = OpLabel\n"
13500                 "   %ld_${var}_call = OpFunctionCall %v2f16 %ld_arg_${var} %ld_${var}_param\n"
13501                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13502                 "                     OpReturnValue %ld_${var}_st_test\n"
13503                 "                     OpFunctionEnd\n" +
13504                 loadV2F16FromUint
13505         );
13506
13507         const string loadV3F16
13508         (
13509                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13510                 "  %ld_${var}_param = OpFunctionParameter %i32\n"
13511                 "  %ld_${var}_entry = OpLabel\n"
13512                 "  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13513                 "  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13514                 "   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13515                 "   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13516                 "   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13517                 "   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13518                 "    %ld_${var}_vec = OpVectorShuffle %v3f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2\n"
13519                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13520                 "                     OpReturnValue %ld_${var}_st_test\n"
13521                 "                     OpFunctionEnd\n"
13522         );
13523
13524         const string loadV4F16
13525         (
13526                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13527                 "  %ld_${var}_param = OpFunctionParameter %i32\n"
13528                 "  %ld_${var}_entry = OpLabel\n"
13529                 "  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13530                 "  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13531                 "   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13532                 "   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13533                 "   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13534                 "   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13535                 "    %ld_${var}_vec = OpVectorShuffle %v4f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2 3\n"
13536                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13537                 "                     OpReturnValue %ld_${var}_st_test\n"
13538                 "                     OpFunctionEnd\n"
13539         );
13540
13541         const string loadF16Arr3
13542         (
13543                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13544                 "  %ld_${var}_param = OpFunctionParameter %i32\n"
13545                 "  %ld_${var}_entry = OpLabel\n"
13546                 "  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_0\n"
13547                 "  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_1\n"
13548                 "   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13549                 "   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13550                 "   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13551                 "   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13552                 "   %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0 0\n"
13553                 "   %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_0 1\n"
13554                 "   %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_1 0\n"
13555                 "   %ld_${var}_cons = OpCompositeConstruct %f16arr3 %ld_${var}_ex_0 %ld_${var}_ex_1 %ld_${var}_ex_2\n"
13556                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13557                 "                     OpReturnValue %ld_${var}_st_test\n"
13558                 "                     OpFunctionEnd\n"
13559         );
13560
13561         const string loadV2F16Arr5
13562         (
13563                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13564                 "  %ld_${var}_param = OpFunctionParameter %i32\n"
13565                 "  %ld_${var}_label = OpLabel\n"
13566                 "  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13567                 "  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13568                 "  %ld_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13569                 "  %ld_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13570                 "  %ld_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13571                 "   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13572                 "   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13573                 "   %ld_${var}_ld_2 = OpLoad %u32 %ld_${var}_gep_2\n"
13574                 "   %ld_${var}_ld_3 = OpLoad %u32 %ld_${var}_gep_3\n"
13575                 "   %ld_${var}_ld_4 = OpLoad %u32 %ld_${var}_gep_4\n"
13576                 "   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13577                 "   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13578                 "   %ld_${var}_bc_2 = OpBitcast %v2f16 %ld_${var}_ld_2\n"
13579                 "   %ld_${var}_bc_3 = OpBitcast %v2f16 %ld_${var}_ld_3\n"
13580                 "   %ld_${var}_bc_4 = OpBitcast %v2f16 %ld_${var}_ld_4\n"
13581                 "   %ld_${var}_cons = OpCompositeConstruct %v2f16arr5 %ld_${var}_bc_0 %ld_${var}_bc_1 %ld_${var}_bc_2 %ld_${var}_bc_3 %ld_${var}_bc_4\n"
13582                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13583                 "                     OpReturnValue %ld_${var}_st_test\n"
13584                 "                     OpFunctionEnd\n"
13585         );
13586
13587         const string loadV3F16Arr5
13588         (
13589                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13590                 "  %ld_${var}_param = OpFunctionParameter %i32\n"
13591                 "  %ld_${var}_entry = OpLabel\n"
13592                 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13593                 "%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13594                 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13595                 "%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13596                 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13597                 "%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13598                 "%ld_${var}_gep_3_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13599                 "%ld_${var}_gep_3_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13600                 "%ld_${var}_gep_4_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13601                 "%ld_${var}_gep_4_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13602                 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13603                 " %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13604                 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13605                 " %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13606                 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13607                 " %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13608                 " %ld_${var}_ld_3_0 = OpLoad %u32 %ld_${var}_gep_3_0\n"
13609                 " %ld_${var}_ld_3_1 = OpLoad %u32 %ld_${var}_gep_3_1\n"
13610                 " %ld_${var}_ld_4_0 = OpLoad %u32 %ld_${var}_gep_4_0\n"
13611                 " %ld_${var}_ld_4_1 = OpLoad %u32 %ld_${var}_gep_4_1\n"
13612                 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13613                 " %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13614                 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13615                 " %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
13616                 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13617                 " %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
13618                 " %ld_${var}_bc_3_0 = OpBitcast %v2f16 %ld_${var}_ld_3_0\n"
13619                 " %ld_${var}_bc_3_1 = OpBitcast %v2f16 %ld_${var}_ld_3_1\n"
13620                 " %ld_${var}_bc_4_0 = OpBitcast %v2f16 %ld_${var}_ld_4_0\n"
13621                 " %ld_${var}_bc_4_1 = OpBitcast %v2f16 %ld_${var}_ld_4_1\n"
13622                 "  %ld_${var}_vec_0 = OpVectorShuffle %v3f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2\n"
13623                 "  %ld_${var}_vec_1 = OpVectorShuffle %v3f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2\n"
13624                 "  %ld_${var}_vec_2 = OpVectorShuffle %v3f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2\n"
13625                 "  %ld_${var}_vec_3 = OpVectorShuffle %v3f16 %ld_${var}_bc_3_0 %ld_${var}_bc_3_1 0 1 2\n"
13626                 "  %ld_${var}_vec_4 = OpVectorShuffle %v3f16 %ld_${var}_bc_4_0 %ld_${var}_bc_4_1 0 1 2\n"
13627                 "   %ld_${var}_cons = OpCompositeConstruct %v3f16arr5 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2 %ld_${var}_vec_3 %ld_${var}_vec_4\n"
13628                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13629                 "                     OpReturnValue %ld_${var}_st_test\n"
13630                 "                     OpFunctionEnd\n"
13631         );
13632
13633         const string loadV4F16Arr3
13634         (
13635                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13636                 "  %ld_${var}_param = OpFunctionParameter %i32\n"
13637                 "  %ld_${var}_entry = OpLabel\n"
13638                 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13639                 "%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13640                 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13641                 "%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13642                 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13643                 "%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13644                 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13645                 " %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13646                 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13647                 " %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13648                 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13649                 " %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13650                 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13651                 " %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13652                 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13653                 " %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
13654                 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13655                 " %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
13656                 "  %ld_${var}_vec_0 = OpVectorShuffle %v4f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2 3\n"
13657                 "  %ld_${var}_vec_1 = OpVectorShuffle %v4f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2 3\n"
13658                 "  %ld_${var}_vec_2 = OpVectorShuffle %v4f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2 3\n"
13659                 "   %ld_${var}_cons = OpCompositeConstruct %v4f16arr3 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2\n"
13660                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13661                 "                     OpReturnValue %ld_${var}_st_test\n"
13662                 "                     OpFunctionEnd\n"
13663         );
13664
13665         const string loadStruct16Arr3
13666         (
13667                 "          %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13668                 "    %ld_${var}_param = OpFunctionParameter %i32\n"
13669                 "    %ld_${var}_entry = OpLabel\n"
13670                 "%ld_${var}_gep_0_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13671                 "%ld_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13672                 "%ld_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13673                 "%ld_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13674                 "%ld_${var}_gep_1_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13675                 "%ld_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13676                 "%ld_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13677                 "%ld_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13678                 "%ld_${var}_gep_2_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13679                 "%ld_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13680                 "%ld_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
13681                 "%ld_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
13682                 " %ld_${var}_ld_0_0   = OpLoad %u32 %ld_${var}_gep_0_0\n"
13683                 " %ld_${var}_ld_0_1_0 = OpLoad %u32 %ld_${var}_gep_0_1_0\n"
13684                 " %ld_${var}_ld_0_1_1 = OpLoad %u32 %ld_${var}_gep_0_1_1\n"
13685                 " %ld_${var}_ld_0_1_2 = OpLoad %u32 %ld_${var}_gep_0_1_2\n"
13686                 " %ld_${var}_ld_1_0   = OpLoad %u32 %ld_${var}_gep_1_0\n"
13687                 " %ld_${var}_ld_1_1_0 = OpLoad %u32 %ld_${var}_gep_1_1_0\n"
13688                 " %ld_${var}_ld_1_1_1 = OpLoad %u32 %ld_${var}_gep_1_1_1\n"
13689                 " %ld_${var}_ld_1_1_2 = OpLoad %u32 %ld_${var}_gep_1_1_2\n"
13690                 " %ld_${var}_ld_2_0   = OpLoad %u32 %ld_${var}_gep_2_0\n"
13691                 " %ld_${var}_ld_2_1_0 = OpLoad %u32 %ld_${var}_gep_2_1_0\n"
13692                 " %ld_${var}_ld_2_1_1 = OpLoad %u32 %ld_${var}_gep_2_1_1\n"
13693                 " %ld_${var}_ld_2_1_2 = OpLoad %u32 %ld_${var}_gep_2_1_2\n"
13694                 " %ld_${var}_bc_0_0   = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13695                 " %ld_${var}_bc_0_1_0 = OpBitcast %v2f16 %ld_${var}_ld_0_1_0\n"
13696                 " %ld_${var}_bc_0_1_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1_1\n"
13697                 " %ld_${var}_bc_0_1_2 = OpBitcast %v2f16 %ld_${var}_ld_0_1_2\n"
13698                 " %ld_${var}_bc_1_0   = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13699                 " %ld_${var}_bc_1_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_1_0\n"
13700                 " %ld_${var}_bc_1_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1_1\n"
13701                 " %ld_${var}_bc_1_1_2 = OpBitcast %v2f16 %ld_${var}_ld_1_1_2\n"
13702                 " %ld_${var}_bc_2_0   = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13703                 " %ld_${var}_bc_2_1_0 = OpBitcast %v2f16 %ld_${var}_ld_2_1_0\n"
13704                 " %ld_${var}_bc_2_1_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1_1\n"
13705                 " %ld_${var}_bc_2_1_2 = OpBitcast %v2f16 %ld_${var}_ld_2_1_2\n"
13706                 "    %ld_${var}_arr_0 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_0_1_0 %ld_${var}_bc_0_1_1 %ld_${var}_bc_0_1_2\n"
13707                 "    %ld_${var}_arr_1 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_1_1_0 %ld_${var}_bc_1_1_1 %ld_${var}_bc_1_1_2\n"
13708                 "    %ld_${var}_arr_2 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_2_1_0 %ld_${var}_bc_2_1_1 %ld_${var}_bc_2_1_2\n"
13709                 "     %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0_0 0\n"
13710                 "     %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_1_0 0\n"
13711                 "     %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_2_0 0\n"
13712                 "     %ld_${var}_st_0 = OpCompositeConstruct %struct16 %ld_${var}_ex_0 %ld_${var}_arr_0\n"
13713                 "     %ld_${var}_st_1 = OpCompositeConstruct %struct16 %ld_${var}_ex_1 %ld_${var}_arr_1\n"
13714                 "     %ld_${var}_st_2 = OpCompositeConstruct %struct16 %ld_${var}_ex_2 %ld_${var}_arr_2\n"
13715                 "     %ld_${var}_cons = OpCompositeConstruct %struct16arr3 %ld_${var}_st_0 %ld_${var}_st_1 %ld_${var}_st_2\n"
13716                 "  %ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13717                 "                       OpReturnValue %ld_${var}_st_test\n"
13718                 "                      OpFunctionEnd\n"
13719         );
13720
13721         const string storeF16
13722         (
13723                 "       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13724                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13725                 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13726                 " %st_${var}_entry = OpLabel\n"
13727                 "    %st_${var}_ex = OpCompositeExtract %f16 %st_${var}_param1 0\n"
13728                 "  %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
13729                 "                    OpReturn\n"
13730                 "                    OpFunctionEnd\n" +
13731                 storeScalarF16AsUint
13732         );
13733
13734         const string storeV2F16
13735         (
13736                 "       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13737                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13738                 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13739                 " %st_${var}_entry = OpLabel\n"
13740                 "    %st_${var}_ex = OpCompositeExtract %v2f16 %st_${var}_param1 0\n"
13741                 "  %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
13742                 "                    OpReturn\n"
13743                 "                    OpFunctionEnd\n" +
13744                 storeV2F16AsUint
13745         );
13746
13747         const string storeV3F16
13748         (
13749                 "       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13750                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13751                 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13752                 " %st_${var}_entry = OpLabel\n"
13753                 "    %st_${var}_ex = OpCompositeExtract %v3f16 %st_${var}_param1 0\n"
13754                 " %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
13755                 " %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
13756                 "  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13757                 "  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13758                 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13759                 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13760                 "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13761                 "                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13762                 "                    OpReturn\n"
13763                 "                    OpFunctionEnd\n"
13764         );
13765
13766         const string storeV4F16
13767         (
13768                 "       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13769                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13770                 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13771                 " %st_${var}_entry = OpLabel\n"
13772                 "    %st_${var}_ex = OpCompositeExtract %v4f16 %st_${var}_param1 0\n"
13773                 " %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
13774                 " %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
13775                 "  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13776                 "  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13777                 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13778                 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13779                 "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13780                 "                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13781                 "                    OpReturn\n"
13782                 "                    OpFunctionEnd\n"
13783         );
13784
13785         const string storeF16Arr3
13786         (
13787                 "       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13788                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13789                 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13790                 " %st_${var}_entry = OpLabel\n"
13791                 "  %st_${var}_ex_0 = OpCompositeExtract %f16 %st_${var}_param1 0 0\n"
13792                 "  %st_${var}_ex_1 = OpCompositeExtract %f16 %st_${var}_param1 0 1\n"
13793                 "  %st_${var}_ex_2 = OpCompositeExtract %f16 %st_${var}_param1 0 2\n"
13794                 " %st_${var}_vec_0 = OpCompositeConstruct %v2f16 %st_${var}_ex_0 %st_${var}_ex_1\n"
13795                 " %st_${var}_vec_1 = OpCompositeConstruct %v2f16 %st_${var}_ex_2 %c_f16_na\n"
13796                 "  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13797                 "  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13798                 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13799                 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13800                 "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13801                 "                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13802                 "                    OpReturn\n"
13803                 "                    OpFunctionEnd\n"
13804         );
13805
13806         const string storeV2F16Arr5
13807         (
13808                 "       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13809                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13810                 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13811                 " %st_${var}_entry = OpLabel\n"
13812                 "  %st_${var}_ex_0 = OpCompositeExtract %v2f16 %st_${var}_param1 0 0\n"
13813                 "  %st_${var}_ex_1 = OpCompositeExtract %v2f16 %st_${var}_param1 0 1\n"
13814                 "  %st_${var}_ex_2 = OpCompositeExtract %v2f16 %st_${var}_param1 0 2\n"
13815                 "  %st_${var}_ex_3 = OpCompositeExtract %v2f16 %st_${var}_param1 0 3\n"
13816                 "  %st_${var}_ex_4 = OpCompositeExtract %v2f16 %st_${var}_param1 0 4\n"
13817                 "  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_ex_0\n"
13818                 "  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_ex_1\n"
13819                 "  %st_${var}_bc_2 = OpBitcast %u32 %st_${var}_ex_2\n"
13820                 "  %st_${var}_bc_3 = OpBitcast %u32 %st_${var}_ex_3\n"
13821                 "  %st_${var}_bc_4 = OpBitcast %u32 %st_${var}_ex_4\n"
13822                 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13823                 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13824                 " %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13825                 " %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13826                 " %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13827                 "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13828                 "                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13829                 "                    OpStore %st_${var}_gep_2 %st_${var}_bc_2\n"
13830                 "                    OpStore %st_${var}_gep_3 %st_${var}_bc_3\n"
13831                 "                    OpStore %st_${var}_gep_4 %st_${var}_bc_4\n"
13832                 "                    OpReturn\n"
13833                 "                    OpFunctionEnd\n"
13834         );
13835
13836         const string storeV3F16Arr5
13837         (
13838                 "       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13839                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13840                 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13841                 " %st_${var}_entry = OpLabel\n"
13842                 "  %st_${var}_ex_0 = OpCompositeExtract %v3f16 %st_${var}_param1 0 0\n"
13843                 "  %st_${var}_ex_1 = OpCompositeExtract %v3f16 %st_${var}_param1 0 1\n"
13844                 "  %st_${var}_ex_2 = OpCompositeExtract %v3f16 %st_${var}_param1 0 2\n"
13845                 "  %st_${var}_ex_3 = OpCompositeExtract %v3f16 %st_${var}_param1 0 3\n"
13846                 "  %st_${var}_ex_4 = OpCompositeExtract %v3f16 %st_${var}_param1 0 4\n"
13847                 "%st_${var}_v2_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 0 1\n"
13848                 "%st_${var}_v2_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 2 3\n"
13849                 "%st_${var}_v2_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 0 1\n"
13850                 "%st_${var}_v2_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 2 3\n"
13851                 "%st_${var}_v2_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 0 1\n"
13852                 "%st_${var}_v2_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 2 3\n"
13853                 "%st_${var}_v2_3_0 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 0 1\n"
13854                 "%st_${var}_v2_3_1 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 2 3\n"
13855                 "%st_${var}_v2_4_0 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 0 1\n"
13856                 "%st_${var}_v2_4_1 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 2 3\n"
13857                 "%st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
13858                 "%st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
13859                 "%st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
13860                 "%st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
13861                 "%st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
13862                 "%st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
13863                 "%st_${var}_bc_3_0 = OpBitcast %u32 %st_${var}_v2_3_0\n"
13864                 "%st_${var}_bc_3_1 = OpBitcast %u32 %st_${var}_v2_3_1\n"
13865                 "%st_${var}_bc_4_0 = OpBitcast %u32 %st_${var}_v2_4_0\n"
13866                 "%st_${var}_bc_4_1 = OpBitcast %u32 %st_${var}_v2_4_1\n"
13867                 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13868                 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13869                 " %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13870                 " %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13871                 " %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13872                 " %st_${var}_gep_5 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13873                 " %st_${var}_gep_6 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13874                 " %st_${var}_gep_7 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13875                 " %st_${var}_gep_8 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13876                 " %st_${var}_gep_9 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13877                 "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0_0\n"
13878                 "                    OpStore %st_${var}_gep_1 %st_${var}_bc_0_1\n"
13879                 "                    OpStore %st_${var}_gep_2 %st_${var}_bc_1_0\n"
13880                 "                    OpStore %st_${var}_gep_3 %st_${var}_bc_1_1\n"
13881                 "                    OpStore %st_${var}_gep_4 %st_${var}_bc_2_0\n"
13882                 "                    OpStore %st_${var}_gep_5 %st_${var}_bc_2_1\n"
13883                 "                    OpStore %st_${var}_gep_6 %st_${var}_bc_3_0\n"
13884                 "                    OpStore %st_${var}_gep_7 %st_${var}_bc_3_1\n"
13885                 "                    OpStore %st_${var}_gep_8 %st_${var}_bc_4_0\n"
13886                 "                    OpStore %st_${var}_gep_9 %st_${var}_bc_4_1\n"
13887                 "                    OpReturn\n"
13888                 "                    OpFunctionEnd\n"
13889         );
13890
13891         const string storeV4F16Arr3
13892         (
13893                 "        %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13894                 " %st_${var}_param1 = OpFunctionParameter %st_test\n"
13895                 " %st_${var}_param2 = OpFunctionParameter %i32\n"
13896                 "  %st_${var}_entry = OpLabel\n"
13897                 "   %st_${var}_ex_0 = OpCompositeExtract %v4f16 %st_${var}_param1 0 0\n"
13898                 "   %st_${var}_ex_1 = OpCompositeExtract %v4f16 %st_${var}_param1 0 1\n"
13899                 "   %st_${var}_ex_2 = OpCompositeExtract %v4f16 %st_${var}_param1 0 2\n"
13900                 "%st_${var}_vec_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 0 1\n"
13901                 "%st_${var}_vec_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 2 3\n"
13902                 "%st_${var}_vec_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 0 1\n"
13903                 "%st_${var}_vec_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 2 3\n"
13904                 "%st_${var}_vec_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 0 1\n"
13905                 "%st_${var}_vec_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 2 3\n"
13906                 " %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_vec_0_0\n"
13907                 " %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_vec_0_1\n"
13908                 " %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_vec_1_0\n"
13909                 " %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_vec_1_1\n"
13910                 " %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_vec_2_0\n"
13911                 " %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_vec_2_1\n"
13912                 "%st_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13913                 "%st_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13914                 "%st_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13915                 "%st_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13916                 "%st_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13917                 "%st_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13918                 "                     OpStore %st_${var}_gep_0_0 %st_${var}_bc_0_0\n"
13919                 "                     OpStore %st_${var}_gep_0_1 %st_${var}_bc_0_1\n"
13920                 "                     OpStore %st_${var}_gep_1_0 %st_${var}_bc_1_0\n"
13921                 "                     OpStore %st_${var}_gep_1_1 %st_${var}_bc_1_1\n"
13922                 "                     OpStore %st_${var}_gep_2_0 %st_${var}_bc_2_0\n"
13923                 "                     OpStore %st_${var}_gep_2_1 %st_${var}_bc_2_1\n"
13924                 "                     OpReturn\n"
13925                 "                     OpFunctionEnd\n"
13926         );
13927
13928         const string storeStruct16Arr3
13929         (
13930                 "          %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13931                 "   %st_${var}_param1 = OpFunctionParameter %st_test\n"
13932                 "   %st_${var}_param2 = OpFunctionParameter %i32\n"
13933                 "    %st_${var}_entry = OpLabel\n"
13934                 "     %st_${var}_st_0 = OpCompositeExtract %struct16 %st_${var}_param1 0 0\n"
13935                 "     %st_${var}_st_1 = OpCompositeExtract %struct16 %st_${var}_param1 0 1\n"
13936                 "     %st_${var}_st_2 = OpCompositeExtract %struct16 %st_${var}_param1 0 2\n"
13937                 "   %st_${var}_el_0   = OpCompositeExtract   %f16 %st_${var}_st_0 0\n"
13938                 "   %st_${var}_v2_0_0 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 0\n"
13939                 "   %st_${var}_v2_0_1 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 1\n"
13940                 "   %st_${var}_v2_0_2 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 2\n"
13941                 "   %st_${var}_el_1   = OpCompositeExtract   %f16 %st_${var}_st_1 0\n"
13942                 "   %st_${var}_v2_1_0 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 0\n"
13943                 "   %st_${var}_v2_1_1 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 1\n"
13944                 "   %st_${var}_v2_1_2 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 2\n"
13945                 "   %st_${var}_el_2   = OpCompositeExtract   %f16 %st_${var}_st_2 0\n"
13946                 "   %st_${var}_v2_2_0 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 0\n"
13947                 "   %st_${var}_v2_2_1 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 1\n"
13948                 "   %st_${var}_v2_2_2 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 2\n"
13949                 "     %st_${var}_v2_0 = OpCompositeConstruct %v2f16 %st_${var}_el_0 %c_f16_na\n"
13950                 "     %st_${var}_v2_1 = OpCompositeConstruct %v2f16 %st_${var}_el_1 %c_f16_na\n"
13951                 "     %st_${var}_v2_2 = OpCompositeConstruct %v2f16 %st_${var}_el_2 %c_f16_na\n"
13952                 "   %st_${var}_bc_0   = OpBitcast %u32 %st_${var}_v2_0\n"
13953                 "   %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
13954                 "   %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
13955                 "   %st_${var}_bc_0_2 = OpBitcast %u32 %st_${var}_v2_0_2\n"
13956                 "   %st_${var}_bc_1   = OpBitcast %u32 %st_${var}_v2_1\n"
13957                 "   %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
13958                 "   %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
13959                 "   %st_${var}_bc_1_2 = OpBitcast %u32 %st_${var}_v2_1_2\n"
13960                 "   %st_${var}_bc_2   = OpBitcast %u32 %st_${var}_v2_2\n"
13961                 "   %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
13962                 "   %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
13963                 "   %st_${var}_bc_2_2 = OpBitcast %u32 %st_${var}_v2_2_2\n"
13964                 "%st_${var}_gep_0_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13965                 "%st_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13966                 "%st_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13967                 "%st_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13968                 "%st_${var}_gep_1_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13969                 "%st_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13970                 "%st_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13971                 "%st_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13972                 "%st_${var}_gep_2_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13973                 "%st_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13974                 "%st_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
13975                 "%st_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
13976                 "                       OpStore %st_${var}_gep_0_0_0 %st_${var}_bc_0\n"
13977                 "                       OpStore %st_${var}_gep_0_1_0 %st_${var}_bc_0_0\n"
13978                 "                       OpStore %st_${var}_gep_0_1_1 %st_${var}_bc_0_1\n"
13979                 "                       OpStore %st_${var}_gep_0_1_2 %st_${var}_bc_0_2\n"
13980                 "                       OpStore %st_${var}_gep_1_0_0 %st_${var}_bc_1\n"
13981                 "                       OpStore %st_${var}_gep_1_1_0 %st_${var}_bc_1_0\n"
13982                 "                       OpStore %st_${var}_gep_1_1_1 %st_${var}_bc_1_1\n"
13983                 "                       OpStore %st_${var}_gep_1_1_2 %st_${var}_bc_1_2\n"
13984                 "                       OpStore %st_${var}_gep_2_0_0 %st_${var}_bc_2\n"
13985                 "                       OpStore %st_${var}_gep_2_1_0 %st_${var}_bc_2_0\n"
13986                 "                       OpStore %st_${var}_gep_2_1_1 %st_${var}_bc_2_1\n"
13987                 "                       OpStore %st_${var}_gep_2_1_2 %st_${var}_bc_2_2\n"
13988                 "                       OpReturn\n"
13989                 "                       OpFunctionEnd\n"
13990         );
13991
13992         struct OpParts
13993         {
13994                 const char*     premainDecls;
13995                 const char*     swFunCall;
13996                 const char*     swFunHeader;
13997                 const char*     caseDefaultValue;
13998                 const char*     argsPartial;
13999         };
14000
14001         OpParts                                                         opPartsArray[]                  =
14002         {
14003                 // OpCompositeInsert
14004                 {
14005                         "       %fun_t = OpTypeFunction %st_test %f16 %st_test %i32\n"
14006                         "    %SSBO_src = OpTypeStruct %ra_f16\n"
14007                         "    %SSBO_dst = OpTypeStruct %ra_st\n",
14008
14009                         "   %val_new = OpFunctionCall %f16 %ld_arg_ssbo_src %ndx\n"
14010                         "   %val_old = OpFunctionCall %st_test %ld_ssbo_dst %c_i32_0\n"
14011                         "   %val_dst = OpFunctionCall %st_test %sw_fun %val_new %val_old %ndx\n",
14012
14013                         "   %sw_fun = OpFunction %st_test None %fun_t\n"
14014                         "%sw_paramv = OpFunctionParameter %f16\n",
14015
14016                         "%sw_param",
14017
14018                         "%st_test %sw_paramv %sw_param",
14019                 },
14020                 // OpCompositeExtract
14021                 {
14022                         "       %fun_t = OpTypeFunction %f16 %st_test %i32\n"
14023                         "    %SSBO_src = OpTypeStruct %ra_st\n"
14024                         "    %SSBO_dst = OpTypeStruct %ra_f16\n",
14025
14026                         "   %val_src = OpFunctionCall %st_test %ld_ssbo_src %c_i32_0\n"
14027                         "   %val_dst = OpFunctionCall %f16 %sw_fun %val_src %ndx\n",
14028
14029                         "   %sw_fun = OpFunction %f16 None %fun_t\n",
14030
14031                         "%c_f16_na",
14032
14033                         "%f16 %sw_param",
14034                 },
14035         };
14036
14037         DE_ASSERT(opIndex < DE_LENGTH_OF_ARRAY(opPartsArray));
14038
14039         const char*     accessPathF16[] =
14040         {
14041                 "0",                    // %f16
14042                 DE_NULL,
14043         };
14044         const char*     accessPathV2F16[] =
14045         {
14046                 "0 0",                  // %v2f16
14047                 "0 1",
14048         };
14049         const char*     accessPathV3F16[] =
14050         {
14051                 "0 0",                  // %v3f16
14052                 "0 1",
14053                 "0 2",
14054                 DE_NULL,
14055         };
14056         const char*     accessPathV4F16[] =
14057         {
14058                 "0 0",                  // %v4f16"
14059                 "0 1",
14060                 "0 2",
14061                 "0 3",
14062         };
14063         const char*     accessPathF16Arr3[] =
14064         {
14065                 "0 0",                  // %f16arr3
14066                 "0 1",
14067                 "0 2",
14068                 DE_NULL,
14069         };
14070         const char*     accessPathStruct16Arr3[] =
14071         {
14072                 "0 0 0",                // %struct16arr3
14073                 DE_NULL,
14074                 "0 0 1 0 0",
14075                 "0 0 1 0 1",
14076                 "0 0 1 1 0",
14077                 "0 0 1 1 1",
14078                 "0 0 1 2 0",
14079                 "0 0 1 2 1",
14080                 "0 1 0",
14081                 DE_NULL,
14082                 "0 1 1 0 0",
14083                 "0 1 1 0 1",
14084                 "0 1 1 1 0",
14085                 "0 1 1 1 1",
14086                 "0 1 1 2 0",
14087                 "0 1 1 2 1",
14088                 "0 2 0",
14089                 DE_NULL,
14090                 "0 2 1 0 0",
14091                 "0 2 1 0 1",
14092                 "0 2 1 1 0",
14093                 "0 2 1 1 1",
14094                 "0 2 1 2 0",
14095                 "0 2 1 2 1",
14096         };
14097         const char*     accessPathV2F16Arr5[] =
14098         {
14099                 "0 0 0",                // %v2f16arr5
14100                 "0 0 1",
14101                 "0 1 0",
14102                 "0 1 1",
14103                 "0 2 0",
14104                 "0 2 1",
14105                 "0 3 0",
14106                 "0 3 1",
14107                 "0 4 0",
14108                 "0 4 1",
14109         };
14110         const char*     accessPathV3F16Arr5[] =
14111         {
14112                 "0 0 0",                // %v3f16arr5
14113                 "0 0 1",
14114                 "0 0 2",
14115                 DE_NULL,
14116                 "0 1 0",
14117                 "0 1 1",
14118                 "0 1 2",
14119                 DE_NULL,
14120                 "0 2 0",
14121                 "0 2 1",
14122                 "0 2 2",
14123                 DE_NULL,
14124                 "0 3 0",
14125                 "0 3 1",
14126                 "0 3 2",
14127                 DE_NULL,
14128                 "0 4 0",
14129                 "0 4 1",
14130                 "0 4 2",
14131                 DE_NULL,
14132         };
14133         const char*     accessPathV4F16Arr3[] =
14134         {
14135                 "0 0 0",                // %v4f16arr3
14136                 "0 0 1",
14137                 "0 0 2",
14138                 "0 0 3",
14139                 "0 1 0",
14140                 "0 1 1",
14141                 "0 1 2",
14142                 "0 1 3",
14143                 "0 2 0",
14144                 "0 2 1",
14145                 "0 2 2",
14146                 "0 2 3",
14147                 DE_NULL,
14148                 DE_NULL,
14149                 DE_NULL,
14150                 DE_NULL,
14151         };
14152
14153         struct TypeTestParameters
14154         {
14155                 const char*             name;
14156                 size_t                  accessPathLength;
14157                 const char**    accessPath;
14158                 const string    loadFunction;
14159                 const string    storeFunction;
14160         };
14161
14162         const TypeTestParameters typeTestParameters[] =
14163         {
14164                 {       "f16",                  DE_LENGTH_OF_ARRAY(accessPathF16),                      accessPathF16,                  loadF16,                        storeF16                 },
14165                 {       "v2f16",                DE_LENGTH_OF_ARRAY(accessPathV2F16),            accessPathV2F16,                loadV2F16,                      storeV2F16               },
14166                 {       "v3f16",                DE_LENGTH_OF_ARRAY(accessPathV3F16),            accessPathV3F16,                loadV3F16,                      storeV3F16               },
14167                 {       "v4f16",                DE_LENGTH_OF_ARRAY(accessPathV4F16),            accessPathV4F16,                loadV4F16,                      storeV4F16                },
14168                 {       "f16arr3",              DE_LENGTH_OF_ARRAY(accessPathF16Arr3),          accessPathF16Arr3,              loadF16Arr3,            storeF16Arr3      },
14169                 {       "v2f16arr5",    DE_LENGTH_OF_ARRAY(accessPathV2F16Arr5),        accessPathV2F16Arr5,    loadV2F16Arr5,          storeV2F16Arr5    },
14170                 {       "v3f16arr5",    DE_LENGTH_OF_ARRAY(accessPathV3F16Arr5),        accessPathV3F16Arr5,    loadV3F16Arr5,          storeV3F16Arr5    },
14171                 {       "v4f16arr3",    DE_LENGTH_OF_ARRAY(accessPathV4F16Arr3),        accessPathV4F16Arr3,    loadV4F16Arr3,          storeV4F16Arr3    },
14172                 {       "struct16arr3", DE_LENGTH_OF_ARRAY(accessPathStruct16Arr3),     accessPathStruct16Arr3, loadStruct16Arr3,       storeStruct16Arr3},
14173         };
14174
14175         for (size_t typeTestNdx = 0; typeTestNdx < DE_LENGTH_OF_ARRAY(typeTestParameters); ++typeTestNdx)
14176         {
14177                 const OpParts           opParts                         = opPartsArray[opIndex];
14178                 const string            testName                        = typeTestParameters[typeTestNdx].name;
14179                 const size_t            structItemsCount        = typeTestParameters[typeTestNdx].accessPathLength;
14180                 const char**            accessPath                      = typeTestParameters[typeTestNdx].accessPath;
14181                 SpecResource            specResource;
14182                 map<string, string>     specs;
14183                 VulkanFeatures          features;
14184                 map<string, string>     fragments;
14185                 vector<string>          extensions;
14186                 vector<deFloat16>       inputFP16;
14187                 vector<deFloat16>       unusedFP16Output;
14188
14189                 // Generate values for input
14190                 inputFP16.reserve(structItemsCount);
14191                 for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
14192                         inputFP16.push_back((accessPath[structItemNdx] == DE_NULL) ? exceptionValue : tcu::Float16(float(structItemNdx)).bits());
14193
14194                 unusedFP16Output.resize(structItemsCount);
14195
14196                 // Generate cases for OpSwitch
14197                 {
14198                         string  caseBodies;
14199                         string  caseList;
14200
14201                         for (deUint32 caseNdx = 0; caseNdx < structItemsCount; ++caseNdx)
14202                                 if (accessPath[caseNdx] != DE_NULL)
14203                                 {
14204                                         map<string, string>     specCase;
14205
14206                                         specCase["case_ndx"]            = de::toString(caseNdx);
14207                                         specCase["access_path"]         = accessPath[caseNdx];
14208                                         specCase["op_args_part"]        = opParts.argsPartial;
14209                                         specCase["op_name"]                     = opName;
14210
14211                                         caseBodies      += testCaseBody.specialize(specCase);
14212                                         caseList        += de::toString(caseNdx) + " %case_" + de::toString(caseNdx) + " ";
14213                                 }
14214
14215                         specs["case_bodies"]    = caseBodies;
14216                         specs["case_list"]              = caseList;
14217                 }
14218
14219                 specs["num_elements"]                   = de::toString(structItemsCount);
14220                 specs["field_type"]                             = typeTestParameters[typeTestNdx].name;
14221                 specs["struct_item_size"]               = de::toString(structItemsCount * sizeof(deFloat16));
14222                 specs["struct_u32s"]                    = de::toString(structItemsCount / 2);
14223                 specs["op_premain_decls"]               = opParts.premainDecls;
14224                 specs["op_sw_fun_call"]                 = opParts.swFunCall;
14225                 specs["op_sw_fun_header"]               = opParts.swFunHeader;
14226                 specs["op_case_default_value"]  = opParts.caseDefaultValue;
14227                 if (opIndex == 0) {
14228                         specs["st_call"]                        = "st_ssbo_dst";
14229                         specs["st_ndx"]                         = "c_i32_0";
14230                 } else {
14231                         specs["st_call"]                        = "st_fn_ssbo_dst";
14232                         specs["st_ndx"]                         = "ndx";
14233                 }
14234
14235                 fragments["capability"]         = "OpCapability Float16\n";
14236                 fragments["decoration"]         = decoration.specialize(specs);
14237                 fragments["pre_main"]           = preMain.specialize(specs);
14238                 fragments["testfun"]            = testFun.specialize(specs);
14239                 if (opIndex == 0) {
14240                         fragments["testfun"]            += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src"}});
14241                         fragments["testfun"]            += StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_dst"}});
14242                         fragments["testfun"]            += StringTemplate(typeTestParameters[typeTestNdx].storeFunction).specialize({{"var", "ssbo_dst"}});
14243                 } else {
14244                         fragments["testfun"]            += StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_src"}});
14245                         fragments["testfun"]            += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
14246                 }
14247
14248                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputFP16)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14249                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(unusedFP16Output)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14250                 specResource.verifyIO = compareFP16CompositeFunc;
14251
14252                 extensions.push_back("VK_KHR_shader_float16_int8");
14253
14254                 features.extFloat16Int8.shaderFloat16 = true;
14255
14256                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
14257         }
14258
14259         return testGroup.release();
14260 }
14261
14262 struct fp16PerComponent
14263 {
14264         fp16PerComponent()
14265                 : flavor(0)
14266                 , floatFormat16 (-14, 15, 10, true)
14267                 , outCompCount(0)
14268                 , argCompCount(3, 0)
14269         {
14270         }
14271
14272         bool                    callOncePerComponent    ()                                                                      { return true; }
14273         deUint32                getComponentValidity    ()                                                                      { return static_cast<deUint32>(-1); }
14274
14275         virtual double  getULPs                                 (vector<const deFloat16*>&)                     { return 1.0; }
14276         virtual double  getMin                                  (double value, double ulps)                     { return value - floatFormat16.ulp(deAbs(value), ulps); }
14277         virtual double  getMax                                  (double value, double ulps)                     { return value + floatFormat16.ulp(deAbs(value), ulps); }
14278
14279         virtual size_t  getFlavorCount                  ()                                                                      { return flavorNames.empty() ? 1 : flavorNames.size(); }
14280         virtual void    setFlavor                               (size_t flavorNo)                                       { DE_ASSERT(flavorNo < getFlavorCount()); flavor = flavorNo; }
14281         virtual size_t  getFlavor                               ()                                                                      { return flavor; }
14282         virtual string  getCurrentFlavorName    ()                                                                      { return flavorNames.empty() ? string("") : flavorNames[getFlavor()]; }
14283
14284         virtual void    setOutCompCount                 (size_t compCount)                                      { outCompCount = compCount; }
14285         virtual size_t  getOutCompCount                 ()                                                                      { return outCompCount; }
14286
14287         virtual void    setArgCompCount                 (size_t argNo, size_t compCount)        { argCompCount[argNo] = compCount; }
14288         virtual size_t  getArgCompCount                 (size_t argNo)                                          { return argCompCount[argNo]; }
14289
14290 protected:
14291         size_t                          flavor;
14292         tcu::FloatFormat        floatFormat16;
14293         size_t                          outCompCount;
14294         vector<size_t>          argCompCount;
14295         vector<string>          flavorNames;
14296 };
14297
14298 struct fp16OpFNegate : public fp16PerComponent
14299 {
14300         template <class fp16type>
14301         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14302         {
14303                 const fp16type  x               (*in[0]);
14304                 const double    d               (x.asDouble());
14305                 const double    result  (0.0 - d);
14306
14307                 out[0] = fp16type(result).bits();
14308                 min[0] = getMin(result, getULPs(in));
14309                 max[0] = getMax(result, getULPs(in));
14310
14311                 return true;
14312         }
14313 };
14314
14315 struct fp16Round : public fp16PerComponent
14316 {
14317         fp16Round() : fp16PerComponent()
14318         {
14319                 flavorNames.push_back("Floor(x+0.5)");
14320                 flavorNames.push_back("Floor(x-0.5)");
14321                 flavorNames.push_back("RoundEven");
14322         }
14323
14324         template<class fp16type>
14325         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14326         {
14327                 const fp16type  x               (*in[0]);
14328                 const double    d               (x.asDouble());
14329                 double                  result  (0.0);
14330
14331                 switch (flavor)
14332                 {
14333                         case 0:         result = deRound(d);            break;
14334                         case 1:         result = deFloor(d - 0.5);      break;
14335                         case 2:         result = deRoundEven(d);        break;
14336                         default:        TCU_THROW(InternalError, "Invalid flavor specified");
14337                 }
14338
14339                 out[0] = fp16type(result).bits();
14340                 min[0] = getMin(result, getULPs(in));
14341                 max[0] = getMax(result, getULPs(in));
14342
14343                 return true;
14344         }
14345 };
14346
14347 struct fp16RoundEven : public fp16PerComponent
14348 {
14349         template<class fp16type>
14350         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14351         {
14352                 const fp16type  x               (*in[0]);
14353                 const double    d               (x.asDouble());
14354                 const double    result  (deRoundEven(d));
14355
14356                 out[0] = fp16type(result).bits();
14357                 min[0] = getMin(result, getULPs(in));
14358                 max[0] = getMax(result, getULPs(in));
14359
14360                 return true;
14361         }
14362 };
14363
14364 struct fp16Trunc : public fp16PerComponent
14365 {
14366         template<class fp16type>
14367         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14368         {
14369                 const fp16type  x               (*in[0]);
14370                 const double    d               (x.asDouble());
14371                 const double    result  (deTrunc(d));
14372
14373                 out[0] = fp16type(result).bits();
14374                 min[0] = getMin(result, getULPs(in));
14375                 max[0] = getMax(result, getULPs(in));
14376
14377                 return true;
14378         }
14379 };
14380
14381 struct fp16FAbs : public fp16PerComponent
14382 {
14383         template<class fp16type>
14384         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14385         {
14386                 const fp16type  x               (*in[0]);
14387                 const double    d               (x.asDouble());
14388                 const double    result  (deAbs(d));
14389
14390                 out[0] = fp16type(result).bits();
14391                 min[0] = getMin(result, getULPs(in));
14392                 max[0] = getMax(result, getULPs(in));
14393
14394                 return true;
14395         }
14396 };
14397
14398 struct fp16FSign : public fp16PerComponent
14399 {
14400         template<class fp16type>
14401         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14402         {
14403                 const fp16type  x               (*in[0]);
14404                 const double    d               (x.asDouble());
14405                 const double    result  (deSign(d));
14406
14407                 if (x.isNaN())
14408                         return false;
14409
14410                 out[0] = fp16type(result).bits();
14411                 min[0] = getMin(result, getULPs(in));
14412                 max[0] = getMax(result, getULPs(in));
14413
14414                 return true;
14415         }
14416 };
14417
14418 struct fp16Floor : public fp16PerComponent
14419 {
14420         template<class fp16type>
14421         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14422         {
14423                 const fp16type  x               (*in[0]);
14424                 const double    d               (x.asDouble());
14425                 const double    result  (deFloor(d));
14426
14427                 out[0] = fp16type(result).bits();
14428                 min[0] = getMin(result, getULPs(in));
14429                 max[0] = getMax(result, getULPs(in));
14430
14431                 return true;
14432         }
14433 };
14434
14435 struct fp16Ceil : public fp16PerComponent
14436 {
14437         template<class fp16type>
14438         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14439         {
14440                 const fp16type  x               (*in[0]);
14441                 const double    d               (x.asDouble());
14442                 const double    result  (deCeil(d));
14443
14444                 out[0] = fp16type(result).bits();
14445                 min[0] = getMin(result, getULPs(in));
14446                 max[0] = getMax(result, getULPs(in));
14447
14448                 return true;
14449         }
14450 };
14451
14452 struct fp16Fract : public fp16PerComponent
14453 {
14454         template<class fp16type>
14455         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14456         {
14457                 const fp16type  x               (*in[0]);
14458                 const double    d               (x.asDouble());
14459                 const double    result  (deFrac(d));
14460
14461                 out[0] = fp16type(result).bits();
14462                 min[0] = getMin(result, getULPs(in));
14463                 max[0] = getMax(result, getULPs(in));
14464
14465                 return true;
14466         }
14467 };
14468
14469 struct fp16Radians : public fp16PerComponent
14470 {
14471         virtual double getULPs (vector<const deFloat16*>& in)
14472         {
14473                 DE_UNREF(in);
14474
14475                 return 2.5;
14476         }
14477
14478         template<class fp16type>
14479         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14480         {
14481                 const fp16type  x               (*in[0]);
14482                 const float             d               (x.asFloat());
14483                 const float             result  (deFloatRadians(d));
14484
14485                 out[0] = fp16type(result).bits();
14486                 min[0] = getMin(result, getULPs(in));
14487                 max[0] = getMax(result, getULPs(in));
14488
14489                 return true;
14490         }
14491 };
14492
14493 struct fp16Degrees : public fp16PerComponent
14494 {
14495         virtual double getULPs (vector<const deFloat16*>& in)
14496         {
14497                 DE_UNREF(in);
14498
14499                 return 2.5;
14500         }
14501
14502         template<class fp16type>
14503         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14504         {
14505                 const fp16type  x               (*in[0]);
14506                 const float             d               (x.asFloat());
14507                 const float             result  (deFloatDegrees(d));
14508
14509                 out[0] = fp16type(result).bits();
14510                 min[0] = getMin(result, getULPs(in));
14511                 max[0] = getMax(result, getULPs(in));
14512
14513                 return true;
14514         }
14515 };
14516
14517 struct fp16Sin : public fp16PerComponent
14518 {
14519         template<class fp16type>
14520         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14521         {
14522                 const fp16type  x                       (*in[0]);
14523                 const double    d                       (x.asDouble());
14524                 const double    result          (deSin(d));
14525                 const double    unspecUlp       (16.0);
14526                 const double    err                     (de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
14527
14528                 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14529                         return false;
14530
14531                 out[0] = fp16type(result).bits();
14532                 min[0] = result - err;
14533                 max[0] = result + err;
14534
14535                 return true;
14536         }
14537 };
14538
14539 struct fp16Cos : public fp16PerComponent
14540 {
14541         template<class fp16type>
14542         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14543         {
14544                 const fp16type  x                       (*in[0]);
14545                 const double    d                       (x.asDouble());
14546                 const double    result          (deCos(d));
14547                 const double    unspecUlp       (16.0);
14548                 const double    err                     (de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
14549
14550                 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14551                         return false;
14552
14553                 out[0] = fp16type(result).bits();
14554                 min[0] = result - err;
14555                 max[0] = result + err;
14556
14557                 return true;
14558         }
14559 };
14560
14561 struct fp16Tan : public fp16PerComponent
14562 {
14563         template<class fp16type>
14564         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14565         {
14566                 const fp16type  x               (*in[0]);
14567                 const double    d               (x.asDouble());
14568                 const double    result  (deTan(d));
14569
14570                 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14571                         return false;
14572
14573                 out[0] = fp16type(result).bits();
14574                 {
14575                         const double    err                     = deLdExp(1.0, -7);
14576                         const double    s1                      = deSin(d) + err;
14577                         const double    s2                      = deSin(d) - err;
14578                         const double    c1                      = deCos(d) + err;
14579                         const double    c2                      = deCos(d) - err;
14580                         const double    edgeVals[]      = {s1/c1, s1/c2, s2/c1, s2/c2};
14581                         double                  edgeLeft        = out[0];
14582                         double                  edgeRight       = out[0];
14583
14584                         if (deSign(c1 * c2) < 0.0)
14585                         {
14586                                 edgeLeft        = -std::numeric_limits<double>::infinity();
14587                                 edgeRight       = +std::numeric_limits<double>::infinity();
14588                         }
14589                         else
14590                         {
14591                                 edgeLeft        = *std::min_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14592                                 edgeRight       = *std::max_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14593                         }
14594
14595                         min[0] = edgeLeft;
14596                         max[0] = edgeRight;
14597                 }
14598
14599                 return true;
14600         }
14601 };
14602
14603 struct fp16Asin : public fp16PerComponent
14604 {
14605         template<class fp16type>
14606         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14607         {
14608                 const fp16type  x               (*in[0]);
14609                 const double    d               (x.asDouble());
14610                 const double    result  (deAsin(d));
14611                 const double    error   (deAtan2(d, sqrt(1.0 - d * d)));
14612
14613                 if (!x.isNaN() && deAbs(d) > 1.0)
14614                         return false;
14615
14616                 out[0] = fp16type(result).bits();
14617                 min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14618                 max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14619
14620                 return true;
14621         }
14622 };
14623
14624 struct fp16Acos : public fp16PerComponent
14625 {
14626         template<class fp16type>
14627         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14628         {
14629                 const fp16type  x               (*in[0]);
14630                 const double    d               (x.asDouble());
14631                 const double    result  (deAcos(d));
14632                 const double    error   (deAtan2(sqrt(1.0 - d * d), d));
14633
14634                 if (!x.isNaN() && deAbs(d) > 1.0)
14635                         return false;
14636
14637                 out[0] = fp16type(result).bits();
14638                 min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14639                 max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14640
14641                 return true;
14642         }
14643 };
14644
14645 struct fp16Atan : public fp16PerComponent
14646 {
14647         virtual double getULPs(vector<const deFloat16*>& in)
14648         {
14649                 DE_UNREF(in);
14650
14651                 return 2 * 5.0; // This is not a precision test. Value is not from spec
14652         }
14653
14654         template<class fp16type>
14655         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14656         {
14657                 const fp16type  x               (*in[0]);
14658                 const double    d               (x.asDouble());
14659                 const double    result  (deAtanOver(d));
14660
14661                 out[0] = fp16type(result).bits();
14662                 min[0] = getMin(result, getULPs(in));
14663                 max[0] = getMax(result, getULPs(in));
14664
14665                 return true;
14666         }
14667 };
14668
14669 struct fp16Sinh : public fp16PerComponent
14670 {
14671         fp16Sinh() : fp16PerComponent()
14672         {
14673                 flavorNames.push_back("Double");
14674                 flavorNames.push_back("ExpFP16");
14675         }
14676
14677         template<class fp16type>
14678         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14679         {
14680                 const fp16type  x               (*in[0]);
14681                 const double    d               (x.asDouble());
14682                 const double    ulps    (64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
14683                 double                  result  (0.0);
14684                 double                  error   (0.0);
14685
14686                 if (getFlavor() == 0)
14687                 {
14688                         result  = deSinh(d);
14689                         error   = floatFormat16.ulp(deAbs(result), ulps);
14690                 }
14691                 else if (getFlavor() == 1)
14692                 {
14693                         const fp16type  epx     (deExp(d));
14694                         const fp16type  enx     (deExp(-d));
14695                         const fp16type  esx     (epx.asDouble() - enx.asDouble());
14696                         const fp16type  sx2     (esx.asDouble() / 2.0);
14697
14698                         result  = sx2.asDouble();
14699                         error   = deAbs(floatFormat16.ulp(epx.asDouble(), ulps)) + deAbs(floatFormat16.ulp(enx.asDouble(), ulps));
14700                 }
14701                 else
14702                 {
14703                         TCU_THROW(InternalError, "Unknown flavor");
14704                 }
14705
14706                 out[0] = fp16type(result).bits();
14707                 min[0] = result - error;
14708                 max[0] = result + error;
14709
14710                 return true;
14711         }
14712 };
14713
14714 struct fp16Cosh : public fp16PerComponent
14715 {
14716         fp16Cosh() : fp16PerComponent()
14717         {
14718                 flavorNames.push_back("Double");
14719                 flavorNames.push_back("ExpFP16");
14720         }
14721
14722         template<class fp16type>
14723         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14724         {
14725                 const fp16type  x               (*in[0]);
14726                 const double    d               (x.asDouble());
14727                 const double    ulps    (64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
14728                 double                  result  (0.0);
14729
14730                 if (getFlavor() == 0)
14731                 {
14732                         result = deCosh(d);
14733                 }
14734                 else if (getFlavor() == 1)
14735                 {
14736                         const fp16type  epx     (deExp(d));
14737                         const fp16type  enx     (deExp(-d));
14738                         const fp16type  esx     (epx.asDouble() + enx.asDouble());
14739                         const fp16type  sx2     (esx.asDouble() / 2.0);
14740
14741                         result = sx2.asDouble();
14742                 }
14743                 else
14744                 {
14745                         TCU_THROW(InternalError, "Unknown flavor");
14746                 }
14747
14748                 out[0] = fp16type(result).bits();
14749                 min[0] = result - floatFormat16.ulp(deAbs(result), ulps);
14750                 max[0] = result + floatFormat16.ulp(deAbs(result), ulps);
14751
14752                 return true;
14753         }
14754 };
14755
14756 struct fp16Tanh : public fp16PerComponent
14757 {
14758         fp16Tanh() : fp16PerComponent()
14759         {
14760                 flavorNames.push_back("Tanh");
14761                 flavorNames.push_back("SinhCosh");
14762                 flavorNames.push_back("SinhCoshFP16");
14763                 flavorNames.push_back("PolyFP16");
14764         }
14765
14766         virtual double getULPs (vector<const deFloat16*>& in)
14767         {
14768                 const tcu::Float16      x       (*in[0]);
14769                 const double            d       (x.asDouble());
14770
14771                 return 2 * (1.0 + 2 * deAbs(d)); // This is not a precision test. Value is not from spec
14772         }
14773
14774         template<class fp16type>
14775         inline double calcPoly (const fp16type& espx, const fp16type& esnx, const fp16type& ecpx, const fp16type& ecnx)
14776         {
14777                 const fp16type  esx     (espx.asDouble() - esnx.asDouble());
14778                 const fp16type  sx2     (esx.asDouble() / 2.0);
14779                 const fp16type  ecx     (ecpx.asDouble() + ecnx.asDouble());
14780                 const fp16type  cx2     (ecx.asDouble() / 2.0);
14781                 const fp16type  tg      (sx2.asDouble() / cx2.asDouble());
14782                 const double    rez     (tg.asDouble());
14783
14784                 return rez;
14785         }
14786
14787         template<class fp16type>
14788         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14789         {
14790                 const fp16type  x               (*in[0]);
14791                 const double    d               (x.asDouble());
14792                 double                  result  (0.0);
14793
14794                 if (getFlavor() == 0)
14795                 {
14796                         result  = deTanh(d);
14797                         min[0]  = getMin(result, getULPs(in));
14798                         max[0]  = getMax(result, getULPs(in));
14799                 }
14800                 else if (getFlavor() == 1)
14801                 {
14802                         result  = deSinh(d) / deCosh(d);
14803                         min[0]  = getMin(result, getULPs(in));
14804                         max[0]  = getMax(result, getULPs(in));
14805                 }
14806                 else if (getFlavor() == 2)
14807                 {
14808                         const fp16type  s       (deSinh(d));
14809                         const fp16type  c       (deCosh(d));
14810
14811                         result  = s.asDouble() / c.asDouble();
14812                         min[0]  = getMin(result, getULPs(in));
14813                         max[0]  = getMax(result, getULPs(in));
14814                 }
14815                 else if (getFlavor() == 3)
14816                 {
14817                         const double    ulps    (getULPs(in));
14818                         const double    epxm    (deExp( d));
14819                         const double    enxm    (deExp(-d));
14820                         const double    epxmerr = floatFormat16.ulp(epxm, ulps);
14821                         const double    enxmerr = floatFormat16.ulp(enxm, ulps);
14822                         const fp16type  epx[]   = { fp16type(epxm - epxmerr), fp16type(epxm + epxmerr) };
14823                         const fp16type  enx[]   = { fp16type(enxm - enxmerr), fp16type(enxm + enxmerr) };
14824                         const fp16type  epxm16  (epxm);
14825                         const fp16type  enxm16  (enxm);
14826                         vector<double>  tgs;
14827
14828                         for (size_t spNdx = 0; spNdx < DE_LENGTH_OF_ARRAY(epx); ++spNdx)
14829                         for (size_t snNdx = 0; snNdx < DE_LENGTH_OF_ARRAY(enx); ++snNdx)
14830                         for (size_t cpNdx = 0; cpNdx < DE_LENGTH_OF_ARRAY(epx); ++cpNdx)
14831                         for (size_t cnNdx = 0; cnNdx < DE_LENGTH_OF_ARRAY(enx); ++cnNdx)
14832                         {
14833                                 const double tgh = calcPoly(epx[spNdx], enx[snNdx], epx[cpNdx], enx[cnNdx]);
14834
14835                                 tgs.push_back(tgh);
14836                         }
14837
14838                         result = calcPoly(epxm16, enxm16, epxm16, enxm16);
14839                         min[0] = *std::min_element(tgs.begin(), tgs.end());
14840                         max[0] = *std::max_element(tgs.begin(), tgs.end());
14841                 }
14842                 else
14843                 {
14844                         TCU_THROW(InternalError, "Unknown flavor");
14845                 }
14846
14847                 out[0] = fp16type(result).bits();
14848
14849                 return true;
14850         }
14851 };
14852
14853 struct fp16Asinh : public fp16PerComponent
14854 {
14855         fp16Asinh() : fp16PerComponent()
14856         {
14857                 flavorNames.push_back("Double");
14858                 flavorNames.push_back("PolyFP16Wiki");
14859                 flavorNames.push_back("PolyFP16Abs");
14860         }
14861
14862         virtual double getULPs (vector<const deFloat16*>& in)
14863         {
14864                 DE_UNREF(in);
14865
14866                 return 256.0; // This is not a precision test. Value is not from spec
14867         }
14868
14869         template<class fp16type>
14870         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14871         {
14872                 const fp16type  x               (*in[0]);
14873                 const double    d               (x.asDouble());
14874                 double                  result  (0.0);
14875
14876                 if (getFlavor() == 0)
14877                 {
14878                         result = deAsinh(d);
14879                 }
14880                 else if (getFlavor() == 1)
14881                 {
14882                         const fp16type  x2              (d * d);
14883                         const fp16type  x2p1    (x2.asDouble() + 1.0);
14884                         const fp16type  sq              (deSqrt(x2p1.asDouble()));
14885                         const fp16type  sxsq    (d + sq.asDouble());
14886                         const fp16type  lsxsq   (deLog(sxsq.asDouble()));
14887
14888                         if (lsxsq.isInf())
14889                                 return false;
14890
14891                         result = lsxsq.asDouble();
14892                 }
14893                 else if (getFlavor() == 2)
14894                 {
14895                         const fp16type  x2              (d * d);
14896                         const fp16type  x2p1    (x2.asDouble() + 1.0);
14897                         const fp16type  sq              (deSqrt(x2p1.asDouble()));
14898                         const fp16type  sxsq    (deAbs(d) + sq.asDouble());
14899                         const fp16type  lsxsq   (deLog(sxsq.asDouble()));
14900
14901                         result = deSign(d) * lsxsq.asDouble();
14902                 }
14903                 else
14904                 {
14905                         TCU_THROW(InternalError, "Unknown flavor");
14906                 }
14907
14908                 out[0] = fp16type(result).bits();
14909                 min[0] = getMin(result, getULPs(in));
14910                 max[0] = getMax(result, getULPs(in));
14911
14912                 return true;
14913         }
14914 };
14915
14916 struct fp16Acosh : public fp16PerComponent
14917 {
14918         fp16Acosh() : fp16PerComponent()
14919         {
14920                 flavorNames.push_back("Double");
14921                 flavorNames.push_back("PolyFP16");
14922         }
14923
14924         virtual double getULPs (vector<const deFloat16*>& in)
14925         {
14926                 DE_UNREF(in);
14927
14928                 return 16.0; // This is not a precision test. Value is not from spec
14929         }
14930
14931         template<class fp16type>
14932         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14933         {
14934                 const fp16type  x               (*in[0]);
14935                 const double    d               (x.asDouble());
14936                 double                  result  (0.0);
14937
14938                 if (!x.isNaN() && d < 1.0)
14939                         return false;
14940
14941                 if (getFlavor() == 0)
14942                 {
14943                         result = deAcosh(d);
14944                 }
14945                 else if (getFlavor() == 1)
14946                 {
14947                         const fp16type  x2              (d * d);
14948                         const fp16type  x2m1    (x2.asDouble() - 1.0);
14949                         const fp16type  sq              (deSqrt(x2m1.asDouble()));
14950                         const fp16type  sxsq    (d + sq.asDouble());
14951                         const fp16type  lsxsq   (deLog(sxsq.asDouble()));
14952
14953                         result = lsxsq.asDouble();
14954                 }
14955                 else
14956                 {
14957                         TCU_THROW(InternalError, "Unknown flavor");
14958                 }
14959
14960                 out[0] = fp16type(result).bits();
14961                 min[0] = getMin(result, getULPs(in));
14962                 max[0] = getMax(result, getULPs(in));
14963
14964                 return true;
14965         }
14966 };
14967
14968 struct fp16Atanh : public fp16PerComponent
14969 {
14970         fp16Atanh() : fp16PerComponent()
14971         {
14972                 flavorNames.push_back("Double");
14973                 flavorNames.push_back("PolyFP16");
14974         }
14975
14976         template<class fp16type>
14977         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14978         {
14979                 const fp16type  x               (*in[0]);
14980                 const double    d               (x.asDouble());
14981                 double                  result  (0.0);
14982
14983                 if (deAbs(d) >= 1.0)
14984                         return false;
14985
14986                 if (getFlavor() == 0)
14987                 {
14988                         const double    ulps    (16.0); // This is not a precision test. Value is not from spec
14989
14990                         result = deAtanh(d);
14991                         min[0] = getMin(result, ulps);
14992                         max[0] = getMax(result, ulps);
14993                 }
14994                 else if (getFlavor() == 1)
14995                 {
14996                         const fp16type  x1a             (1.0 + d);
14997                         const fp16type  x1b             (1.0 - d);
14998                         const fp16type  x1d             (x1a.asDouble() / x1b.asDouble());
14999                         const fp16type  lx1d    (deLog(x1d.asDouble()));
15000                         const fp16type  lx1d2   (0.5 * lx1d.asDouble());
15001                         const double    error   (2 * (de::inRange(deAbs(x1d.asDouble()), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(x1d.asDouble()), 3.0)));
15002
15003                         result = lx1d2.asDouble();
15004                         min[0] = result - error;
15005                         max[0] = result + error;
15006                 }
15007                 else
15008                 {
15009                         TCU_THROW(InternalError, "Unknown flavor");
15010                 }
15011
15012                 out[0] = fp16type(result).bits();
15013
15014                 return true;
15015         }
15016 };
15017
15018 struct fp16Exp : public fp16PerComponent
15019 {
15020         template<class fp16type>
15021         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15022         {
15023                 const fp16type  x               (*in[0]);
15024                 const double    d               (x.asDouble());
15025                 const double    ulps    (10.0 * (1.0 + 2.0 * deAbs(d)));
15026                 const double    result  (deExp(d));
15027
15028                 out[0] = fp16type(result).bits();
15029                 min[0] = getMin(result, ulps);
15030                 max[0] = getMax(result, ulps);
15031
15032                 return true;
15033         }
15034 };
15035
15036 struct fp16Log : public fp16PerComponent
15037 {
15038         template<class fp16type>
15039         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15040         {
15041                 const fp16type  x               (*in[0]);
15042                 const double    d               (x.asDouble());
15043                 const double    result  (deLog(d));
15044                 const double    error   (de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15045
15046                 if (d <= 0.0)
15047                         return false;
15048
15049                 out[0] = fp16type(result).bits();
15050                 min[0] = result - error;
15051                 max[0] = result + error;
15052
15053                 return true;
15054         }
15055 };
15056
15057 struct fp16Exp2 : public fp16PerComponent
15058 {
15059         template<class fp16type>
15060         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15061         {
15062                 const fp16type  x               (*in[0]);
15063                 const double    d               (x.asDouble());
15064                 const double    result  (deExp2(d));
15065                 const double    ulps    (1.0 + 2.0 * deAbs(fp16type(in[0][0]).asDouble()));
15066
15067                 out[0] = fp16type(result).bits();
15068                 min[0] = getMin(result, ulps);
15069                 max[0] = getMax(result, ulps);
15070
15071                 return true;
15072         }
15073 };
15074
15075 struct fp16Log2 : public fp16PerComponent
15076 {
15077         template<class fp16type>
15078         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15079         {
15080                 const fp16type  x               (*in[0]);
15081                 const double    d               (x.asDouble());
15082                 const double    result  (deLog2(d));
15083                 const double    error   (de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15084
15085                 if (d <= 0.0)
15086                         return false;
15087
15088                 out[0] = fp16type(result).bits();
15089                 min[0] = result - error;
15090                 max[0] = result + error;
15091
15092                 return true;
15093         }
15094 };
15095
15096 struct fp16Sqrt : public fp16PerComponent
15097 {
15098         virtual double getULPs (vector<const deFloat16*>& in)
15099         {
15100                 DE_UNREF(in);
15101
15102                 return 6.0;
15103         }
15104
15105         template<class fp16type>
15106         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15107         {
15108                 const fp16type  x               (*in[0]);
15109                 const double    d               (x.asDouble());
15110                 const double    result  (deSqrt(d));
15111
15112                 if (!x.isNaN() && d < 0.0)
15113                         return false;
15114
15115                 out[0] = fp16type(result).bits();
15116                 min[0] = getMin(result, getULPs(in));
15117                 max[0] = getMax(result, getULPs(in));
15118
15119                 return true;
15120         }
15121 };
15122
15123 struct fp16InverseSqrt : public fp16PerComponent
15124 {
15125         virtual double getULPs (vector<const deFloat16*>& in)
15126         {
15127                 DE_UNREF(in);
15128
15129                 return 2.0;
15130         }
15131
15132         template<class fp16type>
15133         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15134         {
15135                 const fp16type  x               (*in[0]);
15136                 const double    d               (x.asDouble());
15137                 const double    result  (1.0/deSqrt(d));
15138
15139                 if (!x.isNaN() && d <= 0.0)
15140                         return false;
15141
15142                 out[0] = fp16type(result).bits();
15143                 min[0] = getMin(result, getULPs(in));
15144                 max[0] = getMax(result, getULPs(in));
15145
15146                 return true;
15147         }
15148 };
15149
15150 struct fp16ModfFrac : public fp16PerComponent
15151 {
15152         template<class fp16type>
15153         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15154         {
15155                 const fp16type  x               (*in[0]);
15156                 const double    d               (x.asDouble());
15157                 double                  i               (0.0);
15158                 const double    result  (deModf(d, &i));
15159
15160                 if (x.isInf() || x.isNaN())
15161                         return false;
15162
15163                 out[0] = fp16type(result).bits();
15164                 min[0] = getMin(result, getULPs(in));
15165                 max[0] = getMax(result, getULPs(in));
15166
15167                 return true;
15168         }
15169 };
15170
15171 struct fp16ModfInt : public fp16PerComponent
15172 {
15173         template<class fp16type>
15174         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15175         {
15176                 const fp16type  x               (*in[0]);
15177                 const double    d               (x.asDouble());
15178                 double                  i               (0.0);
15179                 const double    unused  (deModf(d, &i));
15180                 const double    result  (i);
15181
15182                 DE_UNREF(unused);
15183
15184                 if (x.isInf() || x.isNaN())
15185                         return false;
15186
15187                 out[0] = fp16type(result).bits();
15188                 min[0] = getMin(result, getULPs(in));
15189                 max[0] = getMax(result, getULPs(in));
15190
15191                 return true;
15192         }
15193 };
15194
15195 struct fp16FrexpS : public fp16PerComponent
15196 {
15197         template<class fp16type>
15198         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15199         {
15200                 const fp16type  x               (*in[0]);
15201                 const double    d               (x.asDouble());
15202                 int                             e               (0);
15203                 const double    result  (deFrExp(d, &e));
15204
15205                 if (x.isNaN() || x.isInf())
15206                         return false;
15207
15208                 out[0] = fp16type(result).bits();
15209                 min[0] = getMin(result, getULPs(in));
15210                 max[0] = getMax(result, getULPs(in));
15211
15212                 return true;
15213         }
15214 };
15215
15216 struct fp16FrexpE : public fp16PerComponent
15217 {
15218         template<class fp16type>
15219         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15220         {
15221                 const fp16type  x               (*in[0]);
15222                 const double    d               (x.asDouble());
15223                 int                             e               (0);
15224                 const double    unused  (deFrExp(d, &e));
15225                 const double    result  (static_cast<double>(e));
15226
15227                 DE_UNREF(unused);
15228
15229                 if (x.isNaN() || x.isInf())
15230                         return false;
15231
15232                 out[0] = fp16type(result).bits();
15233                 min[0] = getMin(result, getULPs(in));
15234                 max[0] = getMax(result, getULPs(in));
15235
15236                 return true;
15237         }
15238 };
15239
15240 struct fp16OpFAdd : public fp16PerComponent
15241 {
15242         template<class fp16type>
15243         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15244         {
15245                 const fp16type  x               (*in[0]);
15246                 const fp16type  y               (*in[1]);
15247                 const double    xd              (x.asDouble());
15248                 const double    yd              (y.asDouble());
15249                 const double    result  (xd + yd);
15250
15251                 out[0] = fp16type(result).bits();
15252                 min[0] = getMin(result, getULPs(in));
15253                 max[0] = getMax(result, getULPs(in));
15254
15255                 return true;
15256         }
15257 };
15258
15259 struct fp16OpFSub : public fp16PerComponent
15260 {
15261         template<class fp16type>
15262         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15263         {
15264                 const fp16type  x               (*in[0]);
15265                 const fp16type  y               (*in[1]);
15266                 const double    xd              (x.asDouble());
15267                 const double    yd              (y.asDouble());
15268                 const double    result  (xd - yd);
15269
15270                 out[0] = fp16type(result).bits();
15271                 min[0] = getMin(result, getULPs(in));
15272                 max[0] = getMax(result, getULPs(in));
15273
15274                 return true;
15275         }
15276 };
15277
15278 struct fp16OpFMul : public fp16PerComponent
15279 {
15280         template<class fp16type>
15281         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15282         {
15283                 const fp16type  x               (*in[0]);
15284                 const fp16type  y               (*in[1]);
15285                 const double    xd              (x.asDouble());
15286                 const double    yd              (y.asDouble());
15287                 const double    result  (xd * yd);
15288
15289                 out[0] = fp16type(result).bits();
15290                 min[0] = getMin(result, getULPs(in));
15291                 max[0] = getMax(result, getULPs(in));
15292
15293                 return true;
15294         }
15295 };
15296
15297 struct fp16OpFDiv : public fp16PerComponent
15298 {
15299         fp16OpFDiv() : fp16PerComponent()
15300         {
15301                 flavorNames.push_back("DirectDiv");
15302                 flavorNames.push_back("InverseDiv");
15303         }
15304
15305         template<class fp16type>
15306         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15307         {
15308                 const fp16type  x                       (*in[0]);
15309                 const fp16type  y                       (*in[1]);
15310                 const double    xd                      (x.asDouble());
15311                 const double    yd                      (y.asDouble());
15312                 const double    unspecUlp       (16.0);
15313                 const double    ulpCnt          (de::inRange(deAbs(yd), deLdExp(1, -14), deLdExp(1, 14)) ? 2.5 : unspecUlp);
15314                 double                  result          (0.0);
15315
15316                 if (y.isZero())
15317                         return false;
15318
15319                 if (getFlavor() == 0)
15320                 {
15321                         result = (xd / yd);
15322                 }
15323                 else if (getFlavor() == 1)
15324                 {
15325                         const double    invyd   (1.0 / yd);
15326                         const fp16type  invy    (invyd);
15327
15328                         result = (xd * invy.asDouble());
15329                 }
15330                 else
15331                 {
15332                         TCU_THROW(InternalError, "Unknown flavor");
15333                 }
15334
15335                 out[0] = fp16type(result).bits();
15336                 min[0] = getMin(result, ulpCnt);
15337                 max[0] = getMax(result, ulpCnt);
15338
15339                 return true;
15340         }
15341 };
15342
15343 struct fp16Atan2 : public fp16PerComponent
15344 {
15345         fp16Atan2() : fp16PerComponent()
15346         {
15347                 flavorNames.push_back("DoubleCalc");
15348                 flavorNames.push_back("DoubleCalc_PI");
15349         }
15350
15351         virtual double getULPs(vector<const deFloat16*>& in)
15352         {
15353                 DE_UNREF(in);
15354
15355                 return 2 * 5.0; // This is not a precision test. Value is not from spec
15356         }
15357
15358         template<class fp16type>
15359         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15360         {
15361                 const fp16type  x               (*in[0]);
15362                 const fp16type  y               (*in[1]);
15363                 const double    xd              (x.asDouble());
15364                 const double    yd              (y.asDouble());
15365                 double                  result  (0.0);
15366
15367                 if ((x.isZero() && y.isZero())||(x.isInf() && y.isInf()))
15368                         return false;
15369
15370                 if (getFlavor() == 0)
15371                 {
15372                         result  = deAtan2(xd, yd);
15373                 }
15374                 else if (getFlavor() == 1)
15375                 {
15376                         const double    ulps    (2.0 * 5.0); // This is not a precision test. Value is not from spec
15377                         const double    eps             (floatFormat16.ulp(DE_PI_DOUBLE, ulps));
15378
15379                         result  = deAtan2(xd, yd);
15380
15381                         if (de::inRange(deAbs(result), DE_PI_DOUBLE - eps, DE_PI_DOUBLE + eps))
15382                                 result  = -result;
15383                 }
15384                 else
15385                 {
15386                         TCU_THROW(InternalError, "Unknown flavor");
15387                 }
15388
15389                 out[0] = fp16type(result).bits();
15390                 min[0] = getMin(result, getULPs(in));
15391                 max[0] = getMax(result, getULPs(in));
15392
15393                 return true;
15394         }
15395 };
15396
15397 struct fp16Pow : public fp16PerComponent
15398 {
15399         fp16Pow() : fp16PerComponent()
15400         {
15401                 flavorNames.push_back("Pow");
15402                 flavorNames.push_back("PowLog2");
15403                 flavorNames.push_back("PowLog2FP16");
15404         }
15405
15406         template<class fp16type>
15407         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15408         {
15409                 const fp16type  x               (*in[0]);
15410                 const fp16type  y               (*in[1]);
15411                 const double    xd              (x.asDouble());
15412                 const double    yd              (y.asDouble());
15413                 const double    logxeps (de::inRange(deAbs(xd), 0.5, 2.0) ? deLdExp(1.0, -7) : floatFormat16.ulp(deLog2(xd), 3.0));
15414                 const double    ulps1   (1.0 + 4.0 * deAbs(yd * (deLog2(xd) - logxeps)));
15415                 const double    ulps2   (1.0 + 4.0 * deAbs(yd * (deLog2(xd) + logxeps)));
15416                 const double    ulps    (deMax(deAbs(ulps1), deAbs(ulps2)));
15417                 double                  result  (0.0);
15418
15419                 if (xd < 0.0)
15420                         return false;
15421
15422                 if (x.isZero() && yd <= 0.0)
15423                         return false;
15424
15425                 if (getFlavor() == 0)
15426                 {
15427                         result = dePow(xd, yd);
15428                 }
15429                 else if (getFlavor() == 1)
15430                 {
15431                         const double    l2d     (deLog2(xd));
15432                         const double    e2d     (deExp2(yd * l2d));
15433
15434                         result = e2d;
15435                 }
15436                 else if (getFlavor() == 2)
15437                 {
15438                         const double    l2d     (deLog2(xd));
15439                         const fp16type  l2      (l2d);
15440                         const double    e2d     (deExp2(yd * l2.asDouble()));
15441                         const fp16type  e2      (e2d);
15442
15443                         result = e2.asDouble();
15444                 }
15445                 else
15446                 {
15447                         TCU_THROW(InternalError, "Unknown flavor");
15448                 }
15449
15450                 out[0] = fp16type(result).bits();
15451                 min[0] = getMin(result, ulps);
15452                 max[0] = getMax(result, ulps);
15453
15454                 return true;
15455         }
15456 };
15457
15458 struct fp16FMin : public fp16PerComponent
15459 {
15460         template<class fp16type>
15461         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15462         {
15463                 const fp16type  x               (*in[0]);
15464                 const fp16type  y               (*in[1]);
15465                 const double    xd              (x.asDouble());
15466                 const double    yd              (y.asDouble());
15467                 const double    result  (deMin(xd, yd));
15468
15469                 if (x.isNaN() || y.isNaN())
15470                         return false;
15471
15472                 out[0] = fp16type(result).bits();
15473                 min[0] = getMin(result, getULPs(in));
15474                 max[0] = getMax(result, getULPs(in));
15475
15476                 return true;
15477         }
15478 };
15479
15480 struct fp16FMax : public fp16PerComponent
15481 {
15482         template<class fp16type>
15483         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15484         {
15485                 const fp16type  x               (*in[0]);
15486                 const fp16type  y               (*in[1]);
15487                 const double    xd              (x.asDouble());
15488                 const double    yd              (y.asDouble());
15489                 const double    result  (deMax(xd, yd));
15490
15491                 if (x.isNaN() || y.isNaN())
15492                         return false;
15493
15494                 out[0] = fp16type(result).bits();
15495                 min[0] = getMin(result, getULPs(in));
15496                 max[0] = getMax(result, getULPs(in));
15497
15498                 return true;
15499         }
15500 };
15501
15502 struct fp16Step : public fp16PerComponent
15503 {
15504         template<class fp16type>
15505         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15506         {
15507                 const fp16type  edge    (*in[0]);
15508                 const fp16type  x               (*in[1]);
15509                 const double    edged   (edge.asDouble());
15510                 const double    xd              (x.asDouble());
15511                 const double    result  (deStep(edged, xd));
15512
15513                 out[0] = fp16type(result).bits();
15514                 min[0] = getMin(result, getULPs(in));
15515                 max[0] = getMax(result, getULPs(in));
15516
15517                 return true;
15518         }
15519 };
15520
15521 struct fp16Ldexp : public fp16PerComponent
15522 {
15523         template<class fp16type>
15524         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15525         {
15526                 const fp16type  x               (*in[0]);
15527                 const fp16type  y               (*in[1]);
15528                 const double    xd              (x.asDouble());
15529                 const int               yd              (static_cast<int>(deTrunc(y.asDouble())));
15530                 const double    result  (deLdExp(xd, yd));
15531
15532                 if (y.isNaN() || y.isInf() || y.isDenorm() || yd < -14 || yd > 15)
15533                         return false;
15534
15535                 // Spec: "If this product is too large to be represented in the floating-point type, the result is undefined."
15536                 if (fp16type(result).isInf())
15537                         return false;
15538
15539                 out[0] = fp16type(result).bits();
15540                 min[0] = getMin(result, getULPs(in));
15541                 max[0] = getMax(result, getULPs(in));
15542
15543                 return true;
15544         }
15545 };
15546
15547 struct fp16FClamp : public fp16PerComponent
15548 {
15549         template<class fp16type>
15550         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15551         {
15552                 const fp16type  x               (*in[0]);
15553                 const fp16type  minVal  (*in[1]);
15554                 const fp16type  maxVal  (*in[2]);
15555                 const double    xd              (x.asDouble());
15556                 const double    minVald (minVal.asDouble());
15557                 const double    maxVald (maxVal.asDouble());
15558                 const double    result  (deClamp(xd, minVald, maxVald));
15559
15560                 if (minVal.isNaN() || maxVal.isNaN() || minVald > maxVald)
15561                         return false;
15562
15563                 out[0] = fp16type(result).bits();
15564                 min[0] = getMin(result, getULPs(in));
15565                 max[0] = getMax(result, getULPs(in));
15566
15567                 return true;
15568         }
15569 };
15570
15571 struct fp16FMix : public fp16PerComponent
15572 {
15573         fp16FMix() : fp16PerComponent()
15574         {
15575                 flavorNames.push_back("DoubleCalc");
15576                 flavorNames.push_back("EmulatingFP16");
15577                 flavorNames.push_back("EmulatingFP16YminusX");
15578         }
15579
15580         template<class fp16type>
15581         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15582         {
15583                 const fp16type  x               (*in[0]);
15584                 const fp16type  y               (*in[1]);
15585                 const fp16type  a               (*in[2]);
15586                 const double    ulps    (8.0); // This is not a precision test. Value is not from spec
15587                 double                  result  (0.0);
15588
15589                 if (getFlavor() == 0)
15590                 {
15591                         const double    xd              (x.asDouble());
15592                         const double    yd              (y.asDouble());
15593                         const double    ad              (a.asDouble());
15594                         const double    xeps    (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15595                         const double    yeps    (floatFormat16.ulp(deAbs(yd * ad), ulps));
15596                         const double    eps             (xeps + yeps);
15597
15598                         result = deMix(xd, yd, ad);
15599                         min[0] = result - eps;
15600                         max[0] = result + eps;
15601                 }
15602                 else if (getFlavor() == 1)
15603                 {
15604                         const double    xd              (x.asDouble());
15605                         const double    yd              (y.asDouble());
15606                         const double    ad              (a.asDouble());
15607                         const fp16type  am              (1.0 - ad);
15608                         const double    amd             (am.asDouble());
15609                         const fp16type  xam             (xd * amd);
15610                         const double    xamd    (xam.asDouble());
15611                         const fp16type  ya              (yd * ad);
15612                         const double    yad             (ya.asDouble());
15613                         const double    xeps    (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15614                         const double    yeps    (floatFormat16.ulp(deAbs(yd * ad), ulps));
15615                         const double    eps             (xeps + yeps);
15616
15617                         result = xamd + yad;
15618                         min[0] = result - eps;
15619                         max[0] = result + eps;
15620                 }
15621                 else if (getFlavor() == 2)
15622                 {
15623                         const double    xd              (x.asDouble());
15624                         const double    yd              (y.asDouble());
15625                         const double    ad              (a.asDouble());
15626                         const fp16type  ymx             (yd - xd);
15627                         const double    ymxd    (ymx.asDouble());
15628                         const fp16type  ymxa    (ymxd * ad);
15629                         const double    ymxad   (ymxa.asDouble());
15630                         const double    xeps    (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15631                         const double    yeps    (floatFormat16.ulp(deAbs(yd * ad), ulps));
15632                         const double    eps             (xeps + yeps);
15633
15634                         result = xd + ymxad;
15635                         min[0] = result - eps;
15636                         max[0] = result + eps;
15637                 }
15638                 else
15639                 {
15640                         TCU_THROW(InternalError, "Unknown flavor");
15641                 }
15642
15643                 out[0] = fp16type(result).bits();
15644
15645                 return true;
15646         }
15647 };
15648
15649 struct fp16SmoothStep : public fp16PerComponent
15650 {
15651         fp16SmoothStep() : fp16PerComponent()
15652         {
15653                 flavorNames.push_back("FloatCalc");
15654                 flavorNames.push_back("EmulatingFP16");
15655                 flavorNames.push_back("EmulatingFP16WClamp");
15656         }
15657
15658         virtual double getULPs(vector<const deFloat16*>& in)
15659         {
15660                 DE_UNREF(in);
15661
15662                 return 4.0; // This is not a precision test. Value is not from spec
15663         }
15664
15665         template<class fp16type>
15666         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15667         {
15668                 const fp16type  edge0   (*in[0]);
15669                 const fp16type  edge1   (*in[1]);
15670                 const fp16type  x               (*in[2]);
15671                 double                  result  (0.0);
15672
15673                 if (edge0.isNaN() || edge1.isNaN() || x.isNaN() || edge0.asDouble() >= edge1.asDouble())
15674                         return false;
15675
15676                 if (edge0.isInf() || edge1.isInf() || x.isInf())
15677                         return false;
15678
15679                 if (getFlavor() == 0)
15680                 {
15681                         const float     edge0d  (edge0.asFloat());
15682                         const float     edge1d  (edge1.asFloat());
15683                         const float     xd              (x.asFloat());
15684                         const float     sstep   (deFloatSmoothStep(edge0d, edge1d, xd));
15685
15686                         result = sstep;
15687                 }
15688                 else if (getFlavor() == 1)
15689                 {
15690                         const double    edge0d  (edge0.asDouble());
15691                         const double    edge1d  (edge1.asDouble());
15692                         const double    xd              (x.asDouble());
15693
15694                         if (xd <= edge0d)
15695                                 result = 0.0;
15696                         else if (xd >= edge1d)
15697                                 result = 1.0;
15698                         else
15699                         {
15700                                 const fp16type  a       (xd - edge0d);
15701                                 const fp16type  b       (edge1d - edge0d);
15702                                 const fp16type  t       (a.asDouble() / b.asDouble());
15703                                 const fp16type  t2      (2.0 * t.asDouble());
15704                                 const fp16type  t3      (3.0 - t2.asDouble());
15705                                 const fp16type  t4      (t.asDouble() * t3.asDouble());
15706                                 const fp16type  t5      (t.asDouble() * t4.asDouble());
15707
15708                                 result = t5.asDouble();
15709                         }
15710                 }
15711                 else if (getFlavor() == 2)
15712                 {
15713                         const double    edge0d  (edge0.asDouble());
15714                         const double    edge1d  (edge1.asDouble());
15715                         const double    xd              (x.asDouble());
15716                         const fp16type  a       (xd - edge0d);
15717                         const fp16type  b       (edge1d - edge0d);
15718                         const fp16type  bi      (1.0 / b.asDouble());
15719                         const fp16type  t0      (a.asDouble() * bi.asDouble());
15720                         const double    tc      (deClamp(t0.asDouble(), 0.0, 1.0));
15721                         const fp16type  t       (tc);
15722                         const fp16type  t2      (2.0 * t.asDouble());
15723                         const fp16type  t3      (3.0 - t2.asDouble());
15724                         const fp16type  t4      (t.asDouble() * t3.asDouble());
15725                         const fp16type  t5      (t.asDouble() * t4.asDouble());
15726
15727                         result = t5.asDouble();
15728                 }
15729                 else
15730                 {
15731                         TCU_THROW(InternalError, "Unknown flavor");
15732                 }
15733
15734                 out[0] = fp16type(result).bits();
15735                 min[0] = getMin(result, getULPs(in));
15736                 max[0] = getMax(result, getULPs(in));
15737
15738                 return true;
15739         }
15740 };
15741
15742 struct fp16Fma : public fp16PerComponent
15743 {
15744         fp16Fma()
15745         {
15746                 flavorNames.push_back("DoubleCalc");
15747                 flavorNames.push_back("EmulatingFP16");
15748         }
15749
15750         virtual double getULPs(vector<const deFloat16*>& in)
15751         {
15752                 DE_UNREF(in);
15753
15754                 return 16.0;
15755         }
15756
15757         template<class fp16type>
15758         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15759         {
15760                 DE_ASSERT(in.size() == 3);
15761                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
15762                 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
15763                 DE_ASSERT(getArgCompCount(2) == getOutCompCount());
15764                 DE_ASSERT(getOutCompCount() > 0);
15765
15766                 const fp16type  a               (*in[0]);
15767                 const fp16type  b               (*in[1]);
15768                 const fp16type  c               (*in[2]);
15769                 double                  result  (0.0);
15770
15771                 if (getFlavor() == 0)
15772                 {
15773                         const double    ad      (a.asDouble());
15774                         const double    bd      (b.asDouble());
15775                         const double    cd      (c.asDouble());
15776
15777                         result  = deMadd(ad, bd, cd);
15778                 }
15779                 else if (getFlavor() == 1)
15780                 {
15781                         const double    ad      (a.asDouble());
15782                         const double    bd      (b.asDouble());
15783                         const double    cd      (c.asDouble());
15784                         const fp16type  ab      (ad * bd);
15785                         const fp16type  r       (ab.asDouble() + cd);
15786
15787                         result  = r.asDouble();
15788                 }
15789                 else
15790                 {
15791                         TCU_THROW(InternalError, "Unknown flavor");
15792                 }
15793
15794                 out[0] = fp16type(result).bits();
15795                 min[0] = getMin(result, getULPs(in));
15796                 max[0] = getMax(result, getULPs(in));
15797
15798                 return true;
15799         }
15800 };
15801
15802
15803 struct fp16AllComponents : public fp16PerComponent
15804 {
15805         bool            callOncePerComponent    ()      { return false; }
15806 };
15807
15808 struct fp16Length : public fp16AllComponents
15809 {
15810         fp16Length() : fp16AllComponents()
15811         {
15812                 flavorNames.push_back("EmulatingFP16");
15813                 flavorNames.push_back("DoubleCalc");
15814         }
15815
15816         virtual double getULPs(vector<const deFloat16*>& in)
15817         {
15818                 DE_UNREF(in);
15819
15820                 return 4.0;
15821         }
15822
15823         template<class fp16type>
15824         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15825         {
15826                 DE_ASSERT(getOutCompCount() == 1);
15827                 DE_ASSERT(in.size() == 1);
15828
15829                 double  result  (0.0);
15830
15831                 if (getFlavor() == 0)
15832                 {
15833                         fp16type        r       (0.0);
15834
15835                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15836                         {
15837                                 const fp16type  x       (in[0][componentNdx]);
15838                                 const fp16type  q       (x.asDouble() * x.asDouble());
15839
15840                                 r = fp16type(r.asDouble() + q.asDouble());
15841                         }
15842
15843                         result = deSqrt(r.asDouble());
15844
15845                         out[0] = fp16type(result).bits();
15846                 }
15847                 else if (getFlavor() == 1)
15848                 {
15849                         double  r       (0.0);
15850
15851                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15852                         {
15853                                 const fp16type  x       (in[0][componentNdx]);
15854                                 const double    q       (x.asDouble() * x.asDouble());
15855
15856                                 r += q;
15857                         }
15858
15859                         result = deSqrt(r);
15860
15861                         out[0] = fp16type(result).bits();
15862                 }
15863                 else
15864                 {
15865                         TCU_THROW(InternalError, "Unknown flavor");
15866                 }
15867
15868                 min[0] = getMin(result, getULPs(in));
15869                 max[0] = getMax(result, getULPs(in));
15870
15871                 return true;
15872         }
15873 };
15874
15875 struct fp16Distance : public fp16AllComponents
15876 {
15877         fp16Distance() : fp16AllComponents()
15878         {
15879                 flavorNames.push_back("EmulatingFP16");
15880                 flavorNames.push_back("DoubleCalc");
15881         }
15882
15883         virtual double getULPs(vector<const deFloat16*>& in)
15884         {
15885                 DE_UNREF(in);
15886
15887                 return 4.0;
15888         }
15889
15890         template<class fp16type>
15891         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15892         {
15893                 DE_ASSERT(getOutCompCount() == 1);
15894                 DE_ASSERT(in.size() == 2);
15895                 DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
15896
15897                 double  result  (0.0);
15898
15899                 if (getFlavor() == 0)
15900                 {
15901                         fp16type        r       (0.0);
15902
15903                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15904                         {
15905                                 const fp16type  x       (in[0][componentNdx]);
15906                                 const fp16type  y       (in[1][componentNdx]);
15907                                 const fp16type  d       (x.asDouble() - y.asDouble());
15908                                 const fp16type  q       (d.asDouble() * d.asDouble());
15909
15910                                 r = fp16type(r.asDouble() + q.asDouble());
15911                         }
15912
15913                         result = deSqrt(r.asDouble());
15914                 }
15915                 else if (getFlavor() == 1)
15916                 {
15917                         double  r       (0.0);
15918
15919                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15920                         {
15921                                 const fp16type  x       (in[0][componentNdx]);
15922                                 const fp16type  y       (in[1][componentNdx]);
15923                                 const double    d       (x.asDouble() - y.asDouble());
15924                                 const double    q       (d * d);
15925
15926                                 r += q;
15927                         }
15928
15929                         result = deSqrt(r);
15930                 }
15931                 else
15932                 {
15933                         TCU_THROW(InternalError, "Unknown flavor");
15934                 }
15935
15936                 out[0] = fp16type(result).bits();
15937                 min[0] = getMin(result, getULPs(in));
15938                 max[0] = getMax(result, getULPs(in));
15939
15940                 return true;
15941         }
15942 };
15943
15944 struct fp16Cross : public fp16AllComponents
15945 {
15946         fp16Cross() : fp16AllComponents()
15947         {
15948                 flavorNames.push_back("EmulatingFP16");
15949                 flavorNames.push_back("DoubleCalc");
15950         }
15951
15952         virtual double getULPs(vector<const deFloat16*>& in)
15953         {
15954                 DE_UNREF(in);
15955
15956                 return 4.0;
15957         }
15958
15959         template<class fp16type>
15960         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15961         {
15962                 DE_ASSERT(getOutCompCount() == 3);
15963                 DE_ASSERT(in.size() == 2);
15964                 DE_ASSERT(getArgCompCount(0) == 3);
15965                 DE_ASSERT(getArgCompCount(1) == 3);
15966
15967                 if (getFlavor() == 0)
15968                 {
15969                         const fp16type  x0              (in[0][0]);
15970                         const fp16type  x1              (in[0][1]);
15971                         const fp16type  x2              (in[0][2]);
15972                         const fp16type  y0              (in[1][0]);
15973                         const fp16type  y1              (in[1][1]);
15974                         const fp16type  y2              (in[1][2]);
15975                         const fp16type  x1y2    (x1.asDouble() * y2.asDouble());
15976                         const fp16type  y1x2    (y1.asDouble() * x2.asDouble());
15977                         const fp16type  x2y0    (x2.asDouble() * y0.asDouble());
15978                         const fp16type  y2x0    (y2.asDouble() * x0.asDouble());
15979                         const fp16type  x0y1    (x0.asDouble() * y1.asDouble());
15980                         const fp16type  y0x1    (y0.asDouble() * x1.asDouble());
15981
15982                         out[0] = fp16type(x1y2.asDouble() - y1x2.asDouble()).bits();
15983                         out[1] = fp16type(x2y0.asDouble() - y2x0.asDouble()).bits();
15984                         out[2] = fp16type(x0y1.asDouble() - y0x1.asDouble()).bits();
15985                 }
15986                 else if (getFlavor() == 1)
15987                 {
15988                         const fp16type  x0              (in[0][0]);
15989                         const fp16type  x1              (in[0][1]);
15990                         const fp16type  x2              (in[0][2]);
15991                         const fp16type  y0              (in[1][0]);
15992                         const fp16type  y1              (in[1][1]);
15993                         const fp16type  y2              (in[1][2]);
15994                         const double    x1y2    (x1.asDouble() * y2.asDouble());
15995                         const double    y1x2    (y1.asDouble() * x2.asDouble());
15996                         const double    x2y0    (x2.asDouble() * y0.asDouble());
15997                         const double    y2x0    (y2.asDouble() * x0.asDouble());
15998                         const double    x0y1    (x0.asDouble() * y1.asDouble());
15999                         const double    y0x1    (y0.asDouble() * x1.asDouble());
16000
16001                         out[0] = fp16type(x1y2 - y1x2).bits();
16002                         out[1] = fp16type(x2y0 - y2x0).bits();
16003                         out[2] = fp16type(x0y1 - y0x1).bits();
16004                 }
16005                 else
16006                 {
16007                         TCU_THROW(InternalError, "Unknown flavor");
16008                 }
16009
16010                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16011                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16012                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16013                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16014
16015                 return true;
16016         }
16017 };
16018
16019 struct fp16Normalize : public fp16AllComponents
16020 {
16021         fp16Normalize() : fp16AllComponents()
16022         {
16023                 flavorNames.push_back("EmulatingFP16");
16024                 flavorNames.push_back("DoubleCalc");
16025
16026                 permutationsFlavorStart = 0;
16027                 permutationsFlavorEnd = flavorNames.size();
16028
16029                 // flavorNames will be extended later
16030         }
16031
16032         virtual void    setArgCompCount                 (size_t argNo, size_t compCount)
16033         {
16034                 DE_ASSERT(argCompCount[argNo] == 0); // Once only
16035
16036                 if (argNo == 0 && argCompCount[argNo] == 0)
16037                 {
16038                         const size_t            maxPermutationsCount    = 24u; // Equal to 4!
16039                         std::vector<int>        indices;
16040
16041                         for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16042                                 indices.push_back(static_cast<int>(componentNdx));
16043
16044                         m_permutations.reserve(maxPermutationsCount);
16045
16046                         permutationsFlavorStart = flavorNames.size();
16047
16048                         do
16049                         {
16050                                 tcu::UVec4      permutation;
16051                                 std::string     name            = "Permutted_";
16052
16053                                 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16054                                 {
16055                                         permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16056                                         name += de::toString(indices[componentNdx]);
16057                                 }
16058
16059                                 m_permutations.push_back(permutation);
16060                                 flavorNames.push_back(name);
16061
16062                         } while(std::next_permutation(indices.begin(), indices.end()));
16063
16064                         permutationsFlavorEnd = flavorNames.size();
16065                 }
16066
16067                 fp16AllComponents::setArgCompCount(argNo, compCount);
16068         }
16069         virtual double getULPs(vector<const deFloat16*>& in)
16070         {
16071                 DE_UNREF(in);
16072
16073                 return 8.0;
16074         }
16075
16076         template<class fp16type>
16077         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16078         {
16079                 DE_ASSERT(in.size() == 1);
16080                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16081
16082                 if (getFlavor() == 0)
16083                 {
16084                         fp16type        r(0.0);
16085
16086                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16087                         {
16088                                 const fp16type  x       (in[0][componentNdx]);
16089                                 const fp16type  q       (x.asDouble() * x.asDouble());
16090
16091                                 r = fp16type(r.asDouble() + q.asDouble());
16092                         }
16093
16094                         r = fp16type(deSqrt(r.asDouble()));
16095
16096                         if (r.isZero())
16097                                 return false;
16098
16099                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16100                         {
16101                                 const fp16type  x       (in[0][componentNdx]);
16102
16103                                 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16104                         }
16105                 }
16106                 else if (getFlavor() == 1)
16107                 {
16108                         double  r(0.0);
16109
16110                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16111                         {
16112                                 const fp16type  x       (in[0][componentNdx]);
16113                                 const double    q       (x.asDouble() * x.asDouble());
16114
16115                                 r += q;
16116                         }
16117
16118                         r = deSqrt(r);
16119
16120                         if (r == 0)
16121                                 return false;
16122
16123                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16124                         {
16125                                 const fp16type  x       (in[0][componentNdx]);
16126
16127                                 out[componentNdx] = fp16type(x.asDouble() / r).bits();
16128                         }
16129                 }
16130                 else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16131                 {
16132                         const int                       compCount               (static_cast<int>(getArgCompCount(0)));
16133                         const size_t            permutationNdx  (getFlavor() - permutationsFlavorStart);
16134                         const tcu::UVec4&       permutation             (m_permutations[permutationNdx]);
16135                         fp16type                        r                               (0.0);
16136
16137                         for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16138                         {
16139                                 const size_t    componentNdx    (permutation[permComponentNdx]);
16140                                 const fp16type  x                               (in[0][componentNdx]);
16141                                 const fp16type  q                               (x.asDouble() * x.asDouble());
16142
16143                                 r = fp16type(r.asDouble() + q.asDouble());
16144                         }
16145
16146                         r = fp16type(deSqrt(r.asDouble()));
16147
16148                         if (r.isZero())
16149                                 return false;
16150
16151                         for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16152                         {
16153                                 const size_t    componentNdx    (permutation[permComponentNdx]);
16154                                 const fp16type  x                               (in[0][componentNdx]);
16155
16156                                 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16157                         }
16158                 }
16159                 else
16160                 {
16161                         TCU_THROW(InternalError, "Unknown flavor");
16162                 }
16163
16164                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16165                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16166                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16167                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16168
16169                 return true;
16170         }
16171
16172 private:
16173         std::vector<tcu::UVec4> m_permutations;
16174         size_t                                  permutationsFlavorStart;
16175         size_t                                  permutationsFlavorEnd;
16176 };
16177
16178 struct fp16FaceForward : public fp16AllComponents
16179 {
16180         virtual double getULPs(vector<const deFloat16*>& in)
16181         {
16182                 DE_UNREF(in);
16183
16184                 return 4.0;
16185         }
16186
16187         template<class fp16type>
16188         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16189         {
16190                 DE_ASSERT(in.size() == 3);
16191                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16192                 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16193                 DE_ASSERT(getArgCompCount(2) == getOutCompCount());
16194
16195                 fp16type        dp(0.0);
16196
16197                 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16198                 {
16199                         const fp16type  x       (in[1][componentNdx]);
16200                         const fp16type  y       (in[2][componentNdx]);
16201                         const double    xd      (x.asDouble());
16202                         const double    yd      (y.asDouble());
16203                         const fp16type  q       (xd * yd);
16204
16205                         dp = fp16type(dp.asDouble() + q.asDouble());
16206                 }
16207
16208                 if (dp.isNaN() || dp.isZero())
16209                         return false;
16210
16211                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16212                 {
16213                         const fp16type  n       (in[0][componentNdx]);
16214
16215                         out[componentNdx] = (dp.signBit() == 1) ? n.bits() : fp16type(-n.asDouble()).bits();
16216                 }
16217
16218                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16219                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16220                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16221                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16222
16223                 return true;
16224         }
16225 };
16226
16227 struct fp16Reflect : public fp16AllComponents
16228 {
16229         fp16Reflect() : fp16AllComponents()
16230         {
16231                 flavorNames.push_back("EmulatingFP16");
16232                 flavorNames.push_back("EmulatingFP16+KeepZeroSign");
16233                 flavorNames.push_back("FloatCalc");
16234                 flavorNames.push_back("FloatCalc+KeepZeroSign");
16235                 flavorNames.push_back("EmulatingFP16+2Nfirst");
16236                 flavorNames.push_back("EmulatingFP16+2Ifirst");
16237         }
16238
16239         virtual double getULPs(vector<const deFloat16*>& in)
16240         {
16241                 DE_UNREF(in);
16242
16243                 return 256.0; // This is not a precision test. Value is not from spec
16244         }
16245
16246         template<class fp16type>
16247         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16248         {
16249                 DE_ASSERT(in.size() == 2);
16250                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16251                 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16252
16253                 if (getFlavor() < 4)
16254                 {
16255                         const bool      keepZeroSign    ((flavor & 1) != 0 ? true : false);
16256                         const bool      floatCalc               ((flavor & 2) != 0 ? true : false);
16257
16258                         if (floatCalc)
16259                         {
16260                                 float   dp(0.0f);
16261
16262                                 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16263                                 {
16264                                         const fp16type  i       (in[0][componentNdx]);
16265                                         const fp16type  n       (in[1][componentNdx]);
16266                                         const float             id      (i.asFloat());
16267                                         const float             nd      (n.asFloat());
16268                                         const float             qd      (id * nd);
16269
16270                                         if (keepZeroSign)
16271                                                 dp = (componentNdx == 0) ? qd : dp + qd;
16272                                         else
16273                                                 dp = dp + qd;
16274                                 }
16275
16276                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16277                                 {
16278                                         const fp16type  i               (in[0][componentNdx]);
16279                                         const fp16type  n               (in[1][componentNdx]);
16280                                         const float             dpnd    (dp * n.asFloat());
16281                                         const float             dpn2d   (2.0f * dpnd);
16282                                         const float             idpn2d  (i.asFloat() - dpn2d);
16283                                         const fp16type  result  (idpn2d);
16284
16285                                         out[componentNdx] = result.bits();
16286                                 }
16287                         }
16288                         else
16289                         {
16290                                 fp16type        dp(0.0);
16291
16292                                 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16293                                 {
16294                                         const fp16type  i       (in[0][componentNdx]);
16295                                         const fp16type  n       (in[1][componentNdx]);
16296                                         const double    id      (i.asDouble());
16297                                         const double    nd      (n.asDouble());
16298                                         const fp16type  q       (id * nd);
16299
16300                                         if (keepZeroSign)
16301                                                 dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
16302                                         else
16303                                                 dp = fp16type(dp.asDouble() + q.asDouble());
16304                                 }
16305
16306                                 if (dp.isNaN())
16307                                         return false;
16308
16309                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16310                                 {
16311                                         const fp16type  i               (in[0][componentNdx]);
16312                                         const fp16type  n               (in[1][componentNdx]);
16313                                         const fp16type  dpn             (dp.asDouble() * n.asDouble());
16314                                         const fp16type  dpn2    (2 * dpn.asDouble());
16315                                         const fp16type  idpn2   (i.asDouble() - dpn2.asDouble());
16316
16317                                         out[componentNdx] = idpn2.bits();
16318                                 }
16319                         }
16320                 }
16321                 else if (getFlavor() == 4)
16322                 {
16323                         fp16type        dp(0.0);
16324
16325                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16326                         {
16327                                 const fp16type  i       (in[0][componentNdx]);
16328                                 const fp16type  n       (in[1][componentNdx]);
16329                                 const double    id      (i.asDouble());
16330                                 const double    nd      (n.asDouble());
16331                                 const fp16type  q       (id * nd);
16332
16333                                 dp = fp16type(dp.asDouble() + q.asDouble());
16334                         }
16335
16336                         if (dp.isNaN())
16337                                 return false;
16338
16339                         for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16340                         {
16341                                 const fp16type  i               (in[0][componentNdx]);
16342                                 const fp16type  n               (in[1][componentNdx]);
16343                                 const fp16type  n2              (2 * n.asDouble());
16344                                 const fp16type  dpn2    (dp.asDouble() * n2.asDouble());
16345                                 const fp16type  idpn2   (i.asDouble() - dpn2.asDouble());
16346
16347                                 out[componentNdx] = idpn2.bits();
16348                         }
16349                 }
16350                 else if (getFlavor() == 5)
16351                 {
16352                         fp16type        dp2(0.0);
16353
16354                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16355                         {
16356                                 const fp16type  i       (in[0][componentNdx]);
16357                                 const fp16type  n       (in[1][componentNdx]);
16358                                 const fp16type  i2      (2.0 * i.asDouble());
16359                                 const double    i2d     (i2.asDouble());
16360                                 const double    nd      (n.asDouble());
16361                                 const fp16type  q       (i2d * nd);
16362
16363                                 dp2 = fp16type(dp2.asDouble() + q.asDouble());
16364                         }
16365
16366                         if (dp2.isNaN())
16367                                 return false;
16368
16369                         for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16370                         {
16371                                 const fp16type  i               (in[0][componentNdx]);
16372                                 const fp16type  n               (in[1][componentNdx]);
16373                                 const fp16type  dpn2    (dp2.asDouble() * n.asDouble());
16374                                 const fp16type  idpn2   (i.asDouble() - dpn2.asDouble());
16375
16376                                 out[componentNdx] = idpn2.bits();
16377                         }
16378                 }
16379                 else
16380                 {
16381                         TCU_THROW(InternalError, "Unknown flavor");
16382                 }
16383
16384                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16385                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16386                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16387                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16388
16389                 return true;
16390         }
16391 };
16392
16393 struct fp16Refract : public fp16AllComponents
16394 {
16395         fp16Refract() : fp16AllComponents()
16396         {
16397                 flavorNames.push_back("EmulatingFP16");
16398                 flavorNames.push_back("EmulatingFP16+KeepZeroSign");
16399                 flavorNames.push_back("FloatCalc");
16400                 flavorNames.push_back("FloatCalc+KeepZeroSign");
16401         }
16402
16403         virtual double getULPs(vector<const deFloat16*>& in)
16404         {
16405                 DE_UNREF(in);
16406
16407                 return 8192.0; // This is not a precision test. Value is not from spec
16408         }
16409
16410         template<class fp16type>
16411         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16412         {
16413                 DE_ASSERT(in.size() == 3);
16414                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16415                 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16416                 DE_ASSERT(getArgCompCount(2) == 1);
16417
16418                 const bool              keepZeroSign    ((flavor & 1) != 0 ? true : false);
16419                 const bool              doubleCalc              ((flavor & 2) != 0 ? true : false);
16420                 const fp16type  eta                             (*in[2]);
16421
16422                 if (doubleCalc)
16423                 {
16424                         double  dp      (0.0);
16425
16426                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16427                         {
16428                                 const fp16type  i       (in[0][componentNdx]);
16429                                 const fp16type  n       (in[1][componentNdx]);
16430                                 const double    id      (i.asDouble());
16431                                 const double    nd      (n.asDouble());
16432                                 const double    qd      (id * nd);
16433
16434                                 if (keepZeroSign)
16435                                         dp = (componentNdx == 0) ? qd : dp + qd;
16436                                 else
16437                                         dp = dp + qd;
16438                         }
16439
16440                         const double    eta2    (eta.asDouble() * eta.asDouble());
16441                         const double    dp2             (dp * dp);
16442                         const double    dp1             (1.0 - dp2);
16443                         const double    dpe             (eta2 * dp1);
16444                         const double    k               (1.0 - dpe);
16445
16446                         if (k < 0.0)
16447                         {
16448                                 const fp16type  zero    (0.0);
16449
16450                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16451                                         out[componentNdx] = zero.bits();
16452                         }
16453                         else
16454                         {
16455                                 const double    sk      (deSqrt(k));
16456
16457                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16458                                 {
16459                                         const fp16type  i               (in[0][componentNdx]);
16460                                         const fp16type  n               (in[1][componentNdx]);
16461                                         const double    etai    (i.asDouble() * eta.asDouble());
16462                                         const double    etadp   (eta.asDouble() * dp);
16463                                         const double    etadpk  (etadp + sk);
16464                                         const double    etadpkn (etadpk * n.asDouble());
16465                                         const double    full    (etai - etadpkn);
16466                                         const fp16type  result  (full);
16467
16468                                         if (result.isInf())
16469                                                 return false;
16470
16471                                         out[componentNdx] = result.bits();
16472                                 }
16473                         }
16474                 }
16475                 else
16476                 {
16477                         fp16type        dp      (0.0);
16478
16479                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16480                         {
16481                                 const fp16type  i       (in[0][componentNdx]);
16482                                 const fp16type  n       (in[1][componentNdx]);
16483                                 const double    id      (i.asDouble());
16484                                 const double    nd      (n.asDouble());
16485                                 const fp16type  q       (id * nd);
16486
16487                                 if (keepZeroSign)
16488                                         dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
16489                                 else
16490                                         dp = fp16type(dp.asDouble() + q.asDouble());
16491                         }
16492
16493                         if (dp.isNaN())
16494                                 return false;
16495
16496                         const fp16type  eta2(eta.asDouble() * eta.asDouble());
16497                         const fp16type  dp2     (dp.asDouble() * dp.asDouble());
16498                         const fp16type  dp1     (1.0 - dp2.asDouble());
16499                         const fp16type  dpe     (eta2.asDouble() * dp1.asDouble());
16500                         const fp16type  k       (1.0 - dpe.asDouble());
16501
16502                         if (k.asDouble() < 0.0)
16503                         {
16504                                 const fp16type  zero    (0.0);
16505
16506                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16507                                         out[componentNdx] = zero.bits();
16508                         }
16509                         else
16510                         {
16511                                 const fp16type  sk      (deSqrt(k.asDouble()));
16512
16513                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16514                                 {
16515                                         const fp16type  i               (in[0][componentNdx]);
16516                                         const fp16type  n               (in[1][componentNdx]);
16517                                         const fp16type  etai    (i.asDouble() * eta.asDouble());
16518                                         const fp16type  etadp   (eta.asDouble() * dp.asDouble());
16519                                         const fp16type  etadpk  (etadp.asDouble() + sk.asDouble());
16520                                         const fp16type  etadpkn (etadpk.asDouble() * n.asDouble());
16521                                         const fp16type  full    (etai.asDouble() - etadpkn.asDouble());
16522
16523                                         if (full.isNaN() || full.isInf())
16524                                                 return false;
16525
16526                                         out[componentNdx] = full.bits();
16527                                 }
16528                         }
16529                 }
16530
16531                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16532                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16533                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16534                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16535
16536                 return true;
16537         }
16538 };
16539
16540 struct fp16Dot : public fp16AllComponents
16541 {
16542         fp16Dot() : fp16AllComponents()
16543         {
16544                 flavorNames.push_back("EmulatingFP16");
16545                 flavorNames.push_back("FloatCalc");
16546                 flavorNames.push_back("DoubleCalc");
16547
16548                 permutationsFlavorStart = 0;
16549                 permutationsFlavorEnd = flavorNames.size();
16550
16551                 // flavorNames will be extended later
16552         }
16553
16554         virtual void    setArgCompCount                 (size_t argNo, size_t compCount)
16555         {
16556                 DE_ASSERT(argCompCount[argNo] == 0); // Once only
16557
16558                 if (argNo == 0 && argCompCount[argNo] == 0)
16559                 {
16560                         const size_t            maxPermutationsCount    = 24u; // Equal to 4!
16561                         std::vector<int>        indices;
16562
16563                         for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16564                                 indices.push_back(static_cast<int>(componentNdx));
16565
16566                         m_permutations.reserve(maxPermutationsCount);
16567
16568                         permutationsFlavorStart = flavorNames.size();
16569
16570                         do
16571                         {
16572                                 tcu::UVec4      permutation;
16573                                 std::string     name            = "Permutted_";
16574
16575                                 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16576                                 {
16577                                         permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16578                                         name += de::toString(indices[componentNdx]);
16579                                 }
16580
16581                                 m_permutations.push_back(permutation);
16582                                 flavorNames.push_back(name);
16583
16584                         } while(std::next_permutation(indices.begin(), indices.end()));
16585
16586                         permutationsFlavorEnd = flavorNames.size();
16587                 }
16588
16589                 fp16AllComponents::setArgCompCount(argNo, compCount);
16590         }
16591
16592         virtual double  getULPs(vector<const deFloat16*>& in)
16593         {
16594                 DE_UNREF(in);
16595
16596                 return 16.0; // This is not a precision test. Value is not from spec
16597         }
16598
16599         template<class fp16type>
16600         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16601         {
16602                 DE_ASSERT(in.size() == 2);
16603                 DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
16604                 DE_ASSERT(getOutCompCount() == 1);
16605
16606                 double  result  (0.0);
16607                 double  eps             (0.0);
16608
16609                 if (getFlavor() == 0)
16610                 {
16611                         fp16type        dp      (0.0);
16612
16613                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16614                         {
16615                                 const fp16type  x       (in[0][componentNdx]);
16616                                 const fp16type  y       (in[1][componentNdx]);
16617                                 const fp16type  q       (x.asDouble() * y.asDouble());
16618
16619                                 dp = fp16type(dp.asDouble() + q.asDouble());
16620                                 eps += floatFormat16.ulp(q.asDouble(), 2.0);
16621                         }
16622
16623                         result = dp.asDouble();
16624                 }
16625                 else if (getFlavor() == 1)
16626                 {
16627                         float   dp      (0.0);
16628
16629                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16630                         {
16631                                 const fp16type  x       (in[0][componentNdx]);
16632                                 const fp16type  y       (in[1][componentNdx]);
16633                                 const float             q       (x.asFloat() * y.asFloat());
16634
16635                                 dp += q;
16636                                 eps += floatFormat16.ulp(static_cast<double>(q), 2.0);
16637                         }
16638
16639                         result = dp;
16640                 }
16641                 else if (getFlavor() == 2)
16642                 {
16643                         double  dp      (0.0);
16644
16645                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16646                         {
16647                                 const fp16type  x       (in[0][componentNdx]);
16648                                 const fp16type  y       (in[1][componentNdx]);
16649                                 const double    q       (x.asDouble() * y.asDouble());
16650
16651                                 dp += q;
16652                                 eps += floatFormat16.ulp(q, 2.0);
16653                         }
16654
16655                         result = dp;
16656                 }
16657                 else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16658                 {
16659                         const int                       compCount               (static_cast<int>(getArgCompCount(1)));
16660                         const size_t            permutationNdx  (getFlavor() - permutationsFlavorStart);
16661                         const tcu::UVec4&       permutation             (m_permutations[permutationNdx]);
16662                         fp16type                        dp                              (0.0);
16663
16664                         for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16665                         {
16666                                 const size_t            componentNdx    (permutation[permComponentNdx]);
16667                                 const fp16type          x                               (in[0][componentNdx]);
16668                                 const fp16type          y                               (in[1][componentNdx]);
16669                                 const fp16type          q                               (x.asDouble() * y.asDouble());
16670
16671                                 dp = fp16type(dp.asDouble() + q.asDouble());
16672                                 eps += floatFormat16.ulp(q.asDouble(), 2.0);
16673                         }
16674
16675                         result = dp.asDouble();
16676                 }
16677                 else
16678                 {
16679                         TCU_THROW(InternalError, "Unknown flavor");
16680                 }
16681
16682                 out[0] = fp16type(result).bits();
16683                 min[0] = result - eps;
16684                 max[0] = result + eps;
16685
16686                 return true;
16687         }
16688
16689 private:
16690         std::vector<tcu::UVec4> m_permutations;
16691         size_t                                  permutationsFlavorStart;
16692         size_t                                  permutationsFlavorEnd;
16693 };
16694
16695 struct fp16VectorTimesScalar : public fp16AllComponents
16696 {
16697         virtual double getULPs(vector<const deFloat16*>& in)
16698         {
16699                 DE_UNREF(in);
16700
16701                 return 2.0;
16702         }
16703
16704         template<class fp16type>
16705         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16706         {
16707                 DE_ASSERT(in.size() == 2);
16708                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16709                 DE_ASSERT(getArgCompCount(1) == 1);
16710
16711                 fp16type        s       (*in[1]);
16712
16713                 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16714                 {
16715                         const fp16type  x          (in[0][componentNdx]);
16716                         const double    result (s.asDouble() * x.asDouble());
16717                         const fp16type  m          (result);
16718
16719                         out[componentNdx] = m.bits();
16720                         min[componentNdx] = getMin(result, getULPs(in));
16721                         max[componentNdx] = getMax(result, getULPs(in));
16722                 }
16723
16724                 return true;
16725         }
16726 };
16727
16728 struct fp16MatrixBase : public fp16AllComponents
16729 {
16730         deUint32                getComponentValidity                    ()
16731         {
16732                 return static_cast<deUint32>(-1);
16733         }
16734
16735         inline size_t   getNdx                                                  (const size_t rowCount, const size_t col, const size_t row)
16736         {
16737                 const size_t minComponentCount  = 0;
16738                 const size_t maxComponentCount  = 3;
16739                 const size_t alignedRowsCount   = (rowCount == 3) ? 4 : rowCount;
16740
16741                 DE_ASSERT(de::inRange(rowCount, minComponentCount + 1, maxComponentCount + 1));
16742                 DE_ASSERT(de::inRange(col, minComponentCount, maxComponentCount));
16743                 DE_ASSERT(de::inBounds(row, minComponentCount, rowCount));
16744                 DE_UNREF(minComponentCount);
16745                 DE_UNREF(maxComponentCount);
16746
16747                 return col * alignedRowsCount + row;
16748         }
16749
16750         deUint32                getComponentMatrixValidityMask  (size_t cols, size_t rows)
16751         {
16752                 deUint32        result  = 0u;
16753
16754                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16755                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16756                         {
16757                                 const size_t bitNdx = getNdx(rows, colNdx, rowNdx);
16758
16759                                 DE_ASSERT(bitNdx < sizeof(result) * 8);
16760
16761                                 result |= (1<<bitNdx);
16762                         }
16763
16764                 return result;
16765         }
16766 };
16767
16768 template<size_t cols, size_t rows>
16769 struct fp16Transpose : public fp16MatrixBase
16770 {
16771         virtual double getULPs(vector<const deFloat16*>& in)
16772         {
16773                 DE_UNREF(in);
16774
16775                 return 1.0;
16776         }
16777
16778         deUint32        getComponentValidity    ()
16779         {
16780                 return getComponentMatrixValidityMask(rows, cols);
16781         }
16782
16783         template<class fp16type>
16784         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16785         {
16786                 DE_ASSERT(in.size() == 1);
16787
16788                 const size_t            alignedCols     = (cols == 3) ? 4 : cols;
16789                 const size_t            alignedRows     = (rows == 3) ? 4 : rows;
16790                 vector<deFloat16>       output          (alignedCols * alignedRows, 0);
16791
16792                 DE_ASSERT(output.size() == alignedCols * alignedRows);
16793
16794                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16795                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16796                                 output[rowNdx * alignedCols + colNdx] = in[0][colNdx * alignedRows + rowNdx];
16797
16798                 deMemcpy(out, &output[0], sizeof(deFloat16) * output.size());
16799                 deMemcpy(min, &output[0], sizeof(deFloat16) * output.size());
16800                 deMemcpy(max, &output[0], sizeof(deFloat16) * output.size());
16801
16802                 return true;
16803         }
16804 };
16805
16806 template<size_t cols, size_t rows>
16807 struct fp16MatrixTimesScalar : public fp16MatrixBase
16808 {
16809         virtual double getULPs(vector<const deFloat16*>& in)
16810         {
16811                 DE_UNREF(in);
16812
16813                 return 4.0;
16814         }
16815
16816         deUint32        getComponentValidity    ()
16817         {
16818                 return getComponentMatrixValidityMask(cols, rows);
16819         }
16820
16821         template<class fp16type>
16822         bool calc(vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16823         {
16824                 DE_ASSERT(in.size() == 2);
16825                 DE_ASSERT(getArgCompCount(1) == 1);
16826
16827                 const fp16type  y                       (in[1][0]);
16828                 const float             scalar          (y.asFloat());
16829                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
16830                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
16831
16832                 DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
16833                 DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
16834                 DE_UNREF(alignedCols);
16835
16836                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16837                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16838                         {
16839                                 const size_t    ndx     (colNdx * alignedRows + rowNdx);
16840                                 const fp16type  x       (in[0][ndx]);
16841                                 const double    result  (scalar * x.asFloat());
16842
16843                                 out[ndx] = fp16type(result).bits();
16844                                 min[ndx] = getMin(result, getULPs(in));
16845                                 max[ndx] = getMax(result, getULPs(in));
16846                         }
16847
16848                 return true;
16849         }
16850 };
16851
16852 template<size_t cols, size_t rows>
16853 struct fp16VectorTimesMatrix : public fp16MatrixBase
16854 {
16855         fp16VectorTimesMatrix() : fp16MatrixBase()
16856         {
16857                 flavorNames.push_back("EmulatingFP16");
16858                 flavorNames.push_back("FloatCalc");
16859         }
16860
16861         virtual double getULPs (vector<const deFloat16*>& in)
16862         {
16863                 DE_UNREF(in);
16864
16865                 return (8.0 * cols);
16866         }
16867
16868         deUint32 getComponentValidity ()
16869         {
16870                 return getComponentMatrixValidityMask(cols, 1);
16871         }
16872
16873         template<class fp16type>
16874         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16875         {
16876                 DE_ASSERT(in.size() == 2);
16877
16878                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
16879                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
16880
16881                 DE_ASSERT(getOutCompCount() == cols);
16882                 DE_ASSERT(getArgCompCount(0) == rows);
16883                 DE_ASSERT(getArgCompCount(1) == alignedCols * alignedRows);
16884                 DE_UNREF(alignedCols);
16885
16886                 if (getFlavor() == 0)
16887                 {
16888                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16889                         {
16890                                 fp16type        s       (fp16type::zero(1));
16891
16892                                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16893                                 {
16894                                         const fp16type  v       (in[0][rowNdx]);
16895                                         const float             vf      (v.asFloat());
16896                                         const size_t    ndx     (colNdx * alignedRows + rowNdx);
16897                                         const fp16type  x       (in[1][ndx]);
16898                                         const float             xf      (x.asFloat());
16899                                         const fp16type  m       (vf * xf);
16900
16901                                         s = fp16type(s.asFloat() + m.asFloat());
16902                                 }
16903
16904                                 out[colNdx] = s.bits();
16905                                 min[colNdx] = getMin(s.asDouble(), getULPs(in));
16906                                 max[colNdx] = getMax(s.asDouble(), getULPs(in));
16907                         }
16908                 }
16909                 else if (getFlavor() == 1)
16910                 {
16911                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16912                         {
16913                                 float   s       (0.0f);
16914
16915                                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16916                                 {
16917                                         const fp16type  v       (in[0][rowNdx]);
16918                                         const float             vf      (v.asFloat());
16919                                         const size_t    ndx     (colNdx * alignedRows + rowNdx);
16920                                         const fp16type  x       (in[1][ndx]);
16921                                         const float             xf      (x.asFloat());
16922                                         const float             m       (vf * xf);
16923
16924                                         s += m;
16925                                 }
16926
16927                                 out[colNdx] = fp16type(s).bits();
16928                                 min[colNdx] = getMin(static_cast<double>(s), getULPs(in));
16929                                 max[colNdx] = getMax(static_cast<double>(s), getULPs(in));
16930                         }
16931                 }
16932                 else
16933                 {
16934                         TCU_THROW(InternalError, "Unknown flavor");
16935                 }
16936
16937                 return true;
16938         }
16939 };
16940
16941 template<size_t cols, size_t rows>
16942 struct fp16MatrixTimesVector : public fp16MatrixBase
16943 {
16944         fp16MatrixTimesVector() : fp16MatrixBase()
16945         {
16946                 flavorNames.push_back("EmulatingFP16");
16947                 flavorNames.push_back("FloatCalc");
16948         }
16949
16950         virtual double getULPs (vector<const deFloat16*>& in)
16951         {
16952                 DE_UNREF(in);
16953
16954                 return (8.0 * rows);
16955         }
16956
16957         deUint32 getComponentValidity ()
16958         {
16959                 return getComponentMatrixValidityMask(rows, 1);
16960         }
16961
16962         template<class fp16type>
16963         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16964         {
16965                 DE_ASSERT(in.size() == 2);
16966
16967                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
16968                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
16969
16970                 DE_ASSERT(getOutCompCount() == rows);
16971                 DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
16972                 DE_ASSERT(getArgCompCount(1) == cols);
16973                 DE_UNREF(alignedCols);
16974
16975                 if (getFlavor() == 0)
16976                 {
16977                         for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16978                         {
16979                                 fp16type        s       (fp16type::zero(1));
16980
16981                                 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16982                                 {
16983                                         const size_t    ndx     (colNdx * alignedRows + rowNdx);
16984                                         const fp16type  x       (in[0][ndx]);
16985                                         const float             xf      (x.asFloat());
16986                                         const fp16type  v       (in[1][colNdx]);
16987                                         const float             vf      (v.asFloat());
16988                                         const fp16type  m       (vf * xf);
16989
16990                                         s = fp16type(s.asFloat() + m.asFloat());
16991                                 }
16992
16993                                 out[rowNdx] = s.bits();
16994                                 min[rowNdx] = getMin(s.asDouble(), getULPs(in));
16995                                 max[rowNdx] = getMax(s.asDouble(), getULPs(in));
16996                         }
16997                 }
16998                 else if (getFlavor() == 1)
16999                 {
17000                         for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17001                         {
17002                                 float   s       (0.0f);
17003
17004                                 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17005                                 {
17006                                         const size_t    ndx     (colNdx * alignedRows + rowNdx);
17007                                         const fp16type  x       (in[0][ndx]);
17008                                         const float             xf      (x.asFloat());
17009                                         const fp16type  v       (in[1][colNdx]);
17010                                         const float             vf      (v.asFloat());
17011                                         const float             m       (vf * xf);
17012
17013                                         s += m;
17014                                 }
17015
17016                                 out[rowNdx] = fp16type(s).bits();
17017                                 min[rowNdx] = getMin(static_cast<double>(s), getULPs(in));
17018                                 max[rowNdx] = getMax(static_cast<double>(s), getULPs(in));
17019                         }
17020                 }
17021                 else
17022                 {
17023                         TCU_THROW(InternalError, "Unknown flavor");
17024                 }
17025
17026                 return true;
17027         }
17028 };
17029
17030 template<size_t colsL, size_t rowsL, size_t colsR, size_t rowsR>
17031 struct fp16MatrixTimesMatrix : public fp16MatrixBase
17032 {
17033         fp16MatrixTimesMatrix() : fp16MatrixBase()
17034         {
17035                 flavorNames.push_back("EmulatingFP16");
17036                 flavorNames.push_back("FloatCalc");
17037         }
17038
17039         virtual double getULPs (vector<const deFloat16*>& in)
17040         {
17041                 DE_UNREF(in);
17042
17043                 return 32.0;
17044         }
17045
17046         deUint32 getComponentValidity ()
17047         {
17048                 return getComponentMatrixValidityMask(colsR, rowsL);
17049         }
17050
17051         template<class fp16type>
17052         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17053         {
17054                 DE_STATIC_ASSERT(colsL == rowsR);
17055
17056                 DE_ASSERT(in.size() == 2);
17057
17058                 const size_t    alignedColsL    = (colsL == 3) ? 4 : colsL;
17059                 const size_t    alignedRowsL    = (rowsL == 3) ? 4 : rowsL;
17060                 const size_t    alignedColsR    = (colsR == 3) ? 4 : colsR;
17061                 const size_t    alignedRowsR    = (rowsR == 3) ? 4 : rowsR;
17062
17063                 DE_ASSERT(getOutCompCount() == alignedColsR * alignedRowsL);
17064                 DE_ASSERT(getArgCompCount(0) == alignedColsL * alignedRowsL);
17065                 DE_ASSERT(getArgCompCount(1) == alignedColsR * alignedRowsR);
17066                 DE_UNREF(alignedColsL);
17067                 DE_UNREF(alignedColsR);
17068
17069                 if (getFlavor() == 0)
17070                 {
17071                         for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17072                         {
17073                                 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17074                                 {
17075                                         const size_t    ndx     (colNdx * alignedRowsL + rowNdx);
17076                                         fp16type                s       (fp16type::zero(1));
17077
17078                                         for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17079                                         {
17080                                                 const size_t    ndxl    (commonNdx * alignedRowsL + rowNdx);
17081                                                 const fp16type  l               (in[0][ndxl]);
17082                                                 const float             lf              (l.asFloat());
17083                                                 const size_t    ndxr    (colNdx * alignedRowsR + commonNdx);
17084                                                 const fp16type  r               (in[1][ndxr]);
17085                                                 const float             rf              (r.asFloat());
17086                                                 const fp16type  m               (lf * rf);
17087
17088                                                 s = fp16type(s.asFloat() + m.asFloat());
17089                                         }
17090
17091                                         out[ndx] = s.bits();
17092                                         min[ndx] = getMin(s.asDouble(), getULPs(in));
17093                                         max[ndx] = getMax(s.asDouble(), getULPs(in));
17094                                 }
17095                         }
17096                 }
17097                 else if (getFlavor() == 1)
17098                 {
17099                         for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17100                         {
17101                                 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17102                                 {
17103                                         const size_t    ndx     (colNdx * alignedRowsL + rowNdx);
17104                                         float                   s       (0.0f);
17105
17106                                         for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17107                                         {
17108                                                 const size_t    ndxl    (commonNdx * alignedRowsL + rowNdx);
17109                                                 const fp16type  l               (in[0][ndxl]);
17110                                                 const float             lf              (l.asFloat());
17111                                                 const size_t    ndxr    (colNdx * alignedRowsR + commonNdx);
17112                                                 const fp16type  r               (in[1][ndxr]);
17113                                                 const float             rf              (r.asFloat());
17114                                                 const float             m               (lf * rf);
17115
17116                                                 s += m;
17117                                         }
17118
17119                                         out[ndx] = fp16type(s).bits();
17120                                         min[ndx] = getMin(static_cast<double>(s), getULPs(in));
17121                                         max[ndx] = getMax(static_cast<double>(s), getULPs(in));
17122                                 }
17123                         }
17124                 }
17125                 else
17126                 {
17127                         TCU_THROW(InternalError, "Unknown flavor");
17128                 }
17129
17130                 return true;
17131         }
17132 };
17133
17134 template<size_t cols, size_t rows>
17135 struct fp16OuterProduct : public fp16MatrixBase
17136 {
17137         virtual double getULPs (vector<const deFloat16*>& in)
17138         {
17139                 DE_UNREF(in);
17140
17141                 return 2.0;
17142         }
17143
17144         deUint32 getComponentValidity ()
17145         {
17146                 return getComponentMatrixValidityMask(cols, rows);
17147         }
17148
17149         template<class fp16type>
17150         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17151         {
17152                 DE_ASSERT(in.size() == 2);
17153
17154                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
17155                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
17156
17157                 DE_ASSERT(getArgCompCount(0) == rows);
17158                 DE_ASSERT(getArgCompCount(1) == cols);
17159                 DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
17160                 DE_UNREF(alignedCols);
17161
17162                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17163                 {
17164                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17165                         {
17166                                 const size_t    ndx     (colNdx * alignedRows + rowNdx);
17167                                 const fp16type  x       (in[0][rowNdx]);
17168                                 const float             xf      (x.asFloat());
17169                                 const fp16type  y       (in[1][colNdx]);
17170                                 const float             yf      (y.asFloat());
17171                                 const fp16type  m       (xf * yf);
17172
17173                                 out[ndx] = m.bits();
17174                                 min[ndx] = getMin(m.asDouble(), getULPs(in));
17175                                 max[ndx] = getMax(m.asDouble(), getULPs(in));
17176                         }
17177                 }
17178
17179                 return true;
17180         }
17181 };
17182
17183 template<size_t size>
17184 struct fp16Determinant;
17185
17186 template<>
17187 struct fp16Determinant<2> : public fp16MatrixBase
17188 {
17189         virtual double getULPs (vector<const deFloat16*>& in)
17190         {
17191                 DE_UNREF(in);
17192
17193                 return 128.0; // This is not a precision test. Value is not from spec
17194         }
17195
17196         deUint32 getComponentValidity ()
17197         {
17198                 return 1;
17199         }
17200
17201         template<class fp16type>
17202         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17203         {
17204                 const size_t    cols            = 2;
17205                 const size_t    rows            = 2;
17206                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
17207                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
17208
17209                 DE_ASSERT(in.size() == 1);
17210                 DE_ASSERT(getOutCompCount() == 1);
17211                 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17212                 DE_UNREF(alignedCols);
17213                 DE_UNREF(alignedRows);
17214
17215                 // [ a b ]
17216                 // [ c d ]
17217                 const float             a               (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17218                 const float             b               (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17219                 const float             c               (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17220                 const float             d               (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17221                 const float             ad              (a * d);
17222                 const fp16type  adf16   (ad);
17223                 const float             bc              (b * c);
17224                 const fp16type  bcf16   (bc);
17225                 const float             r               (adf16.asFloat() - bcf16.asFloat());
17226                 const fp16type  rf16    (r);
17227
17228                 out[0] = rf16.bits();
17229                 min[0] = getMin(r, getULPs(in));
17230                 max[0] = getMax(r, getULPs(in));
17231
17232                 return true;
17233         }
17234 };
17235
17236 template<>
17237 struct fp16Determinant<3> : public fp16MatrixBase
17238 {
17239         virtual double getULPs (vector<const deFloat16*>& in)
17240         {
17241                 DE_UNREF(in);
17242
17243                 return 128.0; // This is not a precision test. Value is not from spec
17244         }
17245
17246         deUint32 getComponentValidity ()
17247         {
17248                 return 1;
17249         }
17250
17251         template<class fp16type>
17252         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17253         {
17254                 const size_t    cols            = 3;
17255                 const size_t    rows            = 3;
17256                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
17257                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
17258
17259                 DE_ASSERT(in.size() == 1);
17260                 DE_ASSERT(getOutCompCount() == 1);
17261                 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17262                 DE_UNREF(alignedCols);
17263                 DE_UNREF(alignedRows);
17264
17265                 // [ a b c ]
17266                 // [ d e f ]
17267                 // [ g h i ]
17268                 const float             a               (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17269                 const float             b               (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17270                 const float             c               (fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17271                 const float             d               (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17272                 const float             e               (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17273                 const float             f               (fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17274                 const float             g               (fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17275                 const float             h               (fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17276                 const float             i               (fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17277                 const fp16type  aei             (a * e * i);
17278                 const fp16type  bfg             (b * f * g);
17279                 const fp16type  cdh             (c * d * h);
17280                 const fp16type  ceg             (c * e * g);
17281                 const fp16type  bdi             (b * d * i);
17282                 const fp16type  afh             (a * f * h);
17283                 const float             r               (aei.asFloat() + bfg.asFloat() + cdh.asFloat() - ceg.asFloat() - bdi.asFloat() - afh.asFloat());
17284                 const fp16type  rf16    (r);
17285
17286                 out[0] = rf16.bits();
17287                 min[0] = getMin(r, getULPs(in));
17288                 max[0] = getMax(r, getULPs(in));
17289
17290                 return true;
17291         }
17292 };
17293
17294 template<>
17295 struct fp16Determinant<4> : public fp16MatrixBase
17296 {
17297         virtual double getULPs (vector<const deFloat16*>& in)
17298         {
17299                 DE_UNREF(in);
17300
17301                 return 128.0; // This is not a precision test. Value is not from spec
17302         }
17303
17304         deUint32 getComponentValidity ()
17305         {
17306                 return 1;
17307         }
17308
17309         template<class fp16type>
17310         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17311         {
17312                 const size_t    rows            = 4;
17313                 const size_t    cols            = 4;
17314                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
17315                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
17316
17317                 DE_ASSERT(in.size() == 1);
17318                 DE_ASSERT(getOutCompCount() == 1);
17319                 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17320                 DE_UNREF(alignedCols);
17321                 DE_UNREF(alignedRows);
17322
17323                 // [ a b c d ]
17324                 // [ e f g h ]
17325                 // [ i j k l ]
17326                 // [ m n o p ]
17327                 const float             a               (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17328                 const float             b               (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17329                 const float             c               (fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17330                 const float             d               (fp16type(in[0][getNdx(rows, 3, 0)]).asFloat());
17331                 const float             e               (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17332                 const float             f               (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17333                 const float             g               (fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17334                 const float             h               (fp16type(in[0][getNdx(rows, 3, 1)]).asFloat());
17335                 const float             i               (fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17336                 const float             j               (fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17337                 const float             k               (fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17338                 const float             l               (fp16type(in[0][getNdx(rows, 3, 2)]).asFloat());
17339                 const float             m               (fp16type(in[0][getNdx(rows, 0, 3)]).asFloat());
17340                 const float             n               (fp16type(in[0][getNdx(rows, 1, 3)]).asFloat());
17341                 const float             o               (fp16type(in[0][getNdx(rows, 2, 3)]).asFloat());
17342                 const float             p               (fp16type(in[0][getNdx(rows, 3, 3)]).asFloat());
17343
17344                 // [ f g h ]
17345                 // [ j k l ]
17346                 // [ n o p ]
17347                 const fp16type  fkp             (f * k * p);
17348                 const fp16type  gln             (g * l * n);
17349                 const fp16type  hjo             (h * j * o);
17350                 const fp16type  hkn             (h * k * n);
17351                 const fp16type  gjp             (g * j * p);
17352                 const fp16type  flo             (f * l * o);
17353                 const fp16type  detA    (a * (fkp.asFloat() + gln.asFloat() + hjo.asFloat() - hkn.asFloat() - gjp.asFloat() - flo.asFloat()));
17354
17355                 // [ e g h ]
17356                 // [ i k l ]
17357                 // [ m o p ]
17358                 const fp16type  ekp             (e * k * p);
17359                 const fp16type  glm             (g * l * m);
17360                 const fp16type  hio             (h * i * o);
17361                 const fp16type  hkm             (h * k * m);
17362                 const fp16type  gip             (g * i * p);
17363                 const fp16type  elo             (e * l * o);
17364                 const fp16type  detB    (b * (ekp.asFloat() + glm.asFloat() + hio.asFloat() - hkm.asFloat() - gip.asFloat() - elo.asFloat()));
17365
17366                 // [ e f h ]
17367                 // [ i j l ]
17368                 // [ m n p ]
17369                 const fp16type  ejp             (e * j * p);
17370                 const fp16type  flm             (f * l * m);
17371                 const fp16type  hin             (h * i * n);
17372                 const fp16type  hjm             (h * j * m);
17373                 const fp16type  fip             (f * i * p);
17374                 const fp16type  eln             (e * l * n);
17375                 const fp16type  detC    (c * (ejp.asFloat() + flm.asFloat() + hin.asFloat() - hjm.asFloat() - fip.asFloat() - eln.asFloat()));
17376
17377                 // [ e f g ]
17378                 // [ i j k ]
17379                 // [ m n o ]
17380                 const fp16type  ejo             (e * j * o);
17381                 const fp16type  fkm             (f * k * m);
17382                 const fp16type  gin             (g * i * n);
17383                 const fp16type  gjm             (g * j * m);
17384                 const fp16type  fio             (f * i * o);
17385                 const fp16type  ekn             (e * k * n);
17386                 const fp16type  detD    (d * (ejo.asFloat() + fkm.asFloat() + gin.asFloat() - gjm.asFloat() - fio.asFloat() - ekn.asFloat()));
17387
17388                 const float             r               (detA.asFloat() - detB.asFloat() + detC.asFloat() - detD.asFloat());
17389                 const fp16type  rf16    (r);
17390
17391                 out[0] = rf16.bits();
17392                 min[0] = getMin(r, getULPs(in));
17393                 max[0] = getMax(r, getULPs(in));
17394
17395                 return true;
17396         }
17397 };
17398
17399 template<size_t size>
17400 struct fp16Inverse;
17401
17402 template<>
17403 struct fp16Inverse<2> : public fp16MatrixBase
17404 {
17405         virtual double getULPs (vector<const deFloat16*>& in)
17406         {
17407                 DE_UNREF(in);
17408
17409                 return 128.0; // This is not a precision test. Value is not from spec
17410         }
17411
17412         deUint32 getComponentValidity ()
17413         {
17414                 return getComponentMatrixValidityMask(2, 2);
17415         }
17416
17417         template<class fp16type>
17418         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17419         {
17420                 const size_t    cols            = 2;
17421                 const size_t    rows            = 2;
17422                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
17423                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
17424
17425                 DE_ASSERT(in.size() == 1);
17426                 DE_ASSERT(getOutCompCount() == alignedRows * alignedCols);
17427                 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17428                 DE_UNREF(alignedCols);
17429
17430                 // [ a b ]
17431                 // [ c d ]
17432                 const float             a               (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17433                 const float             b               (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17434                 const float             c               (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17435                 const float             d               (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17436                 const float             ad              (a * d);
17437                 const fp16type  adf16   (ad);
17438                 const float             bc              (b * c);
17439                 const fp16type  bcf16   (bc);
17440                 const float             det             (adf16.asFloat() - bcf16.asFloat());
17441                 const fp16type  det16   (det);
17442
17443                 out[0] = fp16type( d / det16.asFloat()).bits();
17444                 out[1] = fp16type(-c / det16.asFloat()).bits();
17445                 out[2] = fp16type(-b / det16.asFloat()).bits();
17446                 out[3] = fp16type( a / det16.asFloat()).bits();
17447
17448                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17449                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17450                         {
17451                                 const size_t    ndx     (colNdx * alignedRows + rowNdx);
17452                                 const fp16type  s       (out[ndx]);
17453
17454                                 min[ndx] = getMin(s.asDouble(), getULPs(in));
17455                                 max[ndx] = getMax(s.asDouble(), getULPs(in));
17456                         }
17457
17458                 return true;
17459         }
17460 };
17461
17462 inline std::string fp16ToString(deFloat16 val)
17463 {
17464         return tcu::toHex<4>(val).toString() + " (" + de::floatToString(tcu::Float16(val).asFloat(), 10) + ")";
17465 }
17466
17467 template <size_t RES_COMPONENTS, size_t ARG0_COMPONENTS, size_t ARG1_COMPONENTS, size_t ARG2_COMPONENTS, class TestedArithmeticFunction>
17468 bool compareFP16ArithmeticFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
17469 {
17470         if (inputs.size() < 1 || inputs.size() > 3 || outputAllocs.size() != 1 || expectedOutputs.size() != 1)
17471                 return false;
17472
17473         const size_t    resultStep                      = (RES_COMPONENTS == 3) ? 4 : RES_COMPONENTS;
17474         const size_t    iterationsCount         = expectedOutputs[0].getByteSize() / (sizeof(deFloat16) * resultStep);
17475         const size_t    inputsSteps[3]          =
17476         {
17477                 (ARG0_COMPONENTS == 3) ? 4 : ARG0_COMPONENTS,
17478                 (ARG1_COMPONENTS == 3) ? 4 : ARG1_COMPONENTS,
17479                 (ARG2_COMPONENTS == 3) ? 4 : ARG2_COMPONENTS,
17480         };
17481
17482         DE_ASSERT(expectedOutputs[0].getByteSize() > 0);
17483         DE_ASSERT(expectedOutputs[0].getByteSize() == sizeof(deFloat16) * iterationsCount * resultStep);
17484
17485         for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17486         {
17487                 DE_ASSERT(inputs[inputNdx].getByteSize() > 0);
17488                 DE_ASSERT(inputs[inputNdx].getByteSize() == sizeof(deFloat16) * iterationsCount * inputsSteps[inputNdx]);
17489         }
17490
17491         const deFloat16* const          outputAsFP16                                    = (const deFloat16*)outputAllocs[0]->getHostPtr();
17492         TestedArithmeticFunction        func;
17493
17494         func.setOutCompCount(RES_COMPONENTS);
17495         func.setArgCompCount(0, ARG0_COMPONENTS);
17496         func.setArgCompCount(1, ARG1_COMPONENTS);
17497         func.setArgCompCount(2, ARG2_COMPONENTS);
17498
17499         const bool                                      callOncePerComponent                    = func.callOncePerComponent();
17500         const deUint32                          componentValidityMask                   = func.getComponentValidity();
17501         const size_t                            denormModesCount                                = 2;
17502         const char*                                     denormModes[denormModesCount]   = { "keep denormal numbers", "flush to zero" };
17503         const size_t                            successfulRunsPerComponent              = denormModesCount * func.getFlavorCount();
17504         bool                                            success                                                 = true;
17505         size_t                                          validatedCount                                  = 0;
17506
17507         vector<deUint8> inputBytes[3];
17508
17509         for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17510                 inputs[inputNdx].getBytes(inputBytes[inputNdx]);
17511
17512         const deFloat16* const                  inputsAsFP16[3]                 =
17513         {
17514                 inputs.size() >= 1 ? (const deFloat16*)&inputBytes[0][0] : DE_NULL,
17515                 inputs.size() >= 2 ? (const deFloat16*)&inputBytes[1][0] : DE_NULL,
17516                 inputs.size() >= 3 ? (const deFloat16*)&inputBytes[2][0] : DE_NULL,
17517         };
17518
17519         for (size_t idx = 0; idx < iterationsCount; ++idx)
17520         {
17521                 std::vector<size_t>                     successfulRuns          (RES_COMPONENTS, successfulRunsPerComponent);
17522                 std::vector<std::string>        errors                          (RES_COMPONENTS);
17523                 bool                                            iterationValidated      (true);
17524
17525                 for (size_t denormNdx = 0; denormNdx < 2; ++denormNdx)
17526                 {
17527                         for (size_t flavorNdx = 0; flavorNdx < func.getFlavorCount(); ++flavorNdx)
17528                         {
17529                                 func.setFlavor(flavorNdx);
17530
17531                                 const deFloat16*                        iterationOutputFP16             = &outputAsFP16[idx * resultStep];
17532                                 vector<deFloat16>                       iterationCalculatedFP16 (resultStep, 0);
17533                                 vector<double>                          iterationEdgeMin                (resultStep, 0.0);
17534                                 vector<double>                          iterationEdgeMax                (resultStep, 0.0);
17535                                 vector<const deFloat16*>        arguments;
17536
17537                                 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17538                                 {
17539                                         std::string     error;
17540                                         bool            reportError = false;
17541
17542                                         if (callOncePerComponent || componentNdx == 0)
17543                                         {
17544                                                 bool funcCallResult;
17545
17546                                                 arguments.clear();
17547
17548                                                 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17549                                                         arguments.push_back(&inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + componentNdx]);
17550
17551                                                 if (denormNdx == 0)
17552                                                         funcCallResult = func.template calc<tcu::Float16>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
17553                                                 else
17554                                                         funcCallResult = func.template calc<tcu::Float16Denormless>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
17555
17556                                                 if (!funcCallResult)
17557                                                 {
17558                                                         iterationValidated = false;
17559
17560                                                         if (callOncePerComponent)
17561                                                                 continue;
17562                                                         else
17563                                                                 break;
17564                                                 }
17565                                         }
17566
17567                                         if ((componentValidityMask != 0) && (componentValidityMask & (1<<componentNdx)) == 0)
17568                                                 continue;
17569
17570                                         reportError = !compare16BitFloat(iterationCalculatedFP16[componentNdx], iterationOutputFP16[componentNdx], error);
17571
17572                                         if (reportError)
17573                                         {
17574                                                 tcu::Float16 expected   (iterationCalculatedFP16[componentNdx]);
17575                                                 tcu::Float16 outputted  (iterationOutputFP16[componentNdx]);
17576                                                 tcu::Float64 edgeMin    (iterationEdgeMin[componentNdx]);
17577                                                 tcu::Float64 edgeMax    (iterationEdgeMax[componentNdx]);
17578
17579                                                 if (reportError && expected.isNaN())
17580                                                         reportError = false;
17581
17582                                                 if (reportError && !expected.isNaN() && !outputted.isNaN())
17583                                                 {
17584                                                         if (reportError && !expected.isInf() && !outputted.isInf())
17585                                                         {
17586                                                                 // Ignore rounding
17587                                                                 if (expected.bits() == outputted.bits() + 1 || expected.bits() + 1 == outputted.bits())
17588                                                                         reportError = false;
17589                                                         }
17590
17591                                                         if (reportError && expected.isInf())
17592                                                         {
17593                                                                 // RTZ rounding mode returns +/-65504 instead of Inf on overflow
17594                                                                 if (expected.sign() == 1 && outputted.bits() == 0x7bff && edgeMin.asDouble() <= std::numeric_limits<double>::max())
17595                                                                         reportError = false;
17596                                                                 else if (expected.sign() == -1 && outputted.bits() == 0xfbff && edgeMax.asDouble() >= -std::numeric_limits<double>::max())
17597                                                                         reportError = false;
17598                                                         }
17599
17600                                                         if (reportError)
17601                                                         {
17602                                                                 const double    outputtedDouble = outputted.asDouble();
17603
17604                                                             DE_ASSERT(edgeMin.isNaN() || edgeMax.isNaN() || (edgeMin.asDouble() <= edgeMax.asDouble()));
17605
17606                                                                 if (de::inRange(outputtedDouble, edgeMin.asDouble(), edgeMax.asDouble()))
17607                                                                         reportError = false;
17608                                                         }
17609                                                 }
17610
17611                                                 if (reportError)
17612                                                 {
17613                                                         const size_t            inputsComps[3]  =
17614                                                         {
17615                                                                 ARG0_COMPONENTS,
17616                                                                 ARG1_COMPONENTS,
17617                                                                 ARG2_COMPONENTS,
17618                                                         };
17619                                                         string                          inputsValues    ("Inputs:");
17620                                                         string                          flavorName              (func.getFlavorCount() == 1 ? "" : string(" flavor ") + de::toString(flavorNdx) + " (" + func.getCurrentFlavorName() + ")");
17621                                                         std::stringstream       errStream;
17622
17623                                                         for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17624                                                         {
17625                                                                 const size_t    inputCompsCount = inputsComps[inputNdx];
17626
17627                                                                 inputsValues += " [" + de::toString(inputNdx) + "]=(";
17628
17629                                                                 for (size_t compNdx = 0; compNdx < inputCompsCount; ++compNdx)
17630                                                                 {
17631                                                                         const deFloat16 inputComponentValue = inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + compNdx];
17632
17633                                                                         inputsValues += fp16ToString(inputComponentValue) + ((compNdx + 1 == inputCompsCount) ? ")": ", ");
17634                                                                 }
17635                                                         }
17636
17637                                                         errStream       << "At"
17638                                                                                 << " iteration " << de::toString(idx)
17639                                                                                 << " component " << de::toString(componentNdx)
17640                                                                                 << " denormMode " << de::toString(denormNdx)
17641                                                                                 << " (" << denormModes[denormNdx] << ")"
17642                                                                                 << " " << flavorName
17643                                                                                 << " " << inputsValues
17644                                                                                 << " outputted:" + fp16ToString(iterationOutputFP16[componentNdx])
17645                                                                                 << " expected:" + fp16ToString(iterationCalculatedFP16[componentNdx])
17646                                                                                 << " or in range: [" << iterationEdgeMin[componentNdx] << ", " << iterationEdgeMax[componentNdx] << "]."
17647                                                                                 << " " << error << "."
17648                                                                                 << std::endl;
17649
17650                                                         errors[componentNdx] += errStream.str();
17651
17652                                                         successfulRuns[componentNdx]--;
17653                                                 }
17654                                         }
17655                                 }
17656                         }
17657                 }
17658
17659                 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17660                 {
17661                         // Check if any component has total failure
17662                         if (successfulRuns[componentNdx] == 0)
17663                         {
17664                                 // Test failed in all denorm modes and all flavors for certain component: dump errors
17665                                 log << TestLog::Message << errors[componentNdx] << TestLog::EndMessage;
17666
17667                                 success = false;
17668                         }
17669                 }
17670
17671                 if (iterationValidated)
17672                         validatedCount++;
17673         }
17674
17675         if (validatedCount < 16)
17676                 TCU_THROW(InternalError, "Too few samples have been validated.");
17677
17678         return success;
17679 }
17680
17681 // IEEE-754 floating point numbers:
17682 // +--------+------+----------+-------------+
17683 // | binary | sign | exponent | significand |
17684 // +--------+------+----------+-------------+
17685 // | 16-bit |  1   |    5     |     10      |
17686 // +--------+------+----------+-------------+
17687 // | 32-bit |  1   |    8     |     23      |
17688 // +--------+------+----------+-------------+
17689 //
17690 // 16-bit floats:
17691 //
17692 // 0   000 00   00 0000 0001 (0x0001: 2e-24:         minimum positive denormalized)
17693 // 0   000 00   11 1111 1111 (0x03ff: 2e-14 - 2e-24: maximum positive denormalized)
17694 // 0   000 01   00 0000 0000 (0x0400: 2e-14:         minimum positive normalized)
17695 // 0   111 10   11 1111 1111 (0x7bff: 65504:         maximum positive normalized)
17696 //
17697 // 0   000 00   00 0000 0000 (0x0000: +0)
17698 // 0   111 11   00 0000 0000 (0x7c00: +Inf)
17699 // 0   000 00   11 1111 0000 (0x03f0: +Denorm)
17700 // 0   000 01   00 0000 0001 (0x0401: +Norm)
17701 // 0   111 11   00 0000 1111 (0x7c0f: +SNaN)
17702 // 0   111 11   11 1111 0000 (0x7ff0: +QNaN)
17703 // Generate and return 16-bit floats and their corresponding 32-bit values.
17704 //
17705 // The first 14 number pairs are manually picked, while the rest are randomly generated.
17706 // Expected count to be at least 14 (numPicks).
17707 vector<deFloat16> getFloat16a (de::Random& rnd, deUint32 count)
17708 {
17709         vector<deFloat16>       float16;
17710
17711         float16.reserve(count);
17712
17713         // Zero
17714         float16.push_back(deUint16(0x0000));
17715         float16.push_back(deUint16(0x8000));
17716         // Infinity
17717         float16.push_back(deUint16(0x7c00));
17718         float16.push_back(deUint16(0xfc00));
17719         // Normalized
17720         float16.push_back(deUint16(0x0401));
17721         float16.push_back(deUint16(0x8401));
17722         // Some normal number
17723         float16.push_back(deUint16(0x14cb));
17724         float16.push_back(deUint16(0x94cb));
17725         // Min/max positive normal
17726         float16.push_back(deUint16(0x0400));
17727         float16.push_back(deUint16(0x7bff));
17728         // Min/max negative normal
17729         float16.push_back(deUint16(0x8400));
17730         float16.push_back(deUint16(0xfbff));
17731         // PI
17732         float16.push_back(deUint16(0x4248)); // 3.140625
17733         float16.push_back(deUint16(0xb248)); // -3.140625
17734         // PI/2
17735         float16.push_back(deUint16(0x3e48)); // 1.5703125
17736         float16.push_back(deUint16(0xbe48)); // -1.5703125
17737         float16.push_back(deUint16(0x3c00)); // 1.0
17738         float16.push_back(deUint16(0x3800)); // 0.5
17739         // Some useful constants
17740         float16.push_back(tcu::Float16(-2.5f).bits());
17741         float16.push_back(tcu::Float16(-1.0f).bits());
17742         float16.push_back(tcu::Float16( 0.4f).bits());
17743         float16.push_back(tcu::Float16( 2.5f).bits());
17744
17745         const deUint32          numPicks        = static_cast<deUint32>(float16.size());
17746
17747         DE_ASSERT(count >= numPicks);
17748         count -= numPicks;
17749
17750         for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17751         {
17752                 int                     sign            = (rnd.getUint16() % 2 == 0) ? +1 : -1;
17753                 int                     exponent        = (rnd.getUint16() % 29) - 14 + 1;
17754                 deUint16        mantissa        = static_cast<deUint16>(2 * (rnd.getUint16() % 512));
17755
17756                 // Exclude power of -14 to avoid denorms
17757                 DE_ASSERT(de::inRange(exponent, -13, 15));
17758
17759                 float16.push_back(tcu::Float16::constructBits(sign, exponent, mantissa).bits());
17760         }
17761
17762         return float16;
17763 }
17764
17765 static inline vector<deFloat16> getInputData1 (deUint32 seed, size_t count, size_t argNo)
17766 {
17767         DE_UNREF(argNo);
17768
17769         de::Random      rnd(seed);
17770
17771         return getFloat16a(rnd, static_cast<deUint32>(count));
17772 }
17773
17774 static inline vector<deFloat16> getInputData2 (deUint32 seed, size_t count, size_t argNo)
17775 {
17776         de::Random      rnd             (seed);
17777         size_t          newCount = static_cast<size_t>(deSqrt(double(count)));
17778
17779         DE_ASSERT(newCount * newCount == count);
17780
17781         vector<deFloat16>       float16 = getFloat16a(rnd, static_cast<deUint32>(newCount));
17782
17783         return squarize(float16, static_cast<deUint32>(argNo));
17784 }
17785
17786 static inline vector<deFloat16> getInputData3 (deUint32 seed, size_t count, size_t argNo)
17787 {
17788         if (argNo == 0 || argNo == 1)
17789                 return getInputData2(seed, count, argNo);
17790         else
17791                 return getInputData1(seed<<argNo, count, argNo);
17792 }
17793
17794 vector<deFloat16> getInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17795 {
17796         DE_UNREF(stride);
17797
17798         vector<deFloat16>       result;
17799
17800         switch (argCount)
17801         {
17802                 case 1:result = getInputData1(seed, count, argNo); break;
17803                 case 2:result = getInputData2(seed, count, argNo); break;
17804                 case 3:result = getInputData3(seed, count, argNo); break;
17805                 default: TCU_THROW(InternalError, "Invalid argument count specified");
17806         }
17807
17808         if (compCount == 3)
17809         {
17810                 const size_t            newCount = (3 * count) / 4;
17811                 vector<deFloat16>       newResult;
17812
17813                 newResult.reserve(result.size());
17814
17815                 for (size_t ndx = 0; ndx < newCount; ++ndx)
17816                 {
17817                         newResult.push_back(result[ndx]);
17818
17819                         if (ndx % 3 == 2)
17820                                 newResult.push_back(0);
17821                 }
17822
17823                 result = newResult;
17824         }
17825
17826         DE_ASSERT(result.size() == count);
17827
17828         return result;
17829 }
17830
17831 // Generator for functions requiring data in range [1, inf]
17832 vector<deFloat16> getInputDataAC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17833 {
17834         vector<deFloat16>       result;
17835
17836         result = getInputData(seed, count, compCount, stride, argCount, argNo);
17837
17838         // Filter out values below 1.0 from upper half of numbers
17839         for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17840         {
17841                 const float f = tcu::Float16(result[idx]).asFloat();
17842
17843                 if (f < 1.0f)
17844                         result[idx] = tcu::Float16(1.0f - f).bits();
17845         }
17846
17847         return result;
17848 }
17849
17850 // Generator for functions requiring data in range [-1, 1]
17851 vector<deFloat16> getInputDataA (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17852 {
17853         vector<deFloat16>       result;
17854
17855         result = getInputData(seed, count, compCount, stride, argCount, argNo);
17856
17857         for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17858         {
17859                 const float f = tcu::Float16(result[idx]).asFloat();
17860
17861                 if (!de::inRange(f, -1.0f, 1.0f))
17862                         result[idx] = tcu::Float16(deFloatFrac(f)).bits();
17863         }
17864
17865         return result;
17866 }
17867
17868 // Generator for functions requiring data in range [-pi, pi]
17869 vector<deFloat16> getInputDataPI (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17870 {
17871         vector<deFloat16>       result;
17872
17873         result = getInputData(seed, count, compCount, stride, argCount, argNo);
17874
17875         for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17876         {
17877                 const float f = tcu::Float16(result[idx]).asFloat();
17878
17879                 if (!de::inRange(f, -DE_PI, DE_PI))
17880                         result[idx] = tcu::Float16(fmodf(f, DE_PI)).bits();
17881         }
17882
17883         return result;
17884 }
17885
17886 // Generator for functions requiring data in range [0, inf]
17887 vector<deFloat16> getInputDataP (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17888 {
17889         vector<deFloat16>       result;
17890
17891         result = getInputData(seed, count, compCount, stride, argCount, argNo);
17892
17893         if (argNo == 0)
17894         {
17895                 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17896                         result[idx] &= static_cast<deFloat16>(~0x8000);
17897         }
17898
17899         return result;
17900 }
17901
17902 vector<deFloat16> getInputDataV (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17903 {
17904         DE_UNREF(stride);
17905         DE_UNREF(argCount);
17906
17907         vector<deFloat16>       result;
17908
17909         if (argNo == 0)
17910                 result = getInputData2(seed, count, argNo);
17911         else
17912         {
17913                 const size_t            alignedCount    = (compCount == 3) ? 4 : compCount;
17914                 const size_t            newCountX               = static_cast<size_t>(deSqrt(double(count * alignedCount)));
17915                 const size_t            newCountY               = count / newCountX;
17916                 de::Random                      rnd                             (seed);
17917                 vector<deFloat16>       float16                 = getFloat16a(rnd, static_cast<deUint32>(newCountX));
17918
17919                 DE_ASSERT(newCountX * newCountX == alignedCount * count);
17920
17921                 for (size_t numIdx = 0; numIdx < newCountX; ++numIdx)
17922                 {
17923                         const vector<deFloat16> tmp(newCountY, float16[numIdx]);
17924
17925                         result.insert(result.end(), tmp.begin(), tmp.end());
17926                 }
17927         }
17928
17929         DE_ASSERT(result.size() == count);
17930
17931         return result;
17932 }
17933
17934 vector<deFloat16> getInputDataM (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17935 {
17936         DE_UNREF(compCount);
17937         DE_UNREF(stride);
17938         DE_UNREF(argCount);
17939
17940         de::Random                      rnd             (seed << argNo);
17941         vector<deFloat16>       result;
17942
17943         result = getFloat16a(rnd, static_cast<deUint32>(count));
17944
17945         DE_ASSERT(result.size() == count);
17946
17947         return result;
17948 }
17949
17950 vector<deFloat16> getInputDataD (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17951 {
17952         DE_UNREF(compCount);
17953         DE_UNREF(argCount);
17954
17955         de::Random                      rnd             (seed << argNo);
17956         vector<deFloat16>       result;
17957
17958         for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17959         {
17960                 int num = (rnd.getUint16() % 16) - 8;
17961
17962                 result.push_back(tcu::Float16(float(num)).bits());
17963         }
17964
17965         result[0 * stride] = deUint16(0x7c00); // +Inf
17966         result[1 * stride] = deUint16(0xfc00); // -Inf
17967
17968         DE_ASSERT(result.size() == count);
17969
17970         return result;
17971 }
17972
17973 // Generator for smoothstep function
17974 vector<deFloat16> getInputDataSS (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17975 {
17976         vector<deFloat16>       result;
17977
17978         result = getInputDataD(seed, count, compCount, stride, argCount, argNo);
17979
17980         if (argNo == 0)
17981         {
17982                 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17983                 {
17984                         const float f = tcu::Float16(result[idx]).asFloat();
17985
17986                         if (f > 4.0f)
17987                                 result[idx] = tcu::Float16(-f).bits();
17988                 }
17989         }
17990
17991         if (argNo == 1)
17992         {
17993                 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17994                 {
17995                         const float f = tcu::Float16(result[idx]).asFloat();
17996
17997                         if (f < 4.0f)
17998                                 result[idx] = tcu::Float16(-f).bits();
17999                 }
18000         }
18001
18002         return result;
18003 }
18004
18005 // Generates normalized vectors for arguments 0 and 1
18006 vector<deFloat16> getInputDataN (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
18007 {
18008         DE_UNREF(compCount);
18009         DE_UNREF(argCount);
18010
18011         de::Random                      rnd             (seed << argNo);
18012         vector<deFloat16>       result;
18013
18014         if (argNo == 0 || argNo == 1)
18015         {
18016                 // The input parameters for the incident vector I and the surface normal N must already be normalized
18017                 for (size_t numIdx = 0; numIdx < count; numIdx += stride)
18018                 {
18019                         vector <float>  unnormolized;
18020                         float                   sum                             = 0;
18021
18022                         for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18023                                 unnormolized.push_back(float((rnd.getUint16() % 16) - 8));
18024
18025                         for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18026                                 sum += unnormolized[compIdx] * unnormolized[compIdx];
18027
18028                         sum = deFloatSqrt(sum);
18029                         if (sum == 0.0f)
18030                                 unnormolized[0] = sum = 1.0f;
18031
18032                         for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18033                                 result.push_back(tcu::Float16(unnormolized[compIdx] / sum).bits());
18034
18035                         for (size_t compIdx = compCount; compIdx < stride; ++compIdx)
18036                                 result.push_back(0);
18037                 }
18038         }
18039         else
18040         {
18041                 // Input parameter eta
18042                 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
18043                 {
18044                         int num = (rnd.getUint16() % 16) - 8;
18045
18046                         result.push_back(tcu::Float16(float(num)).bits());
18047                 }
18048         }
18049
18050         DE_ASSERT(result.size() == count);
18051
18052         return result;
18053 }
18054
18055 // Data generator for complex matrix functions like determinant and inverse
18056 vector<deFloat16> getInputDataC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
18057 {
18058         DE_UNREF(compCount);
18059         DE_UNREF(stride);
18060         DE_UNREF(argCount);
18061
18062         de::Random                      rnd             (seed << argNo);
18063         vector<deFloat16>       result;
18064
18065         for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
18066         {
18067                 int num = (rnd.getUint16() % 16) - 8;
18068
18069                 result.push_back(tcu::Float16(float(num)).bits());
18070         }
18071
18072         DE_ASSERT(result.size() == count);
18073
18074         return result;
18075 }
18076
// Description of one 16-bit float data type used by the arithmetic function tests.
// Instances are aggregate-initialized, so the member order must not change.
struct Math16TestType
{
	// Prefix prepended to the component type name, e.g. "v2" or "m3x3"; empty for scalars.
	const char*		typePrefix;

	// Number of components for scalar/vector types; the type table stores 0 for matrices.
	const size_t	typeComponents;

	// Stride of one element when the type is stored in an array, in bytes.
	const size_t	typeArrayStride;

	// Stride of one element when the type is stored in a struct, in bytes.
	const size_t	typeStructStride;

	// Name suffix of the storage buffer type, e.g. "u32_ndp" or "u32_ndp_2".
	const char*		storage_type;
};
18085
// Data types supported by the 16-bit float arithmetic function tests.
// For scalar and vector types the enumerator value equals the component count.
// The values double as indices into the per-type table, so they must stay dense.
enum Math16DataTypes
{
	NONE				= 0,

	// Scalar and vectors
	SCALAR				= 1,
	VEC2				= 2,
	VEC3				= 3,
	VEC4				= 4,

	// Matrices
	MAT2X2				= 5,
	MAT2X3				= 6,
	MAT2X4				= 7,
	MAT3X2				= 8,
	MAT3X3				= 9,
	MAT3X4				= 10,
	MAT4X2				= 11,
	MAT4X3				= 12,
	MAT4X4				= 13,

	// Number of entries above; not a data type itself
	MATH16_TYPE_LAST	= 14
};
18104
// Bundle of SPIR-V assembly text fragments for one argument-count variant of a test.
// Instances are aggregate-initialized, so the member order must not change.
// NOTE(review): the per-member descriptions are inferred from the member names and
// the template placeholders used by the test generator - confirm against its use.
struct Math16ArgFragments
{
	// Instructions that load the arguments, invoke the operation and store the result.
	const char*		bodies;

	// Presumably module-level declarations for the argument buffers.
	const char*		variables;

	// Presumably decorations for the argument buffers.
	const char*		decorations;

	// Presumably function-local variable declarations.
	const char*		funcVariables;
};
18112
18113 typedef vector<deFloat16> Math16GetInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo);
18114
18115 struct Math16TestFunc
18116 {
18117         const char*                                     funcName;
18118         const char*                                     funcSuffix;
18119         size_t                                          funcArgsCount;
18120         size_t                                          typeResult;
18121         size_t                                          typeArg0;
18122         size_t                                          typeArg1;
18123         size_t                                          typeArg2;
18124         Math16GetInputData*                     getInputDataFunc;
18125         VerifyIOFunc                            verifyFunc;
18126 };
18127
18128 template<class SpecResource>
18129 void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGroup& testGroup, const size_t testTypeIdx, const Math16TestFunc& testFunc)
18130 {
18131         const int                                       testSpecificSeed                        = deStringHash(testGroup.getName());
18132         const int                                       seed                                            = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
18133         const size_t                            numDataPointsByAxis                     = 32;
18134         const size_t                            numDataPoints                           = numDataPointsByAxis * numDataPointsByAxis;
18135         const char*                                     componentType                           = "f16";
18136         const Math16TestType            testTypes[MATH16_TYPE_LAST]     =
18137         {
18138                 { "",           0,       0,                                              0,                                             "" },
18139                 { "",           1,       1 * sizeof(deFloat16),  2 * sizeof(deFloat16), "u32_half_ndp" },
18140                 { "v2",         2,       2 * sizeof(deFloat16),  2 * sizeof(deFloat16), "u32_ndp" },
18141                 { "v3",         3,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16), "u32_ndp_2" },
18142                 { "v4",         4,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16), "u32_ndp_2" },
18143                 { "m2x2",       0,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16), "u32_ndp_2" },
18144                 { "m2x3",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_4" },
18145                 { "m2x4",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_4" },
18146                 { "m3x2",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_3" },
18147                 { "m3x3",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6" },
18148                 { "m3x4",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6" },
18149                 { "m4x2",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_4" },
18150                 { "m4x3",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8" },
18151                 { "m4x4",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8" },
18152         };
18153
18154         DE_ASSERT(testTypeIdx == testTypes[testTypeIdx].typeComponents);
18155
18156
18157         const StringTemplate preMain
18158         (
18159                 "     %c_i32_ndp  = OpConstant %i32 ${num_data_points}\n"
18160
18161                 "        %f16     = OpTypeFloat 16\n"
18162                 "        %v2f16   = OpTypeVector %f16 2\n"
18163                 "        %v3f16   = OpTypeVector %f16 3\n"
18164                 "        %v4f16   = OpTypeVector %f16 4\n"
18165                 "        %m2x2f16 = OpTypeMatrix %v2f16 2\n"
18166                 "        %m2x3f16 = OpTypeMatrix %v3f16 2\n"
18167                 "        %m2x4f16 = OpTypeMatrix %v4f16 2\n"
18168                 "        %m3x2f16 = OpTypeMatrix %v2f16 3\n"
18169                 "        %m3x3f16 = OpTypeMatrix %v3f16 3\n"
18170                 "        %m3x4f16 = OpTypeMatrix %v4f16 3\n"
18171                 "        %m4x2f16 = OpTypeMatrix %v2f16 4\n"
18172                 "        %m4x3f16 = OpTypeMatrix %v3f16 4\n"
18173                 "        %m4x4f16 = OpTypeMatrix %v4f16 4\n"
18174
18175                 "       %fp_v2i32 = OpTypePointer Function %v2i32\n"
18176                 "       %fp_v3i32 = OpTypePointer Function %v3i32\n"
18177                 "       %fp_v4i32 = OpTypePointer Function %v4i32\n"
18178
18179                 "      %c_u32_ndp = OpConstant %u32 ${num_data_points}\n"
18180                 " %c_u32_half_ndp = OpSpecConstantOp %u32 UDiv %c_i32_ndp %c_u32_2\n"
18181                 "        %c_u32_5 = OpConstant %u32 5\n"
18182                 "        %c_u32_6 = OpConstant %u32 6\n"
18183                 "        %c_u32_7 = OpConstant %u32 7\n"
18184                 "        %c_u32_8 = OpConstant %u32 8\n"
18185                 "        %c_f16_0 = OpConstant %f16 0\n"
18186                 "        %c_f16_1 = OpConstant %f16 1\n"
18187                 "      %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
18188                 "         %up_u32 = OpTypePointer Uniform %u32\n"
18189                 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
18190                 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
18191
18192                 "    %ra_u32_half_ndp = OpTypeArray %u32 %c_u32_half_ndp\n"
18193                 "  %SSBO_u32_half_ndp = OpTypeStruct %ra_u32_half_ndp\n"
18194                 "%up_SSBO_u32_half_ndp = OpTypePointer Uniform %SSBO_u32_half_ndp\n"
18195                 "         %ra_u32_ndp = OpTypeArray %u32 %c_u32_ndp\n"
18196                 "       %SSBO_u32_ndp = OpTypeStruct %ra_u32_ndp\n"
18197                 "    %up_SSBO_u32_ndp = OpTypePointer Uniform %SSBO_u32_ndp\n"
18198                 "           %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
18199                 "        %up_ra_u32_2 = OpTypePointer Uniform %ra_u32_2\n"
18200                 "      %ra_ra_u32_ndp = OpTypeArray %ra_u32_2 %c_u32_ndp\n"
18201                 "     %SSBO_u32_ndp_2 = OpTypeStruct %ra_ra_u32_ndp\n"
18202                 "  %up_SSBO_u32_ndp_2 = OpTypePointer Uniform %SSBO_u32_ndp_2\n"
18203                 "           %ra_u32_4 = OpTypeArray %u32 %c_u32_4\n"
18204                 "        %up_ra_u32_4 = OpTypePointer Uniform %ra_u32_4\n"
18205                 "        %ra_ra_u32_4 = OpTypeArray %ra_u32_4 %c_u32_ndp\n"
18206                 "     %SSBO_u32_ndp_4 = OpTypeStruct %ra_ra_u32_4\n"
18207                 "  %up_SSBO_u32_ndp_4 = OpTypePointer Uniform %SSBO_u32_ndp_4\n"
18208                 "           %ra_u32_3 = OpTypeArray %u32 %c_u32_3\n"
18209                 "        %up_ra_u32_3 = OpTypePointer Uniform %ra_u32_3\n"
18210                 "        %ra_ra_u32_3 = OpTypeArray %ra_u32_3 %c_u32_ndp\n"
18211                 "     %SSBO_u32_ndp_3 = OpTypeStruct %ra_ra_u32_3\n"
18212                 "  %up_SSBO_u32_ndp_3 = OpTypePointer Uniform %SSBO_u32_ndp_3\n"
18213                 "           %ra_u32_6 = OpTypeArray %u32 %c_u32_6\n"
18214                 "        %up_ra_u32_6 = OpTypePointer Uniform %ra_u32_6\n"
18215                 "        %ra_ra_u32_6 = OpTypeArray %ra_u32_6 %c_u32_ndp\n"
18216                 "     %SSBO_u32_ndp_6 = OpTypeStruct %ra_ra_u32_6\n"
18217                 "  %up_SSBO_u32_ndp_6 = OpTypePointer Uniform %SSBO_u32_ndp_6\n"
18218                 "           %ra_u32_8 = OpTypeArray %u32 %c_u32_8\n"
18219                 "        %up_ra_u32_8 = OpTypePointer Uniform %ra_u32_8\n"
18220                 "        %ra_ra_u32_8 = OpTypeArray %ra_u32_8 %c_u32_ndp\n"
18221                 "     %SSBO_u32_ndp_8 = OpTypeStruct %ra_ra_u32_8\n"
18222                 "  %up_SSBO_u32_ndp_8 = OpTypePointer Uniform %SSBO_u32_ndp_8\n"
18223
18224                 "         %f16_i32_fn = OpTypeFunction %f16 %i32\n"
18225                 "       %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
18226                 "       %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
18227                 "       %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
18228                 "     %m2x2f16_i32_fn = OpTypeFunction %m2x2f16 %i32\n"
18229                 "     %m2x3f16_i32_fn = OpTypeFunction %m2x3f16 %i32\n"
18230                 "     %m2x4f16_i32_fn = OpTypeFunction %m2x4f16 %i32\n"
18231                 "     %m3x2f16_i32_fn = OpTypeFunction %m3x2f16 %i32\n"
18232                 "     %m3x3f16_i32_fn = OpTypeFunction %m3x3f16 %i32\n"
18233                 "     %m3x4f16_i32_fn = OpTypeFunction %m3x4f16 %i32\n"
18234                 "     %m4x2f16_i32_fn = OpTypeFunction %m4x2f16 %i32\n"
18235                 "     %m4x3f16_i32_fn = OpTypeFunction %m4x3f16 %i32\n"
18236                 "     %m4x4f16_i32_fn = OpTypeFunction %m4x4f16 %i32\n"
18237                 "    %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
18238                 "  %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
18239                 "  %void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
18240                 "  %void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
18241                 "%void_m2x2f16_i32_fn = OpTypeFunction %void %m2x2f16 %i32\n"
18242                 "%void_m2x3f16_i32_fn = OpTypeFunction %void %m2x3f16 %i32\n"
18243                 "%void_m2x4f16_i32_fn = OpTypeFunction %void %m2x4f16 %i32\n"
18244                 "%void_m3x2f16_i32_fn = OpTypeFunction %void %m3x2f16 %i32\n"
18245                 "%void_m3x3f16_i32_fn = OpTypeFunction %void %m3x3f16 %i32\n"
18246                 "%void_m3x4f16_i32_fn = OpTypeFunction %void %m3x4f16 %i32\n"
18247                 "%void_m4x2f16_i32_fn = OpTypeFunction %void %m4x2f16 %i32\n"
18248                 "%void_m4x3f16_i32_fn = OpTypeFunction %void %m4x3f16 %i32\n"
18249                 "%void_m4x4f16_i32_fn = OpTypeFunction %void %m4x4f16 %i32\n"
18250                 "${arg_vars}"
18251         );
18252
18253         const StringTemplate decoration
18254         (
18255                 "OpDecorate %ra_u32_half_ndp ArrayStride 4\n"
18256                 "OpMemberDecorate %SSBO_u32_half_ndp 0 Offset 0\n"
18257                 "OpDecorate %SSBO_u32_half_ndp BufferBlock\n"
18258
18259                 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
18260                 "OpMemberDecorate %SSBO_u32_ndp 0 Offset 0\n"
18261                 "OpDecorate %SSBO_u32_ndp BufferBlock\n"
18262
18263                 "OpDecorate %ra_u32_2 ArrayStride 4\n"
18264                 "OpDecorate %ra_ra_u32_ndp ArrayStride 8\n"
18265                 "OpMemberDecorate %SSBO_u32_ndp_2 0 Offset 0\n"
18266                 "OpDecorate %SSBO_u32_ndp_2 BufferBlock\n"
18267
18268                 "OpDecorate %ra_u32_4 ArrayStride 4\n"
18269                 "OpDecorate %ra_ra_u32_4 ArrayStride 16\n"
18270                 "OpMemberDecorate %SSBO_u32_ndp_4 0 Offset 0\n"
18271                 "OpDecorate %SSBO_u32_ndp_4 BufferBlock\n"
18272
18273                 "OpDecorate %ra_u32_3 ArrayStride 4\n"
18274                 "OpDecorate %ra_ra_u32_3 ArrayStride 16\n"
18275                 "OpMemberDecorate %SSBO_u32_ndp_3 0 Offset 0\n"
18276                 "OpDecorate %SSBO_u32_ndp_3 BufferBlock\n"
18277
18278                 "OpDecorate %ra_u32_6 ArrayStride 4\n"
18279                 "OpDecorate %ra_ra_u32_6 ArrayStride 32\n"
18280                 "OpMemberDecorate %SSBO_u32_ndp_6 0 Offset 0\n"
18281                 "OpDecorate %SSBO_u32_ndp_6 BufferBlock\n"
18282
18283                 "OpDecorate %ra_u32_8 ArrayStride 4\n"
18284                 "OpDecorate %ra_ra_u32_8 ArrayStride 32\n"
18285                 "OpMemberDecorate %SSBO_u32_ndp_8 0 Offset 0\n"
18286                 "OpDecorate %SSBO_u32_ndp_8 BufferBlock\n"
18287
18288                 "${arg_decorations}"
18289         );
18290
18291         const StringTemplate testFun
18292         (
18293                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
18294                 "    %param = OpFunctionParameter %v4f32\n"
18295                 "    %entry = OpLabel\n"
18296
18297                 "        %i = OpVariable %fp_i32 Function\n"
18298                 "${arg_infunc_vars}"
18299                 "             OpStore %i %c_i32_0\n"
18300                 "             OpBranch %loop\n"
18301
18302                 "     %loop = OpLabel\n"
18303                 "    %i_cmp = OpLoad %i32 %i\n"
18304                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
18305                 "             OpLoopMerge %merge %next None\n"
18306                 "             OpBranchConditional %lt %write %merge\n"
18307
18308                 "    %write = OpLabel\n"
18309                 "      %ndx = OpLoad %i32 %i\n"
18310
18311                 "${arg_func_call}"
18312
18313                 "             OpBranch %next\n"
18314
18315                 "     %next = OpLabel\n"
18316                 "    %i_cur = OpLoad %i32 %i\n"
18317                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
18318                 "             OpStore %i %i_new\n"
18319                 "             OpBranch %loop\n"
18320
18321                 "    %merge = OpLabel\n"
18322                 "             OpReturnValue %param\n"
18323                 "             OpFunctionEnd\n"
18324         );
18325
18326         const Math16ArgFragments        argFragment1    =
18327         {
18328                 "     %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18329                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0\n"
18330                 "     %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18331                 "",
18332                 "",
18333                 "",
18334         };
18335
18336         const Math16ArgFragments        argFragment2    =
18337         {
18338                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18339                 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18340                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1\n"
18341                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18342                 "",
18343                 "",
18344                 "",
18345         };
18346
18347         const Math16ArgFragments        argFragment3    =
18348         {
18349                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18350                 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18351                 " %val_src2 = OpFunctionCall %${t2} %ld_arg_ssbo_src2 %ndx\n"
18352                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1 %val_src2\n"
18353                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18354                 "",
18355                 "",
18356                 "",
18357         };
18358
18359         const Math16ArgFragments        argFragmentLdExp        =
18360         {
18361                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18362                 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18363                 "%val_src1i = OpConvertFToS %${dr}i32 %val_src1\n"
18364                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1i\n"
18365                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18366
18367                 "",
18368
18369                 "",
18370
18371                 "",
18372         };
18373
18374         const Math16ArgFragments        argFragmentModfFrac     =
18375         {
18376                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18377                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18378                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18379
18380                 "   %fp_tmp = OpTypePointer Function %${tr}\n",
18381
18382                 "",
18383
18384                 "      %tmp = OpVariable %fp_tmp Function\n",
18385         };
18386
18387         const Math16ArgFragments        argFragmentModfInt      =
18388         {
18389                 " %val_src0  = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18390                 "%val_unused = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18391                 "     %tmp0  = OpAccessChain %fp_tmp %tmp\n"
18392                 "  %val_dst  = OpLoad %${tr} %tmp0\n"
18393                 "      %dst  = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18394
18395                 "   %fp_tmp  = OpTypePointer Function %${tr}\n",
18396
18397                 "",
18398
18399                 "      %tmp  = OpVariable %fp_tmp Function\n",
18400         };
18401
18402         const Math16ArgFragments        argFragmentModfStruct   =
18403         {
18404                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18405                 "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18406                 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18407                 "             OpStore %tmp_ptr_s %val_tmp\n"
18408                 "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_${struct_member}\n"
18409                 "  %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18410                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18411
18412                 "  %fp_${tr} = OpTypePointer Function %${tr}\n"
18413                 "   %st_tmp = OpTypeStruct %${tr} %${tr}\n"
18414                 "   %fp_tmp = OpTypePointer Function %st_tmp\n"
18415                 "   %c_frac = OpConstant %i32 0\n"
18416                 "    %c_int = OpConstant %i32 1\n",
18417
18418                 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18419                 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18420
18421                 "      %tmp = OpVariable %fp_tmp Function\n",
18422         };
18423
18424         const Math16ArgFragments        argFragmentFrexpStructS =
18425         {
18426                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18427                 "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18428                 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18429                 "             OpStore %tmp_ptr_s %val_tmp\n"
18430                 "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_i32_0\n"
18431                 "  %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18432                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18433
18434                 "  %fp_${tr} = OpTypePointer Function %${tr}\n"
18435                 "   %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18436                 "   %fp_tmp = OpTypePointer Function %st_tmp\n",
18437
18438                 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18439                 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18440
18441                 "      %tmp = OpVariable %fp_tmp Function\n",
18442         };
18443
18444         const Math16ArgFragments        argFragmentFrexpStructE =
18445         {
18446                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18447                 "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18448                 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18449                 "             OpStore %tmp_ptr_s %val_tmp\n"
18450                 "%tmp_ptr_l = OpAccessChain %fp_${dr}i32 %tmp %c_i32_1\n"
18451                 "%val_dst_i = OpLoad %${dr}i32 %tmp_ptr_l\n"
18452                 "  %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
18453                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18454
18455                 "   %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18456                 "   %fp_tmp = OpTypePointer Function %st_tmp\n",
18457
18458                 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18459                 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18460
18461                 "      %tmp = OpVariable %fp_tmp Function\n",
18462         };
18463
18464         const Math16ArgFragments        argFragmentFrexpS               =
18465         {
18466                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18467                 "  %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
18468                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18469                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18470
18471                 "",
18472
18473                 "",
18474
18475                 "      %tmp = OpVariable %fp_${dr}i32 Function\n",
18476         };
18477
18478         const Math16ArgFragments        argFragmentFrexpE               =
18479         {
18480                 " %val_src0  = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18481                 "  %out_exp  = OpAccessChain %fp_${dr}i32 %tmp\n"
18482                 "%val_unused = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18483                 "%val_dst_i  = OpLoad %${dr}i32 %out_exp\n"
18484                 "  %val_dst  = OpConvertSToF %${tr} %val_dst_i\n"
18485                 "      %dst  = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18486
18487                 "",
18488
18489                 "",
18490
18491                 "      %tmp  = OpVariable %fp_${dr}i32 Function\n",
18492         };
18493
18494         string load_funcs[MATH16_TYPE_LAST];
18495         load_funcs[SCALAR] = loadScalarF16FromUint;
18496         load_funcs[VEC2]   = loadV2F16FromUint;
18497         load_funcs[VEC3]   = loadV3F16FromUints;
18498         load_funcs[VEC4]   = loadV4F16FromUints;
18499         load_funcs[MAT2X2] = loadM2x2F16FromUints;
18500         load_funcs[MAT2X3] = loadM2x3F16FromUints;
18501         load_funcs[MAT2X4] = loadM2x4F16FromUints;
18502         load_funcs[MAT3X2] = loadM3x2F16FromUints;
18503         load_funcs[MAT3X3] = loadM3x3F16FromUints;
18504         load_funcs[MAT3X4] = loadM3x4F16FromUints;
18505         load_funcs[MAT4X2] = loadM4x2F16FromUints;
18506         load_funcs[MAT4X3] = loadM4x3F16FromUints;
18507         load_funcs[MAT4X4] = loadM4x4F16FromUints;
18508
18509         string store_funcs[MATH16_TYPE_LAST];
18510         store_funcs[SCALAR] = storeScalarF16AsUint;
18511         store_funcs[VEC2]   = storeV2F16AsUint;
18512         store_funcs[VEC3]   = storeV3F16AsUints;
18513         store_funcs[VEC4]   = storeV4F16AsUints;
18514         store_funcs[MAT2X2] = storeM2x2F16AsUints;
18515         store_funcs[MAT2X3] = storeM2x3F16AsUints;
18516         store_funcs[MAT2X4] = storeM2x4F16AsUints;
18517         store_funcs[MAT3X2] = storeM3x2F16AsUints;
18518         store_funcs[MAT3X3] = storeM3x3F16AsUints;
18519         store_funcs[MAT3X4] = storeM3x4F16AsUints;
18520         store_funcs[MAT4X2] = storeM4x2F16AsUints;
18521         store_funcs[MAT4X3] = storeM4x3F16AsUints;
18522         store_funcs[MAT4X4] = storeM4x4F16AsUints;
18523
18524         const Math16TestType&           testType                                = testTypes[testTypeIdx];
18525         const string                            funcNameString                  = string(testFunc.funcName) + string(testFunc.funcSuffix);
18526         const string                            testName                                = de::toLower(funcNameString);
18527         const Math16ArgFragments*       argFragments                    = DE_NULL;
18528         const size_t                            typeStructStride                = testType.typeStructStride;
18529         const bool                                      extInst                                 = !(testFunc.funcName[0] == 'O' && testFunc.funcName[1] == 'p');
18530         const size_t                            numFloatsPerArg0Type    = testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16);
18531         const size_t                            iterations                              = numDataPoints / numFloatsPerArg0Type;
18532         const size_t                            numFloatsPerResultType  = testTypes[testFunc.typeResult].typeArrayStride / sizeof(deFloat16);
18533         const vector<deFloat16>         float16UnusedOutput             (iterations * numFloatsPerResultType, 0);
18534         VulkanFeatures                          features;
18535         SpecResource                            specResource;
18536         map<string, string>                     specs;
18537         map<string, string>                     fragments;
18538         vector<string>                          extensions;
18539         string                                          funcCall;
18540         string                                          funcVariables;
18541         string                                          variables;
18542         string                                          declarations;
18543         string                                          decorations;
18544         string                                          functions;
18545
18546         switch (testFunc.funcArgsCount)
18547         {
18548                 case 1:
18549                 {
18550                         argFragments = &argFragment1;
18551
18552                         if (funcNameString == "ModfFrac")               argFragments = &argFragmentModfFrac;
18553                         if (funcNameString == "ModfInt")                argFragments = &argFragmentModfInt;
18554                         if (funcNameString == "ModfStructFrac") argFragments = &argFragmentModfStruct;
18555                         if (funcNameString == "ModfStructInt")  argFragments = &argFragmentModfStruct;
18556                         if (funcNameString == "FrexpS")                 argFragments = &argFragmentFrexpS;
18557                         if (funcNameString == "FrexpE")                 argFragments = &argFragmentFrexpE;
18558                         if (funcNameString == "FrexpStructS")   argFragments = &argFragmentFrexpStructS;
18559                         if (funcNameString == "FrexpStructE")   argFragments = &argFragmentFrexpStructE;
18560
18561                         break;
18562                 }
18563                 case 2:
18564                 {
18565                         argFragments = &argFragment2;
18566
18567                         if (funcNameString == "Ldexp")                  argFragments = &argFragmentLdExp;
18568
18569                         break;
18570                 }
18571                 case 3:
18572                 {
18573                         argFragments = &argFragment3;
18574
18575                         break;
18576                 }
18577                 default:
18578                 {
18579                         TCU_THROW(InternalError, "Invalid number of arguments");
18580                 }
18581         }
18582
18583         functions = StringTemplate(store_funcs[testFunc.typeResult]).specialize({{"var", "ssbo_dst"}});
18584         if (testFunc.funcArgsCount == 1)
18585         {
18586                 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18587                 variables +=
18588                         " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18589                         "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18590
18591                 decorations +=
18592                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18593                         "OpDecorate %ssbo_src0 Binding 0\n"
18594                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
18595                         "OpDecorate %ssbo_dst Binding 1\n";
18596         }
18597         else if (testFunc.funcArgsCount == 2)
18598         {
18599                 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18600                 functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18601                 variables +=
18602                         " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18603                         " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18604                         "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18605
18606                 decorations +=
18607                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18608                         "OpDecorate %ssbo_src0 Binding 0\n"
18609                         "OpDecorate %ssbo_src1 DescriptorSet 0\n"
18610                         "OpDecorate %ssbo_src1 Binding 1\n"
18611                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
18612                         "OpDecorate %ssbo_dst Binding 2\n";
18613         }
18614         else if (testFunc.funcArgsCount == 3)
18615         {
18616                 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18617                 functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18618                 functions += StringTemplate(load_funcs[testFunc.typeArg2]).specialize({{"var", "ssbo_src2"}});
18619                 variables +=
18620                         " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18621                         " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18622                         " %ssbo_src2 = OpVariable %up_SSBO_${store_t2} Uniform\n"
18623                         "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18624
18625                 decorations +=
18626                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18627                         "OpDecorate %ssbo_src0 Binding 0\n"
18628                         "OpDecorate %ssbo_src1 DescriptorSet 0\n"
18629                         "OpDecorate %ssbo_src1 Binding 1\n"
18630                         "OpDecorate %ssbo_src2 DescriptorSet 0\n"
18631                         "OpDecorate %ssbo_src2 Binding 2\n"
18632                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
18633                         "OpDecorate %ssbo_dst Binding 3\n";
18634         }
18635         else
18636         {
18637                 TCU_THROW(InternalError, "Invalid number of function arguments");
18638         }
18639
18640         variables       += argFragments->variables;
18641         decorations     += argFragments->decorations;
18642
18643         specs["dr"]                                     = testTypes[testFunc.typeResult].typePrefix;
18644         specs["d0"]                                     = testTypes[testFunc.typeArg0].typePrefix;
18645         specs["d1"]                                     = testTypes[testFunc.typeArg1].typePrefix;
18646         specs["d2"]                                     = testTypes[testFunc.typeArg2].typePrefix;
18647         specs["tr"]                                     = string(testTypes[testFunc.typeResult].typePrefix) + componentType;
18648         specs["t0"]                                     = string(testTypes[testFunc.typeArg0].typePrefix) + componentType;
18649         specs["t1"]                                     = string(testTypes[testFunc.typeArg1].typePrefix) + componentType;
18650         specs["t2"]                                     = string(testTypes[testFunc.typeArg2].typePrefix) + componentType;
18651         specs["store_tr"]                       = string(testTypes[testFunc.typeResult].storage_type);
18652         specs["store_t0"]                       = string(testTypes[testFunc.typeArg0].storage_type);
18653         specs["store_t1"]                       = string(testTypes[testFunc.typeArg1].storage_type);
18654         specs["store_t2"]                       = string(testTypes[testFunc.typeArg2].storage_type);
18655         specs["struct_stride"]          = de::toString(typeStructStride);
18656         specs["op"]                                     = extInst ? "OpExtInst" : testFunc.funcName;
18657         specs["ext_inst"]                       = extInst ? string("%ext_import ") + testFunc.funcName : "";
18658         specs["struct_member"]          = de::toLower(testFunc.funcSuffix);
18659
18660         variables                                       = StringTemplate(variables).specialize(specs);
18661         decorations                                     = StringTemplate(decorations).specialize(specs);
18662         funcVariables                           = StringTemplate(argFragments->funcVariables).specialize(specs);
18663         funcCall                                        = StringTemplate(argFragments->bodies).specialize(specs);
18664
18665         specs["num_data_points"]        = de::toString(iterations);
18666         specs["arg_vars"]                       = variables;
18667         specs["arg_decorations"]        = decorations;
18668         specs["arg_infunc_vars"]        = funcVariables;
18669         specs["arg_func_call"]          = funcCall;
18670
18671         fragments["extension"]          = "%ext_import = OpExtInstImport \"GLSL.std.450\"";
18672         fragments["capability"]         = "OpCapability Matrix\nOpCapability Float16\n";
18673         fragments["decoration"]         = decoration.specialize(specs);
18674         fragments["pre_main"]           = preMain.specialize(specs) + functions;
18675         fragments["testfun"]            = testFun.specialize(specs);
18676
18677         for (size_t inputArgNdx = 0; inputArgNdx < testFunc.funcArgsCount; ++inputArgNdx)
18678         {
18679                 const size_t                    numFloatsPerItem        = (inputArgNdx == 0) ? testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16)
18680                                                                                                         : (inputArgNdx == 1) ? testTypes[testFunc.typeArg1].typeArrayStride / sizeof(deFloat16)
18681                                                                                                         : (inputArgNdx == 2) ? testTypes[testFunc.typeArg2].typeArrayStride / sizeof(deFloat16)
18682                                                                                                         : -1;
18683                 const vector<deFloat16> inputData                       = testFunc.getInputDataFunc(seed, numFloatsPerItem * iterations, testTypeIdx, numFloatsPerItem, testFunc.funcArgsCount, inputArgNdx);
18684
18685                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18686         }
18687
18688         specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16UnusedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18689         specResource.verifyIO = testFunc.verifyFunc;
18690
18691         extensions.push_back("VK_KHR_shader_float16_int8");
18692
18693         features.extFloat16Int8.shaderFloat16 = true;
18694
18695         finalizeTestsCreation(specResource, fragments, testCtx, testGroup, testName, features, extensions, IVec3(1, 1, 1));
18696 }
18697
18698 template<size_t C, class SpecResource>
18699 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
18700 {
18701         DE_STATIC_ASSERT(C >= 1 && C <= 4);
18702
18703         const std::string                               testGroupName   (string("arithmetic_") + de::toString(C));
18704         de::MovePtr<tcu::TestCaseGroup> testGroup               (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
18705         const Math16TestFunc                    testFuncs[]             =
18706         {
18707                 {       "OpFNegate",                    "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16OpFNegate>                                       },
18708                 {       "Round",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Round>                                           },
18709                 {       "RoundEven",                    "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16RoundEven>                                       },
18710                 {       "Trunc",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Trunc>                                           },
18711                 {       "FAbs",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FAbs>                                            },
18712                 {       "FSign",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FSign>                                           },
18713                 {       "Floor",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Floor>                                           },
18714                 {       "Ceil",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Ceil>                                            },
18715                 {       "Fract",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Fract>                                           },
18716                 {       "Radians",                              "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Radians>                                         },
18717                 {       "Degrees",                              "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Degrees>                                         },
18718                 {       "Sin",                                  "",                     1,      C,              C,              0,              0, &getInputDataPI,     compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sin>                                                     },
18719                 {       "Cos",                                  "",                     1,      C,              C,              0,              0, &getInputDataPI,     compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Cos>                                                     },
18720                 {       "Tan",                                  "",                     1,      C,              C,              0,              0, &getInputDataPI,     compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Tan>                                                     },
18721                 {       "Asin",                                 "",                     1,      C,              C,              0,              0, &getInputDataA,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Asin>                                            },
18722                 {       "Acos",                                 "",                     1,      C,              C,              0,              0, &getInputDataA,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Acos>                                            },
18723                 {       "Atan",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Atan>                                            },
18724                 {       "Sinh",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sinh>                                            },
18725                 {       "Cosh",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Cosh>                                            },
18726                 {       "Tanh",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Tanh>                                            },
18727                 {       "Asinh",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Asinh>                                           },
18728                 {       "Acosh",                                "",                     1,      C,              C,              0,              0, &getInputDataAC,     compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Acosh>                                           },
18729                 {       "Atanh",                                "",                     1,      C,              C,              0,              0, &getInputDataA,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Atanh>                                           },
18730                 {       "Exp",                                  "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Exp>                                                     },
18731                 {       "Log",                                  "",                     1,      C,              C,              0,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Log>                                                     },
18732                 {       "Exp2",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Exp2>                                            },
18733                 {       "Log2",                                 "",                     1,      C,              C,              0,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Log2>                                            },
18734                 {       "Sqrt",                                 "",                     1,      C,              C,              0,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sqrt>                                            },
18735                 {       "InverseSqrt",                  "",                     1,      C,              C,              0,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16InverseSqrt>                                     },
18736                 {       "Modf",                                 "Frac",         1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfFrac>                                        },
18737                 {       "Modf",                                 "Int",          1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfInt>                                         },
18738                 {       "ModfStruct",                   "Frac",         1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfFrac>                                        },
18739                 {       "ModfStruct",                   "Int",          1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfInt>                                         },
18740                 {       "Frexp",                                "S",            1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpS>                                          },
18741                 {       "Frexp",                                "E",            1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpE>                                          },
18742                 {       "FrexpStruct",                  "S",            1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpS>                                          },
18743                 {       "FrexpStruct",                  "E",            1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpE>                                          },
18744                 {       "OpFAdd",                               "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFAdd>                                          },
18745                 {       "OpFSub",                               "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFSub>                                          },
18746                 {       "OpFMul",                               "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFMul>                                          },
18747                 {       "OpFDiv",                               "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFDiv>                                          },
18748                 {       "Atan2",                                "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Atan2>                                           },
18749                 {       "Pow",                                  "",                     2,      C,              C,              C,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Pow>                                                     },
18750                 {       "FMin",                                 "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16FMin>                                            },
18751                 {       "FMax",                                 "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16FMax>                                            },
18752                 {       "Step",                                 "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Step>                                            },
18753                 {       "Ldexp",                                "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Ldexp>                                           },
18754                 {       "FClamp",                               "",                     3,      C,              C,              C,              C, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FClamp>                                          },
18755                 {       "FMix",                                 "",                     3,      C,              C,              C,              C, &getInputDataD,      compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FMix>                                            },
18756                 {       "SmoothStep",                   "",                     3,      C,              C,              C,              C, &getInputDataSS,     compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16SmoothStep>                                      },
18757                 {       "Fma",                                  "",                     3,      C,              C,              C,              C, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16Fma>                                                     },
18758                 {       "Length",                               "",                     1,      1,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  1,  C,  0,  0, fp16Length>                                          },
18759                 {       "Distance",                             "",                     2,      1,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  1,  C,  C,  0, fp16Distance>                                        },
18760                 {       "Cross",                                "",                     2,      C,              C,              C,              0, &getInputDataD,      compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Cross>                                           },
18761                 {       "Normalize",                    "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Normalize>                                       },
18762                 {       "FaceForward",                  "",                     3,      C,              C,              C,              C, &getInputDataD,      compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FaceForward>                                     },
18763                 {       "Reflect",                              "",                     2,      C,              C,              C,              0, &getInputDataD,      compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Reflect>                                         },
18764                 {       "Refract",                              "",                     3,      C,              C,              C,              1, &getInputDataN,      compareFP16ArithmeticFunc<  C,  C,  C,  1, fp16Refract>                                         },
18765                 {       "OpDot",                                "",                     2,      1,              C,              C,              0, &getInputDataD,      compareFP16ArithmeticFunc<  1,  C,  C,  0, fp16Dot>                                                     },
18766                 {       "OpVectorTimesScalar",  "",                     2,      C,              C,              1,              0, &getInputDataV,      compareFP16ArithmeticFunc<  C,  C,  1,  0, fp16VectorTimesScalar>                       },
18767         };
18768
18769         for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
18770         {
18771                 const Math16TestFunc&   testFunc                = testFuncs[testFuncIdx];
18772                 const string                    funcNameString  = testFunc.funcName;
18773
18774                 if ((C != 3) && funcNameString == "Cross")
18775                         continue;
18776
18777                 if ((C < 2) && funcNameString == "OpDot")
18778                         continue;
18779
18780                 if ((C < 2) && funcNameString == "OpVectorTimesScalar")
18781                         continue;
18782
18783                 createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), C, testFunc);
18784         }
18785
18786         return testGroup.release();
18787 }
18788
// Builds the "arithmetic" test group (description "Float 16 arithmetic and
// related tests") from a fixed table of matrix/vector operations.
//
// Every table row is handed to createFloat16ArithmeticFuncTest<SpecResource>()
// with 0 as the third argument (the per-component-count sibling above passes
// its template parameter C in that position instead).
//
// Returns the group through MovePtr::release(), i.e. as a raw pointer that the
// smart pointer no longer manages.
18789 template<class SpecResource>
18790 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
18791 {
18792         const std::string                               testGroupName   ("arithmetic");
18793         de::MovePtr<tcu::TestCaseGroup> testGroup               (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
              // One row per test: OpTranspose, OpMatrixTimesScalar, OpVectorTimesMatrix,
              // OpMatrixTimesVector, OpMatrixTimesMatrix and OpOuterProduct for the listed
              // dimension combinations, Determinant for 2x2/3x3/4x4 and MatrixInverse for 2x2 only.
              // NOTE(review): Math16TestFunc is declared outside this view; the columns look like
              // {function name, variant suffix, operand count, three type tags, a fourth tag (always 0
              // here), input generator, result verifier} -- confirm against the struct.
              // NOTE(review): the numeric compareFP16ArithmeticFunc<> arguments appear to be element
              // counts per operand (vecN -> N, 2x2 -> 4, other matrices -> 8 or 16) -- confirm.
18794         const Math16TestFunc                    testFuncs[]             =
18795         {
18796                 {       "OpTranspose",                  "2x2",          1,      MAT2X2, MAT2X2, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  4,  4,  0,  0, fp16Transpose<2,2> >                         },
18797                 {       "OpTranspose",                  "3x2",          1,      MAT2X3, MAT3X2, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<3,2> >                         },
18798                 {       "OpTranspose",                  "4x2",          1,      MAT2X4, MAT4X2, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<4,2> >                         },
18799                 {       "OpTranspose",                  "2x3",          1,      MAT3X2, MAT2X3, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<2,3> >                         },
18800                 {       "OpTranspose",                  "3x3",          1,      MAT3X3, MAT3X3, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<3,3> >                         },
18801                 {       "OpTranspose",                  "4x3",          1,      MAT3X4, MAT4X3, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<4,3> >                         },
18802                 {       "OpTranspose",                  "2x4",          1,      MAT4X2, MAT2X4, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<2,4> >                         },
18803                 {       "OpTranspose",                  "3x4",          1,      MAT4X3, MAT3X4, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<3,4> >                         },
18804                 {       "OpTranspose",                  "4x4",          1,      MAT4X4, MAT4X4, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<4,4> >                         },
18805                 {       "OpMatrixTimesScalar",  "2x2",          2,      MAT2X2, MAT2X2, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  4,  1,  0, fp16MatrixTimesScalar<2,2> >         },
18806                 {       "OpMatrixTimesScalar",  "2x3",          2,      MAT2X3, MAT2X3, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<2,3> >         },
18807                 {       "OpMatrixTimesScalar",  "2x4",          2,      MAT2X4, MAT2X4, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<2,4> >         },
18808                 {       "OpMatrixTimesScalar",  "3x2",          2,      MAT3X2, MAT3X2, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<3,2> >         },
18809                 {       "OpMatrixTimesScalar",  "3x3",          2,      MAT3X3, MAT3X3, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<3,3> >         },
18810                 {       "OpMatrixTimesScalar",  "3x4",          2,      MAT3X4, MAT3X4, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<3,4> >         },
18811                 {       "OpMatrixTimesScalar",  "4x2",          2,      MAT4X2, MAT4X2, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<4,2> >         },
18812                 {       "OpMatrixTimesScalar",  "4x3",          2,      MAT4X3, MAT4X3, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<4,3> >         },
18813                 {       "OpMatrixTimesScalar",  "4x4",          2,      MAT4X4, MAT4X4, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<4,4> >         },
18814                 {       "OpVectorTimesMatrix",  "2x2",          2,      VEC2,   VEC2,   MAT2X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  2,  4,  0, fp16VectorTimesMatrix<2,2> >         },
18815                 {       "OpVectorTimesMatrix",  "2x3",          2,      VEC2,   VEC3,   MAT2X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  3,  8,  0, fp16VectorTimesMatrix<2,3> >         },
18816                 {       "OpVectorTimesMatrix",  "2x4",          2,      VEC2,   VEC4,   MAT2X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  4,  8,  0, fp16VectorTimesMatrix<2,4> >         },
18817                 {       "OpVectorTimesMatrix",  "3x2",          2,      VEC3,   VEC2,   MAT3X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  3,  2,  8,  0, fp16VectorTimesMatrix<3,2> >         },
18818                 {       "OpVectorTimesMatrix",  "3x3",          2,      VEC3,   VEC3,   MAT3X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  3,  3, 16,  0, fp16VectorTimesMatrix<3,3> >         },
18819                 {       "OpVectorTimesMatrix",  "3x4",          2,      VEC3,   VEC4,   MAT3X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  3,  4, 16,  0, fp16VectorTimesMatrix<3,4> >         },
18820                 {       "OpVectorTimesMatrix",  "4x2",          2,      VEC4,   VEC2,   MAT4X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  2,  8,  0, fp16VectorTimesMatrix<4,2> >         },
18821                 {       "OpVectorTimesMatrix",  "4x3",          2,      VEC4,   VEC3,   MAT4X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  3, 16,  0, fp16VectorTimesMatrix<4,3> >         },
18822                 {       "OpVectorTimesMatrix",  "4x4",          2,      VEC4,   VEC4,   MAT4X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  4, 16,  0, fp16VectorTimesMatrix<4,4> >         },
18823                 {       "OpMatrixTimesVector",  "2x2",          2,      VEC2,   MAT2X2, VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  4,  2,  0, fp16MatrixTimesVector<2,2> >         },
18824                 {       "OpMatrixTimesVector",  "2x3",          2,      VEC3,   MAT2X3, VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  3,  8,  2,  0, fp16MatrixTimesVector<2,3> >         },
18825                 {       "OpMatrixTimesVector",  "2x4",          2,      VEC4,   MAT2X4, VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  8,  2,  0, fp16MatrixTimesVector<2,4> >         },
18826                 {       "OpMatrixTimesVector",  "3x2",          2,      VEC2,   MAT3X2, VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  8,  3,  0, fp16MatrixTimesVector<3,2> >         },
18827                 {       "OpMatrixTimesVector",  "3x3",          2,      VEC3,   MAT3X3, VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc<  3, 16,  3,  0, fp16MatrixTimesVector<3,3> >         },
18828                 {       "OpMatrixTimesVector",  "3x4",          2,      VEC4,   MAT3X4, VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc<  4, 16,  3,  0, fp16MatrixTimesVector<3,4> >         },
18829                 {       "OpMatrixTimesVector",  "4x2",          2,      VEC2,   MAT4X2, VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  8,  4,  0, fp16MatrixTimesVector<4,2> >         },
18830                 {       "OpMatrixTimesVector",  "4x3",          2,      VEC3,   MAT4X3, VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc<  3, 16,  4,  0, fp16MatrixTimesVector<4,3> >         },
18831                 {       "OpMatrixTimesVector",  "4x4",          2,      VEC4,   MAT4X4, VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc<  4, 16,  4,  0, fp16MatrixTimesVector<4,4> >         },
18832                 {       "OpMatrixTimesMatrix",  "2x2_2x2",      2,      MAT2X2, MAT2X2, MAT2X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  4,  4,  0, fp16MatrixTimesMatrix<2,2,2,2> >     },
18833                 {       "OpMatrixTimesMatrix",  "2x2_3x2",      2,      MAT3X2, MAT2X2, MAT3X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  4,  8,  0, fp16MatrixTimesMatrix<2,2,3,2> >     },
18834                 {       "OpMatrixTimesMatrix",  "2x2_4x2",      2,      MAT4X2, MAT2X2, MAT4X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  4,  8,  0, fp16MatrixTimesMatrix<2,2,4,2> >     },
18835                 {       "OpMatrixTimesMatrix",  "2x3_2x2",      2,      MAT2X3, MAT2X3, MAT2X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  4,  0, fp16MatrixTimesMatrix<2,3,2,2> >     },
18836                 {       "OpMatrixTimesMatrix",  "2x3_3x2",      2,      MAT3X3, MAT2X3, MAT3X2, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,3,3,2> >     },
18837                 {       "OpMatrixTimesMatrix",  "2x3_4x2",      2,      MAT4X3, MAT2X3, MAT4X2, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,3,4,2> >     },
18838                 {       "OpMatrixTimesMatrix",  "2x4_2x2",      2,      MAT2X4, MAT2X4, MAT2X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  4,  0, fp16MatrixTimesMatrix<2,4,2,2> >     },
18839                 {       "OpMatrixTimesMatrix",  "2x4_3x2",      2,      MAT3X4, MAT2X4, MAT3X2, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,4,3,2> >     },
18840                 {       "OpMatrixTimesMatrix",  "2x4_4x2",      2,      MAT4X4, MAT2X4, MAT4X2, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,4,4,2> >     },
18841                 {       "OpMatrixTimesMatrix",  "3x2_2x3",      2,      MAT2X2, MAT3X2, MAT2X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  8,  8,  0, fp16MatrixTimesMatrix<3,2,2,3> >     },
18842                 {       "OpMatrixTimesMatrix",  "3x2_3x3",      2,      MAT3X2, MAT3X2, MAT3X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<3,2,3,3> >     },
18843                 {       "OpMatrixTimesMatrix",  "3x2_4x3",      2,      MAT4X2, MAT3X2, MAT4X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<3,2,4,3> >     },
18844                 {       "OpMatrixTimesMatrix",  "3x3_2x3",      2,      MAT2X3, MAT3X3, MAT2X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<3,3,2,3> >     },
18845                 {       "OpMatrixTimesMatrix",  "3x3_3x3",      2,      MAT3X3, MAT3X3, MAT3X3, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,3,3,3> >     },
18846                 {       "OpMatrixTimesMatrix",  "3x3_4x3",      2,      MAT4X3, MAT3X3, MAT4X3, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,3,4,3> >     },
18847                 {       "OpMatrixTimesMatrix",  "3x4_2x3",      2,      MAT2X4, MAT3X4, MAT2X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<3,4,2,3> >     },
18848                 {       "OpMatrixTimesMatrix",  "3x4_3x3",      2,      MAT3X4, MAT3X4, MAT3X3, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,4,3,3> >     },
18849                 {       "OpMatrixTimesMatrix",  "3x4_4x3",      2,      MAT4X4, MAT3X4, MAT4X3, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,4,4,3> >     },
18850                 {       "OpMatrixTimesMatrix",  "4x2_2x4",      2,      MAT2X2, MAT4X2, MAT2X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  8,  8,  0, fp16MatrixTimesMatrix<4,2,2,4> >     },
18851                 {       "OpMatrixTimesMatrix",  "4x2_3x4",      2,      MAT3X2, MAT4X2, MAT3X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<4,2,3,4> >     },
18852                 {       "OpMatrixTimesMatrix",  "4x2_4x4",      2,      MAT4X2, MAT4X2, MAT4X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<4,2,4,4> >     },
18853                 {       "OpMatrixTimesMatrix",  "4x3_2x4",      2,      MAT2X3, MAT4X3, MAT2X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<4,3,2,4> >     },
18854                 {       "OpMatrixTimesMatrix",  "4x3_3x4",      2,      MAT3X3, MAT4X3, MAT3X4, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,3,3,4> >     },
18855                 {       "OpMatrixTimesMatrix",  "4x3_4x4",      2,      MAT4X3, MAT4X3, MAT4X4, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,3,4,4> >     },
18856                 {       "OpMatrixTimesMatrix",  "4x4_2x4",      2,      MAT2X4, MAT4X4, MAT2X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<4,4,2,4> >     },
18857                 {       "OpMatrixTimesMatrix",  "4x4_3x4",      2,      MAT3X4, MAT4X4, MAT3X4, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,4,3,4> >     },
18858                 {       "OpMatrixTimesMatrix",  "4x4_4x4",      2,      MAT4X4, MAT4X4, MAT4X4, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,4,4,4> >     },
18859                 {       "OpOuterProduct",               "2x2",          2,      MAT2X2, VEC2,   VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  2,  2,  0, fp16OuterProduct<2,2> >                      },
18860                 {       "OpOuterProduct",               "2x3",          2,      MAT2X3, VEC3,   VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  3,  2,  0, fp16OuterProduct<2,3> >                      },
18861                 {       "OpOuterProduct",               "2x4",          2,      MAT2X4, VEC4,   VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  4,  2,  0, fp16OuterProduct<2,4> >                      },
18862                 {       "OpOuterProduct",               "3x2",          2,      MAT3X2, VEC2,   VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  2,  3,  0, fp16OuterProduct<3,2> >                      },
18863                 {       "OpOuterProduct",               "3x3",          2,      MAT3X3, VEC3,   VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  3,  3,  0, fp16OuterProduct<3,3> >                      },
18864                 {       "OpOuterProduct",               "3x4",          2,      MAT3X4, VEC4,   VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  4,  3,  0, fp16OuterProduct<3,4> >                      },
18865                 {       "OpOuterProduct",               "4x2",          2,      MAT4X2, VEC2,   VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  2,  4,  0, fp16OuterProduct<4,2> >                      },
18866                 {       "OpOuterProduct",               "4x3",          2,      MAT4X3, VEC3,   VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  3,  4,  0, fp16OuterProduct<4,3> >                      },
18867                 {       "OpOuterProduct",               "4x4",          2,      MAT4X4, VEC4,   VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  4,  4,  0, fp16OuterProduct<4,4> >                      },
18868                 {       "Determinant",                  "2x2",          1,      SCALAR, MAT2X2, NONE,   0, &getInputDataC,      compareFP16ArithmeticFunc<  1,  4,  0,  0, fp16Determinant<2> >                         },
18869                 {       "Determinant",                  "3x3",          1,      SCALAR, MAT3X3, NONE,   0, &getInputDataC,      compareFP16ArithmeticFunc<  1, 16,  0,  0, fp16Determinant<3> >                         },
18870                 {       "Determinant",                  "4x4",          1,      SCALAR, MAT4X4, NONE,   0, &getInputDataC,      compareFP16ArithmeticFunc<  1, 16,  0,  0, fp16Determinant<4> >                         },
18871                 {       "MatrixInverse",                "2x2",          1,      MAT2X2, MAT2X2, NONE,   0, &getInputDataC,      compareFP16ArithmeticFunc<  4,  4,  0,  0, fp16Inverse<2> >                                     },
18872         };
18873
              // Register one test per table row; unlike the sibling set, no rows are filtered out.
18874         for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
18875         {
18876                 const Math16TestFunc&   testFunc        = testFuncs[testFuncIdx];
18877
18878                 createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), 0, testFunc);
18879         }
18880
              // Detach the group from the MovePtr and return the raw pointer.
18881         return testGroup.release();
18882 }
18883
// One Amber-script comparison test, as listed in the amberTests tables of the
// createFloat32Comparison*Set() functions below. Member order matters: the
// tables use positional brace initialisation ({ name, desc }).
18884 struct ComparisonCase
18885 {
              // Test case name (or its prefix) and the leading part of the .amber file name.
18886         string name;
              // Text appended to "Compare output of " to form the case description.
18887         string desc;
18888 };
18889
18890 template<size_t C>
18891 tcu::TestCaseGroup* createFloat32ComparisonComputeSet (tcu::TestContext& testCtx)
18892 {
18893         const string                                    testGroupName   ("comparison_" + de::toString(C));
18894         de::MovePtr<tcu::TestCaseGroup> testGroup               (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 32 comparison tests"));
18895 #ifndef CTS_USES_VULKANSC
18896         const char*                                             dataDir                 = "spirv_assembly/instruction/float32/comparison";
18897
18898         const ComparisonCase                    amberTests[]    =
18899         {
18900                 { "modfstruct",         "modf and modfStruct"   },
18901                 { "frexpstruct",        "frexp and frexpStruct" }
18902         };
18903
18904         for (ComparisonCase test : amberTests)
18905         {
18906                 const string caseDesc ("Compare output of " + test.desc);
18907                 const string fileName (test.name + "_" + de::toString(C) + "_comp.amber");
18908
18909                 testGroup->addChild(cts_amber::createAmberTestCase(testCtx,
18910                                                                                                                    test.name.c_str(),
18911                                                                                                                    caseDesc.c_str(),
18912                                                                                                                    dataDir,
18913                                                                                                                    fileName));
18914         }
18915 #endif
18916         return testGroup.release();
18917 }
18918
// One shader stage for which the graphics comparison scripts exist, as listed
// in the stages table of createFloat32ComparisonGraphicsSet() below. Member
// order matters: that table uses positional brace initialisation.
18919 struct ShaderStage
18920 {
              // Stage abbreviation ("vert", "tesc", "tese", "geom", "frag"); used as a suffix in
              // the test case name and in the .amber file name.
18921         string                  name;
              // Requirement strings handed to createAmberTestCase() for this stage, e.g.
              // "Features.tessellationShader"; empty for "vert" and "frag".
18922         vector<string>  requirement;
18923 };
18924
18925 template<size_t C>
18926 tcu::TestCaseGroup* createFloat32ComparisonGraphicsSet (tcu::TestContext& testCtx)
18927 {
18928         const string                                    testGroupName   ("comparison_" + de::toString(C));
18929         de::MovePtr<tcu::TestCaseGroup> testGroup               (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 32 comparison tests"));
18930 #ifndef CTS_USES_VULKANSC
18931         const char*                                             dataDir                 = "spirv_assembly/instruction/float32/comparison";
18932
18933         const ShaderStage                               stages[]                =
18934         {
18935                 { "vert", vector<string>(0) },
18936                 { "tesc", vector<string>(1, "Features.tessellationShader") },
18937                 { "tese", vector<string>(1, "Features.tessellationShader") },
18938                 { "geom", vector<string>(1, "Features.geometryShader") },
18939                 { "frag", vector<string>(0) }
18940         };
18941
18942         const ComparisonCase                    amberTests[]    =
18943         {
18944                 { "modfstruct",         "modf and modfStruct"   },
18945                 { "frexpstruct",        "frexp and frexpStruct" }
18946         };
18947
18948         for (ComparisonCase test : amberTests)
18949         for (ShaderStage stage : stages)
18950         {
18951                 const string caseName (test.name + "_" + stage.name);
18952                 const string caseDesc ("Compare output of " + test.desc);
18953                 const string fileName (test.name + "_" + de::toString(C) + "_" + stage.name + ".amber");
18954
18955                 testGroup->addChild(cts_amber::createAmberTestCase(testCtx,
18956                                                                                                                    caseName.c_str(),
18957                                                                                                                    caseDesc.c_str(),
18958                                                                                                                    dataDir,
18959                                                                                                                    fileName,
18960                                                                                                                    stage.requirement));
18961         }
18962 #endif
18963
18964         return testGroup.release();
18965 }
18966
18967 const string getNumberTypeName (const NumberType type)
18968 {
18969         if (type == NUMBERTYPE_INT32)
18970         {
18971                 return "int";
18972         }
18973         else if (type == NUMBERTYPE_UINT32)
18974         {
18975                 return "uint";
18976         }
18977         else if (type == NUMBERTYPE_FLOAT32)
18978         {
18979                 return "float";
18980         }
18981         else
18982         {
18983                 DE_ASSERT(false);
18984                 return "";
18985         }
18986 }
18987
// Returns the result of rnd.getInt() called with the smallest and largest
// values representable by int as its two bounds.
18988 deInt32 getInt(de::Random& rnd)
18989 {
18990         return rnd.getInt(std::numeric_limits<int>::min(), std::numeric_limits<int>::max());
18991 }
18992
// Returns str concatenated with itself `times` times.
// The result is an empty string when times is zero or negative.
18993 const string repeatString (const string& str, int times)
18994 {
18995         string repeated;
18996
18997         for (int remaining = times; remaining > 0; --remaining)
18998                 repeated.append(str);
18999
19000         return repeated;
19001 }
19002
19003 const string getRandomConstantString (const NumberType type, de::Random& rnd)
19004 {
19005         if (type == NUMBERTYPE_INT32)
19006         {
19007                 return numberToString<deInt32>(getInt(rnd));
19008         }
19009         else if (type == NUMBERTYPE_UINT32)
19010         {
19011                 return numberToString<deUint32>(rnd.getUint32());
19012         }
19013         else if (type == NUMBERTYPE_FLOAT32)
19014         {
19015                 return numberToString<float>(rnd.getFloat());
19016         }
19017         else
19018         {
19019                 DE_ASSERT(false);
19020                 return "";
19021         }
19022 }
19023
19024 void createVectorCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19025 {
19026         map<string, string> params;
19027
19028         // Vec2 to Vec4
19029         for (int width = 2; width <= 4; ++width)
19030         {
19031                 const string randomConst = numberToString(getInt(rnd));
19032                 const string widthStr = numberToString(width);
19033                 const string composite_type = "${customType}vec" + widthStr;
19034                 const int index = rnd.getInt(0, width-1);
19035
19036                 params["type"]                  = "vec";
19037                 params["name"]                  = params["type"] + "_" + widthStr;
19038                 params["compositeDecl"]         = composite_type + " = OpTypeVector ${customType} " + widthStr +"\n";
19039                 params["compositeType"]         = composite_type;
19040                 params["filler"]                = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19041                 params["compositeConstruct"]    = "%instance  = OpCompositeConstruct " + composite_type + repeatString(" %filler", width) + "\n";
19042                 params["indexes"]               = numberToString(index);
19043                 testCases.push_back(params);
19044         }
19045 }
19046
19047 void createArrayCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19048 {
19049         const int limit = 10;
19050         map<string, string> params;
19051
19052         for (int width = 2; width <= limit; ++width)
19053         {
19054                 string randomConst = numberToString(getInt(rnd));
19055                 string widthStr = numberToString(width);
19056                 int index = rnd.getInt(0, width-1);
19057
19058                 params["type"]                  = "array";
19059                 params["name"]                  = params["type"] + "_" + widthStr;
19060                 params["compositeDecl"]         = string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
19061                                                                                         +        "%composite = OpTypeArray ${customType} %arraywidth\n";
19062                 params["compositeType"]         = "%composite";
19063                 params["filler"]                = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19064                 params["compositeConstruct"]    = "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19065                 params["indexes"]               = numberToString(index);
19066                 testCases.push_back(params);
19067         }
19068 }
19069
19070 void createStructCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19071 {
19072         const int limit = 10;
19073         map<string, string> params;
19074
19075         for (int width = 2; width <= limit; ++width)
19076         {
19077                 string randomConst = numberToString(getInt(rnd));
19078                 int index = rnd.getInt(0, width-1);
19079
19080                 params["type"]                  = "struct";
19081                 params["name"]                  = params["type"] + "_" + numberToString(width);
19082                 params["compositeDecl"]         = "%composite = OpTypeStruct" + repeatString(" ${customType}", width) + "\n";
19083                 params["compositeType"]         = "%composite";
19084                 params["filler"]                = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19085                 params["compositeConstruct"]    = "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19086                 params["indexes"]               = numberToString(index);
19087                 testCases.push_back(params);
19088         }
19089 }
19090
19091 void createMatrixCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19092 {
19093         map<string, string> params;
19094
19095         // Vec2 to Vec4
19096         for (int width = 2; width <= 4; ++width)
19097         {
19098                 string widthStr = numberToString(width);
19099
19100                 for (int column = 2 ; column <= 4; ++column)
19101                 {
19102                         int index_0 = rnd.getInt(0, column-1);
19103                         int index_1 = rnd.getInt(0, width-1);
19104                         string columnStr = numberToString(column);
19105
19106                         params["type"]          = "matrix";
19107                         params["name"]          = params["type"] + "_" + widthStr + "x" + columnStr;
19108                         params["compositeDecl"] = string("%vectype   = OpTypeVector ${customType} " + widthStr + "\n")
19109                                                                                                 +        "%composite = OpTypeMatrix %vectype " + columnStr + "\n";
19110                         params["compositeType"] = "%composite";
19111
19112                         params["filler"]        = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n"
19113                                                                                                 +        "%fillerVec = OpConstantComposite %vectype" + repeatString(" %filler", width) + "\n";
19114
19115                         params["compositeConstruct"]    = "%instance  = OpCompositeConstruct %composite" + repeatString(" %fillerVec", column) + "\n";
19116                         params["indexes"]       = numberToString(index_0) + " " + numberToString(index_1);
19117                         testCases.push_back(params);
19118                 }
19119         }
19120 }
19121
19122 void createCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19123 {
19124         createVectorCompositeCases(testCases, rnd, type);
19125         createArrayCompositeCases(testCases, rnd, type);
19126         createStructCompositeCases(testCases, rnd, type);
19127         // Matrix only supports float types
19128         if (type == NUMBERTYPE_FLOAT32)
19129         {
19130                 createMatrixCompositeCases(testCases, rnd, type);
19131         }
19132 }
19133
19134 const string getAssemblyTypeDeclaration (const NumberType type)
19135 {
19136         switch (type)
19137         {
19138                 case NUMBERTYPE_INT32:          return "OpTypeInt 32 1";
19139                 case NUMBERTYPE_UINT32:         return "OpTypeInt 32 0";
19140                 case NUMBERTYPE_FLOAT32:        return "OpTypeFloat 32";
19141                 default:                        DE_ASSERT(false); return "";
19142         }
19143 }
19144
19145 const string getAssemblyTypeName (const NumberType type)
19146 {
19147         switch (type)
19148         {
19149                 case NUMBERTYPE_INT32:          return "%i32";
19150                 case NUMBERTYPE_UINT32:         return "%u32";
19151                 case NUMBERTYPE_FLOAT32:        return "%f32";
19152                 default:                        DE_ASSERT(false); return "";
19153         }
19154 }
19155
19156 const string specializeCompositeInsertShaderTemplate (const NumberType type, const map<string, string>& params)
19157 {
19158         map<string, string>     parameters(params);
19159
19160         const string customType = getAssemblyTypeName(type);
19161         map<string, string> substCustomType;
19162         substCustomType["customType"] = customType;
19163         parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19164         parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19165         parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19166         parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19167         parameters["customType"] = customType;
19168         parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
19169
19170         if (parameters.at("compositeType") != "%u32vec3")
19171         {
19172                 parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
19173         }
19174
19175         return StringTemplate(
19176                 "OpCapability Shader\n"
19177                 "OpCapability Matrix\n"
19178                 "OpMemoryModel Logical GLSL450\n"
19179                 "OpEntryPoint GLCompute %main \"main\" %id\n"
19180                 "OpExecutionMode %main LocalSize 1 1 1\n"
19181
19182                 "OpSource GLSL 430\n"
19183                 "OpName %main           \"main\"\n"
19184                 "OpName %id             \"gl_GlobalInvocationID\"\n"
19185
19186                 // Decorators
19187                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19188                 "OpDecorate %buf BufferBlock\n"
19189                 "OpDecorate %indata DescriptorSet 0\n"
19190                 "OpDecorate %indata Binding 0\n"
19191                 "OpDecorate %outdata DescriptorSet 0\n"
19192                 "OpDecorate %outdata Binding 1\n"
19193                 "OpDecorate %customarr ArrayStride 4\n"
19194                 "${compositeDecorator}"
19195                 "OpMemberDecorate %buf 0 Offset 0\n"
19196
19197                 // General types
19198                 "%void      = OpTypeVoid\n"
19199                 "%voidf     = OpTypeFunction %void\n"
19200                 "%u32       = OpTypeInt 32 0\n"
19201                 "%i32       = OpTypeInt 32 1\n"
19202                 "%f32       = OpTypeFloat 32\n"
19203
19204                 // Composite declaration
19205                 "${compositeDecl}"
19206
19207                 // Constants
19208                 "${filler}"
19209
19210                 "${u32vec3Decl:opt}"
19211                 "%uvec3ptr  = OpTypePointer Input %u32vec3\n"
19212
19213                 // Inherited from custom
19214                 "%customptr = OpTypePointer Uniform ${customType}\n"
19215                 "%customarr = OpTypeRuntimeArray ${customType}\n"
19216                 "%buf       = OpTypeStruct %customarr\n"
19217                 "%bufptr    = OpTypePointer Uniform %buf\n"
19218
19219                 "%indata    = OpVariable %bufptr Uniform\n"
19220                 "%outdata   = OpVariable %bufptr Uniform\n"
19221
19222                 "%id        = OpVariable %uvec3ptr Input\n"
19223                 "%zero      = OpConstant %i32 0\n"
19224
19225                 "%main      = OpFunction %void None %voidf\n"
19226                 "%label     = OpLabel\n"
19227                 "%idval     = OpLoad %u32vec3 %id\n"
19228                 "%x         = OpCompositeExtract %u32 %idval 0\n"
19229
19230                 "%inloc     = OpAccessChain %customptr %indata %zero %x\n"
19231                 "%outloc    = OpAccessChain %customptr %outdata %zero %x\n"
19232                 // Read the input value
19233                 "%inval     = OpLoad ${customType} %inloc\n"
19234                 // Create the composite and fill it
19235                 "${compositeConstruct}"
19236                 // Insert the input value to a place
19237                 "%instance2 = OpCompositeInsert ${compositeType} %inval %instance ${indexes}\n"
19238                 // Read back the value from the position
19239                 "%out_val   = OpCompositeExtract ${customType} %instance2 ${indexes}\n"
19240                 // Store it in the output position
19241                 "             OpStore %outloc %out_val\n"
19242                 "             OpReturn\n"
19243                 "             OpFunctionEnd\n"
19244         ).specialize(parameters);
19245 }
19246
19247 template<typename T>
19248 BufferSp createCompositeBuffer(T number)
19249 {
19250         return BufferSp(new Buffer<T>(vector<T>(1, number)));
19251 }
19252
19253 tcu::TestCaseGroup* createOpCompositeInsertGroup (tcu::TestContext& testCtx)
19254 {
19255         de::MovePtr<tcu::TestCaseGroup> group   (new tcu::TestCaseGroup(testCtx, "opcompositeinsert", "Test the OpCompositeInsert instruction"));
19256         de::Random                                              rnd             (deStringHash(group->getName()));
19257
19258         for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19259         {
19260                 NumberType                                              numberType              = NumberType(type);
19261                 const string                                    typeName                = getNumberTypeName(numberType);
19262                 const string                                    description             = "Test the OpCompositeInsert instruction with " + typeName + "s";
19263                 de::MovePtr<tcu::TestCaseGroup> subGroup                (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19264                 vector<map<string, string> >    testCases;
19265
19266                 createCompositeCases(testCases, rnd, numberType);
19267
19268                 for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19269                 {
19270                         ComputeShaderSpec       spec;
19271
19272                         spec.assembly = specializeCompositeInsertShaderTemplate(numberType, *test);
19273
19274                         switch (numberType)
19275                         {
19276                                 case NUMBERTYPE_INT32:
19277                                 {
19278                                         deInt32 number = getInt(rnd);
19279                                         spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19280                                         spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19281                                         break;
19282                                 }
19283                                 case NUMBERTYPE_UINT32:
19284                                 {
19285                                         deUint32 number = rnd.getUint32();
19286                                         spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19287                                         spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19288                                         break;
19289                                 }
19290                                 case NUMBERTYPE_FLOAT32:
19291                                 {
19292                                         float number = rnd.getFloat();
19293                                         spec.inputs.push_back(createCompositeBuffer<float>(number));
19294                                         spec.outputs.push_back(createCompositeBuffer<float>(number));
19295                                         break;
19296                                 }
19297                                 default:
19298                                         DE_ASSERT(false);
19299                         }
19300
19301                         spec.numWorkGroups = IVec3(1, 1, 1);
19302                         subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpCompositeInsert test", spec));
19303                 }
19304                 group->addChild(subGroup.release());
19305         }
19306         return group.release();
19307 }
19308
19309 struct AssemblyStructInfo
19310 {
19311         AssemblyStructInfo (const deUint32 comp, const deUint32 idx)
19312         : components    (comp)
19313         , index                 (idx)
19314         {}
19315
19316         deUint32 components;
19317         deUint32 index;
19318 };
19319
// Builds the SPIR-V assembly of the OpInBoundsAccessChain compute shader for
// one test case.
//
// type       - scalar number type of the input/output buffers and of the
//              composite's elements
// structInfo - member count of the wrapper struct and the index of the member
//              that is accessed
// params     - one case produced by createCompositeCases(); "indexes",
//              "compositeDecl", "compositeType", "compositeConstruct" and
//              "filler" are read with at() and therefore must be present
//
// The shader wraps structInfo.components copies of the composite into a
// struct, inserts the input value at <struct index> <case indexes>, stores
// the struct into a Function variable, obtains a pointer to the modified
// composite with OpInBoundsAccessChain, loads it, extracts the value again
// and writes it to the output buffer.
const string specializeInBoundsShaderTemplate (const NumberType type, const AssemblyStructInfo& structInfo, const map<string, string>& params)
{
        // Create the full index string
        string                          fullIndex       = numberToString(structInfo.index) + " " + params.at("indexes");
        // Convert it to list of indexes
        vector<string>          indexes         = de::splitString(fullIndex, ' ');

        map<string, string>     parameters      (params);
        // One ${compositeType} placeholder per struct member; the placeholder is
        // resolved further below, once compositeType itself has been specialized
        parameters["structType"]        = repeatString(" ${compositeType}", structInfo.components);
        // Every struct member is initialized with the same composite instance
        parameters["structConstruct"]   = repeatString(" %instance", structInfo.components);
        parameters["insertIndexes"]     = fullIndex;

        // In matrix cases the last two index is the CompositeExtract indexes
        const deUint32 extractIndexes = (parameters["type"] == "matrix") ? 2 : 1;

        // Construct the extractIndex
        // (the trailing 1 or 2 entries of the list, each prefixed with a space)
        for (vector<string>::const_iterator index = indexes.end() - extractIndexes; index != indexes.end(); ++index)
        {
                parameters["extractIndexes"] += " " + *index;
        }

        // Remove the last 1 or 2 element depends on matrix case or not
        indexes.erase(indexes.end() - extractIndexes, indexes.end());

        deUint32 id = 0;
        // Generate AccessChain index expressions (except for the last one, because we use ptr to the composite)
        // Each remaining index gets its own %u32 constant named %index_<n>
        for (vector<string>::const_iterator index = indexes.begin(); index != indexes.end(); ++index)
        {
                string indexId = "%index_" + numberToString(id++);
                parameters["accessChainConstDeclaration"] += indexId + "   = OpConstant %u32 " + *index + "\n";
                parameters["accessChainIndexes"] += " " + indexId;
        }

        // Only array composites get an ArrayStride decoration
        parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";

        // First substitution stage: resolve ${customType} inside the case parameters
        const string customType = getAssemblyTypeName(type);
        map<string, string> substCustomType;
        substCustomType["customType"] = customType;
        parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
        parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
        parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
        parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
        parameters["customType"] = customType;

        // Second substitution stage: resolve ${compositeType} inside the struct
        // member list, using the already specialized composite type id
        const string compositeType = parameters.at("compositeType");
        map<string, string> substCompositeType;
        substCompositeType["compositeType"] = compositeType;
        parameters["structType"] = StringTemplate(parameters.at("structType")).specialize(substCompositeType);
        // %u32vec3 is needed for the invocation id; skip the declaration when the
        // composite under test is that very type and ${compositeDecl} declares it
        if (compositeType != "%u32vec3")
        {
                parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
        }

        return StringTemplate(
                "OpCapability Shader\n"
                "OpCapability Matrix\n"
                "OpMemoryModel Logical GLSL450\n"
                "OpEntryPoint GLCompute %main \"main\" %id\n"
                "OpExecutionMode %main LocalSize 1 1 1\n"

                "OpSource GLSL 430\n"
                "OpName %main           \"main\"\n"
                "OpName %id             \"gl_GlobalInvocationID\"\n"
                // Decorators
                "OpDecorate %id BuiltIn GlobalInvocationId\n"
                "OpDecorate %buf BufferBlock\n"
                "OpDecorate %indata DescriptorSet 0\n"
                "OpDecorate %indata Binding 0\n"
                "OpDecorate %outdata DescriptorSet 0\n"
                "OpDecorate %outdata Binding 1\n"
                "OpDecorate %customarr ArrayStride 4\n"
                "${compositeDecorator}"
                "OpMemberDecorate %buf 0 Offset 0\n"
                // General types
                "%void      = OpTypeVoid\n"
                "%voidf     = OpTypeFunction %void\n"
                "%i32       = OpTypeInt 32 1\n"
                "%u32       = OpTypeInt 32 0\n"
                "%f32       = OpTypeFloat 32\n"
                // Custom types
                "${compositeDecl}"
                // %u32vec3 if not already declared in ${compositeDecl}
                "${u32vec3Decl:opt}"
                "%uvec3ptr  = OpTypePointer Input %u32vec3\n"
                // Inherited from composite
                "%composite_p = OpTypePointer Function ${compositeType}\n"
                "%struct_t  = OpTypeStruct${structType}\n"
                "%struct_p  = OpTypePointer Function %struct_t\n"
                // Constants
                "${filler}"
                "${accessChainConstDeclaration}"
                // Inherited from custom
                "%customptr = OpTypePointer Uniform ${customType}\n"
                "%customarr = OpTypeRuntimeArray ${customType}\n"
                "%buf       = OpTypeStruct %customarr\n"
                "%bufptr    = OpTypePointer Uniform %buf\n"
                "%indata    = OpVariable %bufptr Uniform\n"
                "%outdata   = OpVariable %bufptr Uniform\n"

                "%id        = OpVariable %uvec3ptr Input\n"
                "%zero      = OpConstant %u32 0\n"
                "%main      = OpFunction %void None %voidf\n"
                "%label     = OpLabel\n"
                "%struct_v  = OpVariable %struct_p Function\n"
                "%idval     = OpLoad %u32vec3 %id\n"
                "%x         = OpCompositeExtract %u32 %idval 0\n"
                // Create the input/output type
                "%inloc     = OpInBoundsAccessChain %customptr %indata %zero %x\n"
                "%outloc    = OpInBoundsAccessChain %customptr %outdata %zero %x\n"
                // Read the input value
                "%inval     = OpLoad ${customType} %inloc\n"
                // Create the composite and fill it
                "${compositeConstruct}"
                // Create the struct and fill it with the composite
                "%struct    = OpCompositeConstruct %struct_t${structConstruct}\n"
                // Insert the value
                "%comp_obj  = OpCompositeInsert %struct_t %inval %struct ${insertIndexes}\n"
                // Store the object
                "             OpStore %struct_v %comp_obj\n"
                // Get deepest possible composite pointer
                "%inner_ptr = OpInBoundsAccessChain %composite_p %struct_v${accessChainIndexes}\n"
                "%read_obj  = OpLoad ${compositeType} %inner_ptr\n"
                // Read back the stored value
                "%read_val  = OpCompositeExtract ${customType} %read_obj${extractIndexes}\n"
                "             OpStore %outloc %read_val\n"
                "             OpReturn\n"
                "             OpFunctionEnd\n"
        ).specialize(parameters);
}
19449
19450 tcu::TestCaseGroup* createOpInBoundsAccessChainGroup (tcu::TestContext& testCtx)
19451 {
19452         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opinboundsaccesschain", "Test the OpInBoundsAccessChain instruction"));
19453         de::Random                                              rnd                             (deStringHash(group->getName()));
19454
19455         for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19456         {
19457                 NumberType                                              numberType      = NumberType(type);
19458                 const string                                    typeName        = getNumberTypeName(numberType);
19459                 const string                                    description     = "Test the OpInBoundsAccessChain instruction with " + typeName + "s";
19460                 de::MovePtr<tcu::TestCaseGroup> subGroup        (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19461
19462                 vector<map<string, string> >    testCases;
19463                 createCompositeCases(testCases, rnd, numberType);
19464
19465                 for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19466                 {
19467                         ComputeShaderSpec       spec;
19468
19469                         // Number of components inside of a struct
19470                         deUint32 structComponents = rnd.getInt(2, 8);
19471                         // Component index value
19472                         deUint32 structIndex = rnd.getInt(0, structComponents - 1);
19473                         AssemblyStructInfo structInfo(structComponents, structIndex);
19474
19475                         spec.assembly = specializeInBoundsShaderTemplate(numberType, structInfo, *test);
19476
19477                         switch (numberType)
19478                         {
19479                                 case NUMBERTYPE_INT32:
19480                                 {
19481                                         deInt32 number = getInt(rnd);
19482                                         spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19483                                         spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19484                                         break;
19485                                 }
19486                                 case NUMBERTYPE_UINT32:
19487                                 {
19488                                         deUint32 number = rnd.getUint32();
19489                                         spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19490                                         spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19491                                         break;
19492                                 }
19493                                 case NUMBERTYPE_FLOAT32:
19494                                 {
19495                                         float number = rnd.getFloat();
19496                                         spec.inputs.push_back(createCompositeBuffer<float>(number));
19497                                         spec.outputs.push_back(createCompositeBuffer<float>(number));
19498                                         break;
19499                                 }
19500                                 default:
19501                                         DE_ASSERT(false);
19502                         }
19503                         spec.numWorkGroups = IVec3(1, 1, 1);
19504                         subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpInBoundsAccessChain test", spec));
19505                 }
19506                 group->addChild(subGroup.release());
19507         }
19508         return group.release();
19509 }
19510
19511 // If the params missing, uninitialized case
19512 const string specializeDefaultOutputShaderTemplate (const NumberType type, const map<string, string>& params = map<string, string>())
19513 {
19514         map<string, string> parameters(params);
19515
19516         parameters["customType"]        = getAssemblyTypeName(type);
19517
19518         // Declare the const value, and use it in the initializer
19519         if (params.find("constValue") != params.end())
19520         {
19521                 parameters["variableInitializer"]       = " %const";
19522         }
19523         // Uninitialized case
19524         else
19525         {
19526                 parameters["commentDecl"]       = ";";
19527         }
19528
19529         return StringTemplate(
19530                 "OpCapability Shader\n"
19531                 "OpMemoryModel Logical GLSL450\n"
19532                 "OpEntryPoint GLCompute %main \"main\" %id\n"
19533                 "OpExecutionMode %main LocalSize 1 1 1\n"
19534                 "OpSource GLSL 430\n"
19535                 "OpName %main           \"main\"\n"
19536                 "OpName %id             \"gl_GlobalInvocationID\"\n"
19537                 // Decorators
19538                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19539                 "OpDecorate %indata DescriptorSet 0\n"
19540                 "OpDecorate %indata Binding 0\n"
19541                 "OpDecorate %outdata DescriptorSet 0\n"
19542                 "OpDecorate %outdata Binding 1\n"
19543                 "OpDecorate %in_arr ArrayStride 4\n"
19544                 "OpDecorate %in_buf BufferBlock\n"
19545                 "OpMemberDecorate %in_buf 0 Offset 0\n"
19546                 // Base types
19547                 "%void       = OpTypeVoid\n"
19548                 "%voidf      = OpTypeFunction %void\n"
19549                 "%u32        = OpTypeInt 32 0\n"
19550                 "%i32        = OpTypeInt 32 1\n"
19551                 "%f32        = OpTypeFloat 32\n"
19552                 "%uvec3      = OpTypeVector %u32 3\n"
19553                 "%uvec3ptr   = OpTypePointer Input %uvec3\n"
19554                 "${commentDecl:opt}%const      = OpConstant ${customType} ${constValue:opt}\n"
19555                 // Derived types
19556                 "%in_ptr     = OpTypePointer Uniform ${customType}\n"
19557                 "%in_arr     = OpTypeRuntimeArray ${customType}\n"
19558                 "%in_buf     = OpTypeStruct %in_arr\n"
19559                 "%in_bufptr  = OpTypePointer Uniform %in_buf\n"
19560                 "%indata     = OpVariable %in_bufptr Uniform\n"
19561                 "%outdata    = OpVariable %in_bufptr Uniform\n"
19562                 "%id         = OpVariable %uvec3ptr Input\n"
19563                 "%var_ptr    = OpTypePointer Function ${customType}\n"
19564                 // Constants
19565                 "%zero       = OpConstant %i32 0\n"
19566                 // Main function
19567                 "%main       = OpFunction %void None %voidf\n"
19568                 "%label      = OpLabel\n"
19569                 "%out_var    = OpVariable %var_ptr Function${variableInitializer:opt}\n"
19570                 "%idval      = OpLoad %uvec3 %id\n"
19571                 "%x          = OpCompositeExtract %u32 %idval 0\n"
19572                 "%inloc      = OpAccessChain %in_ptr %indata %zero %x\n"
19573                 "%outloc     = OpAccessChain %in_ptr %outdata %zero %x\n"
19574
19575                 "%outval     = OpLoad ${customType} %out_var\n"
19576                 "              OpStore %outloc %outval\n"
19577                 "              OpReturn\n"
19578                 "              OpFunctionEnd\n"
19579         ).specialize(parameters);
19580 }
19581
19582 bool compareFloats (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
19583 {
19584         DE_ASSERT(outputAllocs.size() != 0);
19585         DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
19586
19587         // Use custom epsilon because of the float->string conversion
19588         const float     epsilon = 0.00001f;
19589
19590         for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
19591         {
19592                 vector<deUint8> expectedBytes;
19593                 float                   expected;
19594                 float                   actual;
19595
19596                 expectedOutputs[outputNdx].getBytes(expectedBytes);
19597                 memcpy(&expected, &expectedBytes.front(), expectedBytes.size());
19598                 memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size());
19599
19600                 // Test with epsilon
19601                 if (fabs(expected - actual) > epsilon)
19602                 {
19603                         log << TestLog::Message << "Error: The actual and expected values not matching."
19604                                 << " Expected: " << expected << " Actual: " << actual << " Epsilon: " << epsilon << TestLog::EndMessage;
19605                         return false;
19606                 }
19607         }
19608         return true;
19609 }
19610
19611 // Checks if the driver crash with uninitialized cases
19612 bool passthruVerify (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
19613 {
19614         DE_ASSERT(outputAllocs.size() != 0);
19615         DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
19616
19617         // Copy and discard the result.
19618         for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
19619         {
19620                 vector<deUint8> expectedBytes;
19621                 expectedOutputs[outputNdx].getBytes(expectedBytes);
19622
19623                 const size_t    width                   = expectedBytes.size();
19624                 vector<char>    data                    (width);
19625
19626                 memcpy(&data[0], outputAllocs[outputNdx]->getHostPtr(), width);
19627         }
19628         return true;
19629 }
19630
19631 tcu::TestCaseGroup* createShaderDefaultOutputGroup (tcu::TestContext& testCtx)
19632 {
19633         de::MovePtr<tcu::TestCaseGroup> group   (new tcu::TestCaseGroup(testCtx, "shader_default_output", "Test shader default output."));
19634         de::Random                                              rnd             (deStringHash(group->getName()));
19635
19636         for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19637         {
19638                 NumberType                                              numberType      = NumberType(type);
19639                 const string                                    typeName        = getNumberTypeName(numberType);
19640                 const string                                    description     = "Test the OpVariable initializer with " + typeName + ".";
19641                 de::MovePtr<tcu::TestCaseGroup> subGroup        (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19642
19643                 // 2 similar subcases (initialized and uninitialized)
19644                 for (int subCase = 0; subCase < 2; ++subCase)
19645                 {
19646                         ComputeShaderSpec spec;
19647                         spec.numWorkGroups = IVec3(1, 1, 1);
19648
19649                         map<string, string>                             params;
19650
19651                         switch (numberType)
19652                         {
19653                                 case NUMBERTYPE_INT32:
19654                                 {
19655                                         deInt32 number = getInt(rnd);
19656                                         spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19657                                         spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19658                                         params["constValue"] = numberToString(number);
19659                                         break;
19660                                 }
19661                                 case NUMBERTYPE_UINT32:
19662                                 {
19663                                         deUint32 number = rnd.getUint32();
19664                                         spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19665                                         spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19666                                         params["constValue"] = numberToString(number);
19667                                         break;
19668                                 }
19669                                 case NUMBERTYPE_FLOAT32:
19670                                 {
19671                                         float number = rnd.getFloat();
19672                                         spec.inputs.push_back(createCompositeBuffer<float>(number));
19673                                         spec.outputs.push_back(createCompositeBuffer<float>(number));
19674                                         spec.verifyIO = &compareFloats;
19675                                         params["constValue"] = numberToString(number);
19676                                         break;
19677                                 }
19678                                 default:
19679                                         DE_ASSERT(false);
19680                         }
19681
19682                         // Initialized subcase
19683                         if (!subCase)
19684                         {
19685                                 spec.assembly = specializeDefaultOutputShaderTemplate(numberType, params);
19686                                 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "initialized", "OpVariable initializer tests.", spec));
19687                         }
19688                         // Uninitialized subcase
19689                         else
19690                         {
19691                                 spec.assembly = specializeDefaultOutputShaderTemplate(numberType);
19692                                 spec.verifyIO = &passthruVerify;
19693                                 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "uninitialized", "OpVariable initializer tests.", spec));
19694                         }
19695                 }
19696                 group->addChild(subGroup.release());
19697         }
19698         return group.release();
19699 }
19700
19701 tcu::TestCaseGroup* createOpNopTests (tcu::TestContext& testCtx)
19702 {
19703         de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opnop", "Test OpNop"));
19704         RGBA                                                    defaultColors[4];
19705         map<string, string>                             opNopFragments;
19706
19707         getDefaultColors(defaultColors);
19708
19709         opNopFragments["testfun"]               =
19710                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19711                 "%param1 = OpFunctionParameter %v4f32\n"
19712                 "%label_testfun = OpLabel\n"
19713                 "OpNop\n"
19714                 "OpNop\n"
19715                 "OpNop\n"
19716                 "OpNop\n"
19717                 "OpNop\n"
19718                 "OpNop\n"
19719                 "OpNop\n"
19720                 "OpNop\n"
19721                 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19722                 "%b = OpFAdd %f32 %a %a\n"
19723                 "OpNop\n"
19724                 "%c = OpFSub %f32 %b %a\n"
19725                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19726                 "OpNop\n"
19727                 "OpNop\n"
19728                 "OpReturnValue %ret\n"
19729                 "OpFunctionEnd\n";
19730
19731         createTestsForAllStages("opnop", defaultColors, defaultColors, opNopFragments, testGroup.get());
19732
19733         return testGroup.release();
19734 }
19735
19736 tcu::TestCaseGroup* createOpNameTests (tcu::TestContext& testCtx)
19737 {
19738         de::MovePtr<tcu::TestCaseGroup> testGroup       (new tcu::TestCaseGroup(testCtx, "opname","Test OpName"));
19739         RGBA                                                    defaultColors[4];
19740         map<string, string>                             opNameFragments;
19741
19742         getDefaultColors(defaultColors);
19743
19744         opNameFragments["testfun"] =
19745                 "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19746                 "%param1     = OpFunctionParameter %v4f32\n"
19747                 "%label_func = OpLabel\n"
19748                 "%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19749                 "%b          = OpFAdd %f32 %a %a\n"
19750                 "%c          = OpFSub %f32 %b %a\n"
19751                 "%ret        = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19752                 "OpReturnValue %ret\n"
19753                 "OpFunctionEnd\n";
19754
19755         opNameFragments["debug"] =
19756                 "OpName %BP_main \"not_main\"";
19757
19758         createTestsForAllStages("opname", defaultColors, defaultColors, opNameFragments, testGroup.get());
19759
19760         return testGroup.release();
19761 }
19762
19763 tcu::TestCaseGroup* createFloat16Tests (tcu::TestContext& testCtx)
19764 {
19765         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
19766
19767         testGroup->addChild(createOpConstantFloat16Tests(testCtx));
19768         testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITH_NAN));
19769         testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITHOUT_NAN));
19770         testGroup->addChild(createFloat16FuncSet<GraphicsResources>(testCtx));
19771         testGroup->addChild(createFloat16VectorExtractSet<GraphicsResources>(testCtx));
19772         testGroup->addChild(createFloat16VectorInsertSet<GraphicsResources>(testCtx));
19773         testGroup->addChild(createFloat16VectorShuffleSet<GraphicsResources>(testCtx));
19774         testGroup->addChild(createFloat16CompositeConstructSet<GraphicsResources>(testCtx));
19775         testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeExtract"));
19776         testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeInsert"));
19777         testGroup->addChild(createFloat16ArithmeticSet<GraphicsResources>(testCtx));
19778         testGroup->addChild(createFloat16ArithmeticSet<1, GraphicsResources>(testCtx));
19779         testGroup->addChild(createFloat16ArithmeticSet<2, GraphicsResources>(testCtx));
19780         testGroup->addChild(createFloat16ArithmeticSet<3, GraphicsResources>(testCtx));
19781         testGroup->addChild(createFloat16ArithmeticSet<4, GraphicsResources>(testCtx));
19782
19783         return testGroup.release();
19784 }
19785
19786 tcu::TestCaseGroup* createFloat32Tests (tcu::TestContext& testCtx)
19787 {
19788         de::MovePtr<tcu::TestCaseGroup> testGroup       (new tcu::TestCaseGroup(testCtx, "float32", "Float 32 tests"));
19789
19790         testGroup->addChild(createFloat32ComparisonGraphicsSet<1>(testCtx));
19791         testGroup->addChild(createFloat32ComparisonGraphicsSet<2>(testCtx));
19792         testGroup->addChild(createFloat32ComparisonGraphicsSet<3>(testCtx));
19793         testGroup->addChild(createFloat32ComparisonGraphicsSet<4>(testCtx));
19794
19795         return testGroup.release();
19796 }
19797
19798 tcu::TestCaseGroup* createFloat16Group (tcu::TestContext& testCtx)
19799 {
19800         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
19801
19802         testGroup->addChild(createFloat16OpConstantCompositeGroup(testCtx));
19803         testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITH_NAN));
19804         testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITHOUT_NAN));
19805         testGroup->addChild(createFloat16FuncSet<ComputeShaderSpec>(testCtx));
19806         testGroup->addChild(createFloat16VectorExtractSet<ComputeShaderSpec>(testCtx));
19807         testGroup->addChild(createFloat16VectorInsertSet<ComputeShaderSpec>(testCtx));
19808         testGroup->addChild(createFloat16VectorShuffleSet<ComputeShaderSpec>(testCtx));
19809         testGroup->addChild(createFloat16CompositeConstructSet<ComputeShaderSpec>(testCtx));
19810         testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeExtract"));
19811         testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeInsert"));
19812         testGroup->addChild(createFloat16ArithmeticSet<ComputeShaderSpec>(testCtx));
19813         testGroup->addChild(createFloat16ArithmeticSet<1, ComputeShaderSpec>(testCtx));
19814         testGroup->addChild(createFloat16ArithmeticSet<2, ComputeShaderSpec>(testCtx));
19815         testGroup->addChild(createFloat16ArithmeticSet<3, ComputeShaderSpec>(testCtx));
19816         testGroup->addChild(createFloat16ArithmeticSet<4, ComputeShaderSpec>(testCtx));
19817
19818         return testGroup.release();
19819 }
19820
19821 tcu::TestCaseGroup* createFloat32Group (tcu::TestContext& testCtx)
19822 {
19823         de::MovePtr<tcu::TestCaseGroup> testGroup       (new tcu::TestCaseGroup(testCtx, "float32", "Float 32 tests"));
19824
19825         testGroup->addChild(createFloat32ComparisonComputeSet<1>(testCtx));
19826         testGroup->addChild(createFloat32ComparisonComputeSet<2>(testCtx));
19827         testGroup->addChild(createFloat32ComparisonComputeSet<3>(testCtx));
19828         testGroup->addChild(createFloat32ComparisonComputeSet<4>(testCtx));
19829
19830         return testGroup.release();
19831 }
19832
19833 tcu::TestCaseGroup* createBoolMixedBitSizeGroup (tcu::TestContext& testCtx)
19834 {
19835         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "mixed_bitsize", "Tests boolean operands produced from instructions of different bit-sizes"));
19836
19837         de::Random                                              rnd                             (deStringHash(group->getName()));
19838         const int               numElements             = 100;
19839         vector<float>   inputData               (numElements, 0);
19840         vector<float>   outputData              (numElements, 0);
19841         fillRandomScalars(rnd, 0.0f, 100.0f, &inputData[0], 100);
19842
19843         const StringTemplate                    shaderTemplate  (
19844                 "${CAPS}\n"
19845                 "OpMemoryModel Logical GLSL450\n"
19846                 "OpEntryPoint GLCompute %main \"main\" %id\n"
19847                 "OpExecutionMode %main LocalSize 1 1 1\n"
19848                 "OpSource GLSL 430\n"
19849                 "OpName %main           \"main\"\n"
19850                 "OpName %id             \"gl_GlobalInvocationID\"\n"
19851
19852                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19853
19854                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
19855
19856                 "%id        = OpVariable %uvec3ptr Input\n"
19857                 "${CONST}\n"
19858                 "%main      = OpFunction %void None %voidf\n"
19859                 "%label     = OpLabel\n"
19860                 "%idval     = OpLoad %uvec3 %id\n"
19861                 "%x         = OpCompositeExtract %u32 %idval 0\n"
19862                 "%inloc     = OpAccessChain %f32ptr %indata %c0i32 %x\n"
19863
19864                 "${TEST}\n"
19865
19866                 "%outloc    = OpAccessChain %f32ptr %outdata %c0i32 %x\n"
19867                 "             OpStore %outloc %res\n"
19868                 "             OpReturn\n"
19869                 "             OpFunctionEnd\n"
19870         );
19871
19872         // Each test case produces 4 boolean values, and we want each of these values
19873         // to come froma different combination of the available bit-sizes, so compute
19874         // all possible combinations here.
19875         vector<deUint32>        widths;
19876         widths.push_back(32);
19877         widths.push_back(16);
19878         widths.push_back(8);
19879
19880         vector<IVec4>   cases;
19881         for (size_t width0 = 0; width0 < widths.size(); width0++)
19882         {
19883                 for (size_t width1 = 0; width1 < widths.size(); width1++)
19884                 {
19885                         for (size_t width2 = 0; width2 < widths.size(); width2++)
19886                         {
19887                                 for (size_t width3 = 0; width3 < widths.size(); width3++)
19888                                 {
19889                                         cases.push_back(IVec4(widths[width0], widths[width1], widths[width2], widths[width3]));
19890                                 }
19891                         }
19892                 }
19893         }
19894
19895         for (size_t caseNdx = 0; caseNdx < cases.size(); caseNdx++)
19896         {
19897                 /// Skip cases where all bitsizes are the same, we are only interested in testing booleans produced from instructions with different native bit-sizes
19898                 if (cases[caseNdx][0] == cases[caseNdx][1] && cases[caseNdx][0] == cases[caseNdx][2] && cases[caseNdx][0] == cases[caseNdx][3])
19899                         continue;
19900
19901                 map<string, string>     specializations;
19902                 ComputeShaderSpec       spec;
19903
19904                 // Inject appropriate capabilities and reference constants depending
19905                 // on the bit-sizes required by this test case
19906                 bool hasFloat32 = cases[caseNdx][0] == 32 || cases[caseNdx][1] == 32 || cases[caseNdx][2] == 32 || cases[caseNdx][3] == 32;
19907                 bool hasFloat16 = cases[caseNdx][0] == 16 || cases[caseNdx][1] == 16 || cases[caseNdx][2] == 16 || cases[caseNdx][3] == 16;
19908                 bool hasInt8    = cases[caseNdx][0] == 8 || cases[caseNdx][1] == 8 || cases[caseNdx][2] == 8 || cases[caseNdx][3] == 8;
19909
19910                 string capsStr  = "OpCapability Shader\n";
19911                 string constStr =
19912                         "%c0i32     = OpConstant %i32 0\n"
19913                         "%c1f32     = OpConstant %f32 1.0\n"
19914                         "%c0f32     = OpConstant %f32 0.0\n";
19915
19916                 if (hasFloat32)
19917                 {
19918                         constStr        +=
19919                                 "%c10f32    = OpConstant %f32 10.0\n"
19920                                 "%c25f32    = OpConstant %f32 25.0\n"
19921                                 "%c50f32    = OpConstant %f32 50.0\n"
19922                                 "%c90f32    = OpConstant %f32 90.0\n";
19923                 }
19924
19925                 if (hasFloat16)
19926                 {
19927                         capsStr         += "OpCapability Float16\n";
19928                         constStr        +=
19929                                 "%f16       = OpTypeFloat 16\n"
19930                                 "%c10f16    = OpConstant %f16 10.0\n"
19931                                 "%c25f16    = OpConstant %f16 25.0\n"
19932                                 "%c50f16    = OpConstant %f16 50.0\n"
19933                                 "%c90f16    = OpConstant %f16 90.0\n";
19934                 }
19935
19936                 if (hasInt8)
19937                 {
19938                         capsStr         += "OpCapability Int8\n";
19939                         constStr        +=
19940                                 "%i8        = OpTypeInt 8 1\n"
19941                                 "%c10i8     = OpConstant %i8 10\n"
19942                                 "%c25i8     = OpConstant %i8 25\n"
19943                                 "%c50i8     = OpConstant %i8 50\n"
19944                                 "%c90i8     = OpConstant %i8 90\n";
19945                 }
19946
19947                 // Each invocation reads a different float32 value as input. Depending on
19948                 // the bit-sizes required by the particular test case, we also produce
19949                 // float16 and/or and int8 values by converting from the 32-bit float.
19950                 string testStr  = "";
19951                 testStr                 += "%inval32   = OpLoad %f32 %inloc\n";
19952                 if (hasFloat16)
19953                         testStr         += "%inval16   = OpFConvert %f16 %inval32\n";
19954                 if (hasInt8)
19955                         testStr         += "%inval8    = OpConvertFToS %i8 %inval32\n";
19956
19957                 // Because conversions from Float to Int round towards 0 we want our "greater" comparisons to be >=,
19958                 // that way a float32/float16 comparison such as 50.6f >= 50.0f will preserve its result
19959                 // when converted to int8, since FtoS(50.6f) results in 50. For "less" comparisons, it is the
19960                 // other way around, so in this case we want < instead of <=.
19961                 if (cases[caseNdx][0] == 32)
19962                         testStr         += "%cmp1      = OpFOrdGreaterThanEqual %bool %inval32 %c25f32\n";
19963                 else if (cases[caseNdx][0] == 16)
19964                         testStr         += "%cmp1      = OpFOrdGreaterThanEqual %bool %inval16 %c25f16\n";
19965                 else
19966                         testStr         += "%cmp1      = OpSGreaterThanEqual %bool %inval8 %c25i8\n";
19967
19968                 if (cases[caseNdx][1] == 32)
19969                         testStr         += "%cmp2      = OpFOrdLessThan %bool %inval32 %c50f32\n";
19970                 else if (cases[caseNdx][1] == 16)
19971                         testStr         += "%cmp2      = OpFOrdLessThan %bool %inval16 %c50f16\n";
19972                 else
19973                         testStr         += "%cmp2      = OpSLessThan %bool %inval8 %c50i8\n";
19974
19975                 if (cases[caseNdx][2] == 32)
19976                         testStr         += "%cmp3      = OpFOrdLessThan %bool %inval32 %c10f32\n";
19977                 else if (cases[caseNdx][2] == 16)
19978                         testStr         += "%cmp3      = OpFOrdLessThan %bool %inval16 %c10f16\n";
19979                 else
19980                         testStr         += "%cmp3      = OpSLessThan %bool %inval8 %c10i8\n";
19981
19982                 if (cases[caseNdx][3] == 32)
19983                         testStr         += "%cmp4      = OpFOrdGreaterThanEqual %bool %inval32 %c90f32\n";
19984                 else if (cases[caseNdx][3] == 16)
19985                         testStr         += "%cmp4      = OpFOrdGreaterThanEqual %bool %inval16 %c90f16\n";
19986                 else
19987                         testStr         += "%cmp4      = OpSGreaterThanEqual %bool %inval8 %c90i8\n";
19988
19989                 testStr                 += "%and1      = OpLogicalAnd %bool %cmp1 %cmp2\n";
19990                 testStr                 += "%or1       = OpLogicalOr %bool %cmp3 %cmp4\n";
19991                 testStr                 += "%or2       = OpLogicalOr %bool %and1 %or1\n";
19992                 testStr                 += "%not1      = OpLogicalNot %bool %or2\n";
19993                 testStr                 += "%res       = OpSelect %f32 %not1 %c1f32 %c0f32\n";
19994
19995                 specializations["CAPS"]         = capsStr;
19996                 specializations["CONST"]        = constStr;
19997                 specializations["TEST"]         = testStr;
19998
19999                 // Compute expected result by evaluating the boolean expression computed in the shader for each input value
20000                 for (size_t ndx = 0; ndx < numElements; ++ndx)
20001                         outputData[ndx] = !((inputData[ndx] >= 25.0f && inputData[ndx] < 50.0f) || (inputData[ndx] < 10.0f || inputData[ndx] >= 90.0f));
20002
20003                 spec.assembly = shaderTemplate.specialize(specializations);
20004                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputData)));
20005                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputData)));
20006                 spec.numWorkGroups = IVec3(numElements, 1, 1);
20007                 if (hasFloat16)
20008                         spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
20009                 if (hasInt8)
20010                         spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
20011                 spec.extensions.push_back("VK_KHR_shader_float16_int8");
20012
20013                 string testName = "b" + de::toString(cases[caseNdx][0]) + "b" + de::toString(cases[caseNdx][1]) + "b" + de::toString(cases[caseNdx][2]) + "b" + de::toString(cases[caseNdx][3]);
20014                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", spec));
20015         }
20016
20017         return group.release();
20018 }
20019
20020 tcu::TestCaseGroup* createBoolGroup (tcu::TestContext& testCtx)
20021 {
20022         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "bool", "Boolean tests"));
20023
20024         testGroup->addChild(createBoolMixedBitSizeGroup(testCtx));
20025
20026         return testGroup.release();
20027 }
20028
20029 tcu::TestCaseGroup* createOpNameAbuseTests (tcu::TestContext& testCtx)
20030 {
20031         de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opname_abuse", "OpName abuse tests"));
20032         vector<CaseParameter>                   abuseCases;
20033         RGBA                                                    defaultColors[4];
20034         map<string, string>                             opNameFragments;
20035
20036         getOpNameAbuseCases(abuseCases);
20037         getDefaultColors(defaultColors);
20038
20039         opNameFragments["testfun"] =
20040                 "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20041                 "%param1     = OpFunctionParameter %v4f32\n"
20042                 "%label_func = OpLabel\n"
20043                 "%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20044                 "%b          = OpFAdd %f32 %a %a\n"
20045                 "%c          = OpFSub %f32 %b %a\n"
20046                 "%ret        = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
20047                 "OpReturnValue %ret\n"
20048                 "OpFunctionEnd\n";
20049
20050         for (unsigned int i = 0; i < abuseCases.size(); i++)
20051         {
20052                 string casename;
20053                 casename = string("main") + abuseCases[i].name;
20054
20055                 opNameFragments["debug"] =
20056                         "OpName %BP_main \"" + abuseCases[i].param + "\"";
20057
20058                 createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20059         }
20060
20061         for (unsigned int i = 0; i < abuseCases.size(); i++)
20062         {
20063                 string casename;
20064                 casename = string("b") + abuseCases[i].name;
20065
20066                 opNameFragments["debug"] =
20067                         "OpName %b \"" + abuseCases[i].param + "\"";
20068
20069                 createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20070         }
20071
20072         {
20073                 opNameFragments["debug"] =
20074                         "OpName %test_code \"name1\"\n"
20075                         "OpName %param1    \"name2\"\n"
20076                         "OpName %a         \"name3\"\n"
20077                         "OpName %b         \"name4\"\n"
20078                         "OpName %c         \"name5\"\n"
20079                         "OpName %ret       \"name6\"\n";
20080
20081                 createTestsForAllStages("everything_named", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20082         }
20083
20084         {
20085                 opNameFragments["debug"] =
20086                         "OpName %test_code \"the_same\"\n"
20087                         "OpName %param1    \"the_same\"\n"
20088                         "OpName %a         \"the_same\"\n"
20089                         "OpName %b         \"the_same\"\n"
20090                         "OpName %c         \"the_same\"\n"
20091                         "OpName %ret       \"the_same\"\n";
20092
20093                 createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20094         }
20095
20096         {
20097                 opNameFragments["debug"] =
20098                         "OpName %BP_main \"to_be\"\n"
20099                         "OpName %BP_main \"or_not\"\n"
20100                         "OpName %BP_main \"to_be\"\n";
20101
20102                 createTestsForAllStages("main_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20103         }
20104
20105         {
20106                 opNameFragments["debug"] =
20107                         "OpName %b \"to_be\"\n"
20108                         "OpName %b \"or_not\"\n"
20109                         "OpName %b \"to_be\"\n";
20110
20111                 createTestsForAllStages("b_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20112         }
20113
20114         return abuseGroup.release();
20115 }
20116
20117
20118 tcu::TestCaseGroup* createOpMemberNameAbuseTests (tcu::TestContext& testCtx)
20119 {
20120         de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opmembername_abuse", "OpName abuse tests"));
20121         vector<CaseParameter>                   abuseCases;
20122         RGBA                                                    defaultColors[4];
20123         map<string, string>                             opMemberNameFragments;
20124
20125         getOpNameAbuseCases(abuseCases);
20126         getDefaultColors(defaultColors);
20127
20128         opMemberNameFragments["pre_main"] =
20129                 "%f3str = OpTypeStruct %f32 %f32 %f32\n";
20130
20131         opMemberNameFragments["testfun"] =
20132                 "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20133                 "%param1     = OpFunctionParameter %v4f32\n"
20134                 "%label_func = OpLabel\n"
20135                 "%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20136                 "%b          = OpFAdd %f32 %a %a\n"
20137                 "%c          = OpFSub %f32 %b %a\n"
20138                 "%cstr       = OpCompositeConstruct %f3str %c %c %c\n"
20139                 "%d          = OpCompositeExtract %f32 %cstr 0\n"
20140                 "%ret        = OpVectorInsertDynamic %v4f32 %param1 %d %c_i32_0\n"
20141                 "OpReturnValue %ret\n"
20142                 "OpFunctionEnd\n";
20143
20144         for (unsigned int i = 0; i < abuseCases.size(); i++)
20145         {
20146                 string casename;
20147                 casename = string("f3str_x") + abuseCases[i].name;
20148
20149                 opMemberNameFragments["debug"] =
20150                         "OpMemberName %f3str 0 \"" + abuseCases[i].param + "\"";
20151
20152                 createTestsForAllStages(casename, defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20153         }
20154
20155         {
20156                 opMemberNameFragments["debug"] =
20157                         "OpMemberName %f3str 0 \"name1\"\n"
20158                         "OpMemberName %f3str 1 \"name2\"\n"
20159                         "OpMemberName %f3str 2 \"name3\"\n";
20160
20161                 createTestsForAllStages("everything_named", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20162         }
20163
20164         {
20165                 opMemberNameFragments["debug"] =
20166                         "OpMemberName %f3str 0 \"the_same\"\n"
20167                         "OpMemberName %f3str 1 \"the_same\"\n"
20168                         "OpMemberName %f3str 2 \"the_same\"\n";
20169
20170                 createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20171         }
20172
20173         {
20174                 opMemberNameFragments["debug"] =
20175                         "OpMemberName %f3str 0 \"to_be\"\n"
20176                         "OpMemberName %f3str 1 \"or_not\"\n"
20177                         "OpMemberName %f3str 0 \"to_be\"\n"
20178                         "OpMemberName %f3str 2 \"makes_no\"\n"
20179                         "OpMemberName %f3str 0 \"difference\"\n"
20180                         "OpMemberName %f3str 0 \"to_me\"\n";
20181
20182
20183                 createTestsForAllStages("f3str_x_has_multiple_names", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20184         }
20185
20186         return abuseGroup.release();
20187 }
20188
20189 vector<deUint32> getSparseIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
20190 {
20191         vector<deUint32>        result;
20192         de::Random                      rnd             (seed);
20193
20194         result.reserve(numDataPoints);
20195
20196         for (deUint32 dataPointNdx = 0; dataPointNdx < numDataPoints; ++dataPointNdx)
20197                 result.push_back(rnd.getUint32());
20198
20199         return result;
20200 }
20201
20202 vector<deUint32> getSparseIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2)
20203 {
20204         vector<deUint32>        result;
20205
20206         result.reserve(inData1.size());
20207
20208         for (size_t dataPointNdx = 0; dataPointNdx < inData1.size(); ++dataPointNdx)
20209                 result.push_back(inData1[dataPointNdx] + inData2[dataPointNdx]);
20210
20211         return result;
20212 }
20213
20214 template<class SpecResource>
20215 void createSparseIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
20216 {
20217         const deUint32                  numDataPoints   = 16;
20218         const std::string               testName                ("sparse_ids");
20219         const deUint32                  seed                    (deStringHash(testName.c_str()));
20220         const vector<deUint32>  inData1                 (getSparseIdsAbuseData(numDataPoints, seed + 1));
20221         const vector<deUint32>  inData2                 (getSparseIdsAbuseData(numDataPoints, seed + 2));
20222         const vector<deUint32>  outData                 (getSparseIdsAbuseResults(inData1, inData2));
20223         const StringTemplate    preMain
20224         (
20225                 "%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20226                 "   %up_u32 = OpTypePointer Uniform %u32\n"
20227                 "   %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20228                 "   %SSBO32 = OpTypeStruct %ra_u32\n"
20229                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20230                 "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20231                 "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20232                 " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
20233         );
20234         const StringTemplate    decoration
20235         (
20236                 "OpDecorate %ra_u32 ArrayStride 4\n"
20237                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
20238                 "OpDecorate %SSBO32 BufferBlock\n"
20239                 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
20240                 "OpDecorate %ssbo_src0 Binding 0\n"
20241                 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
20242                 "OpDecorate %ssbo_src1 Binding 1\n"
20243                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
20244                 "OpDecorate %ssbo_dst Binding 2\n"
20245         );
20246         const StringTemplate    testFun
20247         (
20248                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20249                 "    %param = OpFunctionParameter %v4f32\n"
20250
20251                 "    %entry = OpLabel\n"
20252                 "        %i = OpVariable %fp_i32 Function\n"
20253                 "             OpStore %i %c_i32_0\n"
20254                 "             OpBranch %loop\n"
20255
20256                 "     %loop = OpLabel\n"
20257                 "    %i_cmp = OpLoad %i32 %i\n"
20258                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20259                 "             OpLoopMerge %merge %next None\n"
20260                 "             OpBranchConditional %lt %write %merge\n"
20261
20262                 "    %write = OpLabel\n"
20263                 "      %ndx = OpLoad %i32 %i\n"
20264
20265                 "      %127 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20266                 "      %128 = OpLoad %u32 %127\n"
20267
20268                 // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20269                 "  %4194000 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20270                 "  %4194001 = OpLoad %u32 %4194000\n"
20271
20272                 "  %2097151 = OpIAdd %u32 %128 %4194001\n"
20273                 "  %2097152 = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20274                 "             OpStore %2097152 %2097151\n"
20275                 "             OpBranch %next\n"
20276
20277                 "     %next = OpLabel\n"
20278                 "    %i_cur = OpLoad %i32 %i\n"
20279                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20280                 "             OpStore %i %i_new\n"
20281                 "             OpBranch %loop\n"
20282
20283                 "    %merge = OpLabel\n"
20284                 "             OpReturnValue %param\n"
20285
20286                 "             OpFunctionEnd\n"
20287         );
20288         SpecResource                    specResource;
20289         map<string, string>             specs;
20290         VulkanFeatures                  features;
20291         map<string, string>             fragments;
20292         vector<string>                  extensions;
20293
20294         specs["num_data_points"]        = de::toString(numDataPoints);
20295
20296         fragments["decoration"]         = decoration.specialize(specs);
20297         fragments["pre_main"]           = preMain.specialize(specs);
20298         fragments["testfun"]            = testFun.specialize(specs);
20299
20300         specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20301         specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20302         specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20303
20304         if (std::is_base_of<GraphicsResources, SpecResource>::value)
20305         {
20306                 features.coreFeatures.vertexPipelineStoresAndAtomics    = true;
20307                 features.coreFeatures.fragmentStoresAndAtomics                  = true;
20308         }
20309
20310         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
20311 }
20312
20313 vector<deUint32> getLotsIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
20314 {
20315         vector<deUint32>        result;
20316         de::Random                      rnd             (seed);
20317
20318         result.reserve(numDataPoints);
20319
20320         // Fixed value
20321         result.push_back(1u);
20322
20323         // Random values
20324         for (deUint32 dataPointNdx = 1; dataPointNdx < numDataPoints; ++dataPointNdx)
20325                 result.push_back(rnd.getUint8());
20326
20327         return result;
20328 }
20329
20330 vector<deUint32> getLotsIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2, const deUint32 count)
20331 {
20332         vector<deUint32>        result;
20333
20334         result.reserve(inData1.size());
20335
20336         for (size_t dataPointNdx = 0; dataPointNdx < inData1.size(); ++dataPointNdx)
20337                 result.push_back(inData1[dataPointNdx] + count * inData2[dataPointNdx]);
20338
20339         return result;
20340 }
20341
20342 template<class SpecResource>
20343 void createLotsIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
20344 {
20345         const deUint32                  numDataPoints   = 16;
20346         const deUint32                  firstNdx                = 100u;
20347         const deUint32                  sequenceCount   = 10000u;
20348         const std::string               testName                ("lots_ids");
20349         const deUint32                  seed                    (deStringHash(testName.c_str()));
20350         const vector<deUint32>  inData1                 (getLotsIdsAbuseData(numDataPoints, seed + 1));
20351         const vector<deUint32>  inData2                 (getLotsIdsAbuseData(numDataPoints, seed + 2));
20352         const vector<deUint32>  outData                 (getLotsIdsAbuseResults(inData1, inData2, sequenceCount));
20353         const StringTemplate preMain
20354         (
20355                 "%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20356                 "   %up_u32 = OpTypePointer Uniform %u32\n"
20357                 "   %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20358                 "   %SSBO32 = OpTypeStruct %ra_u32\n"
20359                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20360                 "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20361                 "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20362                 " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
20363         );
20364         const StringTemplate decoration
20365         (
20366                 "OpDecorate %ra_u32 ArrayStride 4\n"
20367                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
20368                 "OpDecorate %SSBO32 BufferBlock\n"
20369                 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
20370                 "OpDecorate %ssbo_src0 Binding 0\n"
20371                 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
20372                 "OpDecorate %ssbo_src1 Binding 1\n"
20373                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
20374                 "OpDecorate %ssbo_dst Binding 2\n"
20375         );
20376         const StringTemplate testFun
20377         (
20378                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20379                 "    %param = OpFunctionParameter %v4f32\n"
20380
20381                 "    %entry = OpLabel\n"
20382                 "        %i = OpVariable %fp_i32 Function\n"
20383                 "             OpStore %i %c_i32_0\n"
20384                 "             OpBranch %loop\n"
20385
20386                 "     %loop = OpLabel\n"
20387                 "    %i_cmp = OpLoad %i32 %i\n"
20388                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20389                 "             OpLoopMerge %merge %next None\n"
20390                 "             OpBranchConditional %lt %write %merge\n"
20391
20392                 "    %write = OpLabel\n"
20393                 "      %ndx = OpLoad %i32 %i\n"
20394
20395                 "       %90 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20396                 "       %91 = OpLoad %u32 %90\n"
20397
20398                 "       %98 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20399                 "       %${zeroth_id} = OpLoad %u32 %98\n"
20400
20401                 "${seq}\n"
20402
20403                 // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20404                 "      %dst = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20405                 "             OpStore %dst %${last_id}\n"
20406                 "             OpBranch %next\n"
20407
20408                 "     %next = OpLabel\n"
20409                 "    %i_cur = OpLoad %i32 %i\n"
20410                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20411                 "             OpStore %i %i_new\n"
20412                 "             OpBranch %loop\n"
20413
20414                 "    %merge = OpLabel\n"
20415                 "             OpReturnValue %param\n"
20416
20417                 "             OpFunctionEnd\n"
20418         );
20419         deUint32                                lastId                  = firstNdx;
20420         SpecResource                    specResource;
20421         map<string, string>             specs;
20422         VulkanFeatures                  features;
20423         map<string, string>             fragments;
20424         vector<string>                  extensions;
20425         std::string                             sequence;
20426
20427         for (deUint32 sequenceNdx = 0; sequenceNdx < sequenceCount; ++sequenceNdx)
20428         {
20429                 const deUint32          sequenceId              = sequenceNdx + firstNdx;
20430                 const std::string       sequenceIdStr   = de::toString(sequenceId);
20431
20432                 sequence += "%" + sequenceIdStr + " = OpIAdd %u32 %91 %" + de::toString(sequenceId - 1) + "\n";
20433                 lastId = sequenceId;
20434
20435                 if (sequenceNdx == 0)
20436                         sequence.reserve((10 + sequence.length()) * sequenceCount);
20437         }
20438
20439         specs["num_data_points"]        = de::toString(numDataPoints);
20440         specs["zeroth_id"]                      = de::toString(firstNdx - 1);
20441         specs["last_id"]                        = de::toString(lastId);
20442         specs["seq"]                            = sequence;
20443
20444         fragments["decoration"]         = decoration.specialize(specs);
20445         fragments["pre_main"]           = preMain.specialize(specs);
20446         fragments["testfun"]            = testFun.specialize(specs);
20447
20448         specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20449         specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20450         specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20451
20452         if (std::is_base_of<GraphicsResources, SpecResource>::value)
20453         {
20454                 features.coreFeatures.vertexPipelineStoresAndAtomics    = true;
20455                 features.coreFeatures.fragmentStoresAndAtomics                  = true;
20456         }
20457
20458         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
20459 }
20460
20461 tcu::TestCaseGroup* createSpirvIdsAbuseTests (tcu::TestContext& testCtx)
20462 {
20463         de::MovePtr<tcu::TestCaseGroup> testGroup       (new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
20464
20465         createSparseIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20466         createLotsIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20467
20468         return testGroup.release();
20469 }
20470
20471 tcu::TestCaseGroup* createSpirvIdsAbuseGroup (tcu::TestContext& testCtx)
20472 {
20473         de::MovePtr<tcu::TestCaseGroup> testGroup       (new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
20474
20475         createSparseIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20476         createLotsIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20477
20478         return testGroup.release();
20479 }
20480
20481 tcu::TestCaseGroup* createFunctionParamsGroup (tcu::TestContext& testCtx)
20482 {
20483         de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "function_params", "Function parameter tests"));
20484 #ifndef CTS_USES_VULKANSC
20485         static const char data_dir[] = "spirv_assembly/instruction/function_params";
20486
20487         static const struct
20488         {
20489                 const std::string name;
20490                 const std::string desc;
20491         } cases[] =
20492         {
20493                 { "sampler_param", "Test combined image sampler as function parameter" },
20494         };
20495
20496         for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
20497         {
20498                 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20499                                                                                                                                                         cases[i].name.c_str(),
20500                                                                                                                                                         cases[i].desc.c_str(),
20501                                                                                                                                                         data_dir,
20502                                                                                                                                                         cases[i].name + ".amber");
20503                 testGroup->addChild(testCase);
20504         }
20505 #endif
20506         return testGroup.release();
20507 }
20508
20509 tcu::TestCaseGroup* createEarlyFragmentTests(tcu::TestContext& testCtx)
20510 {
20511         de::MovePtr<tcu::TestCaseGroup> earlyFragTests (new tcu::TestCaseGroup(testCtx, "early_fragment", "Early Fragment Tests"));
20512
20513 #ifndef CTS_USES_VULKANSC
20514         static const char dataDir[] = "spirv_assembly/instruction/graphics/early_fragment";
20515
20516         static const struct Case
20517         {
20518                 const string name;
20519                 const string desc;
20520         }
20521         cases[] =
20522         {
20523                 // Overwriting the gl_FragDepth should be ignored, when Early Fragment Test Mode is enabled.
20524                 { "depth_less",                         "gl_FragDepth > CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."      },
20525                 { "depth_greater",                      "gl_FragDepth < CLEAR_DEPTH. Polygon depth > CLEAR_DEPTH."      },
20526                 { "depth_less_or_equal",        "gl_FragDepth > CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."     },
20527                 { "depth_greater_or_equal",     "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."     },
20528                 { "depth_equal",                        "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."     },
20529                 { "depth_not_equal",            "gl_FragDepth == CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."     }
20530         };
20531
20532         for (const auto& tCase : cases)
20533         {
20534                 cts_amber::AmberTestCase* testCase = cts_amber::createAmberTestCase(testCtx,
20535                         tCase.name.c_str(),
20536                         tCase.desc.c_str(),
20537                         dataDir,
20538                         tCase.name + ".amber");
20539
20540                 earlyFragTests->addChild(testCase);
20541         }
20542 #endif // CTS_USES_VULKANSC
20543
20544         return earlyFragTests.release();
20545 }
20546
20547 tcu::TestCaseGroup* createEarlyAndLateFragmentTests(tcu::TestContext& testCtx)
20548 {
20549         de::MovePtr<tcu::TestCaseGroup> earlyLateFragTests(new tcu::TestCaseGroup(testCtx, "early_and_late_fragment", "Early And Late Fragment Tests"));
20550 #ifndef CTS_USES_VULKANSC
20551         static const char dataDir[] = "spirv_assembly/instruction/graphics/early_and_late_fragment";
20552
20553         static const struct Case
20554         {
20555                 const string name;
20556                 const string desc;
20557         }       cases[] =
20558         {
20559                 { "depth_less",                         "gl_FragDepth < CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."      },
20560                 { "depth_greater",                      "gl_FragDepth > CLEAR_DEPTH. Polygon depth > CLEAR_DEPTH."      },
20561                 { "depth_less_or_equal",        "gl_FragDepth > CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."     },
20562                 { "depth_greater_or_equal",     "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."     },
20563                 { "depth_equal",                        "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."     },
20564                 { "depth_not_equal",            "gl_FragDepth == CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."     }
20565         };
20566
20567         for (const auto& tCase : cases)
20568         {
20569                 cts_amber::AmberTestCase* testCase = cts_amber::createAmberTestCase(testCtx,
20570                         tCase.name.c_str(),
20571                         tCase.desc.c_str(),
20572                         dataDir,
20573                         tCase.name + ".amber",
20574                         { "VK_AMD_shader_early_and_late_fragment_tests" });
20575
20576                 earlyLateFragTests->addChild(testCase);
20577         }
20578 #endif
20579
20580         return earlyLateFragTests.release();
20581 }
20582
20583 tcu::TestCaseGroup* createOpExecutionModeTests (tcu::TestContext& testCtx)
20584 {
20585         de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "execution_mode", "Execution mode tests"));
20586
20587 #ifndef CTS_USES_VULKANSC
20588         static const char dataDir[] = "spirv_assembly/instruction/graphics/execution_mode";
20589
20590         static const struct Case
20591         {
20592                 const string name;
20593                 const string desc;
20594         } cases[] =
20595         {
20596                 { "depthless_0",                "FragDepth < Polygon depth: depth test should pass." },
20597                 { "depthless_1",                "FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit depth, but the depth test should pass." },
20598                 { "depthless_2",                "FragDepth < Polygon depth: depth test should fail." },
20599                 { "depthless_3",                "FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit depth, the depth test should fail." },
20600                 { "depthless_4",                "FragDepth < Polygon depth: depth test should pass." },
20601                 { "depthgreater_0",             "FragDepth > Polygon depth: depth test should pass." },
20602                 { "depthgreater_1",             "FragDepth < Polygon depth: violates the promise that FragDepth is greater than the implicit depth, but the depth test should pass." },
20603                 { "depthgreater_2",             "FragDepth > Polygon depth: depth test should fail." },
20604                 { "depthgreater_3",             "FragDepth > Polygon depth: violates the promise that FragDepth is greater than the implicit depth, the depth test should fail." },
20605                 { "depthgreater_4",             "FragDepth > Polygon depth: depth test should pass." },
20606                 { "depthunchanged_0",   "FragDepth == Polygon depth: depth test should pass." },
20607                 { "depthunchanged_1",   "FragDepth == Polygon depth: depth test should fail." },
20608                 { "depthunchanged_2",   "FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit depth, the depth test should pass." },
20609                 { "depthunchanged_3",   "FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit depth, the depth test should fail." },
20610         };
20611
20612         for (const auto& case_ : cases)
20613         {
20614                 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20615                                                                                                                                                         case_.name.c_str(),
20616                                                                                                                                                         case_.desc.c_str(),
20617                                                                                                                                                         dataDir,
20618                                                                                                                                                         case_.name + ".amber");
20619                 testGroup->addChild(testCase);
20620         }
20621 #endif // CTS_USES_VULKANSC
20622
20623         return testGroup.release();
20624 }
20625
20626 tcu::TestCaseGroup* createOpMulExtendedGroup (tcu::TestContext& testCtx)
20627 {
20628         de::MovePtr<tcu::TestCaseGroup> testGroup       (new tcu::TestCaseGroup(testCtx, "mul_extended", "Op[S/U]MulExtended tests"));
20629
20630 #ifndef CTS_USES_VULKANSC
20631         static const char       dataDir[]       = "spirv_assembly/instruction/compute/mul_extended";
20632
20633         static const struct Case
20634         {
20635                 const string name;
20636                 const vector<string> features;
20637         } cases[] =
20638         {
20639                 {       "signed_16bit",         {"Features.shaderInt16", "Storage16BitFeatures.storageBuffer16BitAccess"}                       },
20640                 {       "signed_32bit",         {}                                                                                                                                                                      },
20641                 {       "signed_64bit",         {"Features.shaderInt64"}                                                                                                                        },
20642                 {       "signed_8bit",          {"Float16Int8Features.shaderInt8", "Storage8BitFeatures.storageBuffer8BitAccess"}       },
20643                 {       "unsigned_16bit",       {"Features.shaderInt16", "Storage16BitFeatures.storageBuffer16BitAccess"}                       },
20644                 {       "unsigned_32bit",       {}                                                                                                                                                                      },
20645                 {       "unsigned_64bit",       {"Features.shaderInt64"}                                                                                                                        },
20646                 {       "unsigned_8bit",        {"Float16Int8Features.shaderInt8", "Storage8BitFeatures.storageBuffer8BitAccess"}       }
20647         };
20648
20649         for (const auto& test : cases)
20650         {
20651                 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20652                                                                                                                                                         test.name.c_str(),
20653                                                                                                                                                         "",
20654                                                                                                                                                         dataDir,
20655                                                                                                                                                         test.name + ".amber",
20656                                                                                                                                                         test.features);
20657                 testGroup->addChild(testCase);
20658         }
20659 #endif // CTS_USES_VULKANSC
20660
20661         return testGroup.release();
20662 }
20663
20664 tcu::TestCaseGroup* createQueryGroup (tcu::TestContext& testCtx)
20665 {
20666         de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "image_query", "image query tests"));
20667
20668 #ifndef CTS_USES_VULKANSC
20669         static const char data_dir[] = "spirv_assembly/instruction/image_query";
20670
20671         static const struct
20672         {
20673                 const std::string name;
20674                 const std::string desc;
20675         } cases[] =
20676         {
20677                 { "samples_storage", "Test samples query can be used on storage images" },
20678         };
20679
20680         vector<string> requirements(1, "Features.shaderStorageImageMultisample");
20681
20682         for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
20683         {
20684                 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20685                                                                                                                                                         cases[i].name.c_str(),
20686                                                                                                                                                         cases[i].desc.c_str(),
20687                                                                                                                                                         data_dir,
20688                                                                                                                                                         cases[i].name + ".amber",
20689                                                                                                                                                         requirements);
20690                 testGroup->addChild(testCase);
20691         }
20692 #endif // CTS_USES_VULKANSC
20693
20694         return testGroup.release();
20695 }
20696
// Builds the top-level "instruction" test group for the SPIR-V assembly tests.
//
// The returned group contains a "compute" sub-group, a "graphics" sub-group, and a
// handful of groups attached directly to the root. Registrations wrapped in
// "#ifndef CTS_USES_VULKANSC" are compiled out when CTS_USES_VULKANSC is defined.
//
// \param testCtx  Test context forwarded to every group factory called here.
// \return Raw pointer obtained from MovePtr::release() on the root group.
//         NOTE(review): the caller presumably takes ownership of it - confirm.
20697 tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
20698 {
              // Passed as-is when building the compute version-check group and negated for the graphics one.
20699         const bool testComputePipeline = true;
20700
20701         de::MovePtr<tcu::TestCaseGroup> instructionTests        (new tcu::TestCaseGroup(testCtx, "instruction", "Instructions with special opcodes/operands"));
20702         de::MovePtr<tcu::TestCaseGroup> computeTests            (new tcu::TestCaseGroup(testCtx, "compute", "Compute Instructions with special opcodes/operands"));
20703         de::MovePtr<tcu::TestCaseGroup> graphicsTests           (new tcu::TestCaseGroup(testCtx, "graphics", "Graphics Instructions with special opcodes/operands"));
20704
              // ---- Children of the "compute" group ----
20705         computeTests->addChild(createSpivVersionCheckTests(testCtx, testComputePipeline));
20706         computeTests->addChild(createLocalSizeGroup(testCtx, false));
20707         computeTests->addChild(createLocalSizeGroup(testCtx, true));
20708         computeTests->addChild(createNonSemanticInfoGroup(testCtx));
20709         computeTests->addChild(createOpNopGroup(testCtx));
20710         computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITHOUT_NAN));
20711         computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITH_NAN));
20712         computeTests->addChild(createOpAtomicGroup(testCtx, false));
20713         computeTests->addChild(createOpAtomicGroup(testCtx, true));                                     // Using new StorageBuffer decoration
20714         computeTests->addChild(createOpAtomicGroup(testCtx, false, 1024, true));        // Return value validation
20715         computeTests->addChild(createOpAtomicGroup(testCtx, true, 65535, false, true)); // volatile atomics
20716         computeTests->addChild(createOpLineGroup(testCtx));
20717         computeTests->addChild(createOpModuleProcessedGroup(testCtx));
20718         computeTests->addChild(createOpNoLineGroup(testCtx));
20719         computeTests->addChild(createOpConstantNullGroup(testCtx));
20720         computeTests->addChild(createOpConstantCompositeGroup(testCtx));
20721         computeTests->addChild(createOpConstantUsageGroup(testCtx));
20722         computeTests->addChild(createSpecConstantGroup(testCtx));
20723         computeTests->addChild(createOpSourceGroup(testCtx));
20724         computeTests->addChild(createOpSourceExtensionGroup(testCtx));
20725         computeTests->addChild(createDecorationGroupGroup(testCtx));
20726         computeTests->addChild(createOpPhiGroup(testCtx));
20727         computeTests->addChild(createLoopControlGroup(testCtx));
20728         computeTests->addChild(createFunctionControlGroup(testCtx));
20729         computeTests->addChild(createSelectionControlGroup(testCtx));
20730         computeTests->addChild(createBlockOrderGroup(testCtx));
20731         computeTests->addChild(createMultipleShaderGroup(testCtx));
20732         computeTests->addChild(createMemoryAccessGroup(testCtx));
20733         computeTests->addChild(createOpCopyMemoryGroup(testCtx));
20734         computeTests->addChild(createOpCopyObjectGroup(testCtx));
20735         computeTests->addChild(createNoContractionGroup(testCtx));
20736         computeTests->addChild(createOpUndefGroup(testCtx));
20737         computeTests->addChild(createOpUnreachableGroup(testCtx));
20738         computeTests->addChild(createOpQuantizeToF16Group(testCtx));
20739         computeTests->addChild(createOpFRemGroup(testCtx));
20740         computeTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_PASS));
20741         computeTests->addChild(createOpSRemComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
20742         computeTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_PASS));
20743         computeTests->addChild(createOpSModComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
20744 #ifndef CTS_USES_VULKANSC
20745         computeTests->addChild(createOpSDotKHRComputeGroup(testCtx));
20746         computeTests->addChild(createOpUDotKHRComputeGroup(testCtx));
20747         computeTests->addChild(createOpSUDotKHRComputeGroup(testCtx));
20748         computeTests->addChild(createOpSDotAccSatKHRComputeGroup(testCtx));
20749         computeTests->addChild(createOpUDotAccSatKHRComputeGroup(testCtx));
20750         computeTests->addChild(createOpSUDotAccSatKHRComputeGroup(testCtx));
20751 #endif // CTS_USES_VULKANSC
20752         computeTests->addChild(createConvertComputeTests(testCtx, "OpSConvert", "sconvert"));
20753         computeTests->addChild(createConvertComputeTests(testCtx, "OpUConvert", "uconvert"));
20754         computeTests->addChild(createConvertComputeTests(testCtx, "OpFConvert", "fconvert"));
20755         computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertSToF", "convertstof"));
20756         computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToS", "convertftos"));
20757         computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertUToF", "convertutof"));
20758         computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToU", "convertftou"));
20759         computeTests->addChild(createOpCompositeInsertGroup(testCtx));
20760         computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
20761         computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
20762         computeTests->addChild(createOpNMinGroup(testCtx));
20763         computeTests->addChild(createOpNMaxGroup(testCtx));
20764         computeTests->addChild(createOpNClampGroup(testCtx));
20765         computeTests->addChild(createFloatControlsExtensionlessGroup(testCtx));
              // "compute.android": the 32-bit SRem/SMod compute groups are registered a second time,
              // here with QP_TEST_RESULT_QUALITY_WARNING instead of QP_TEST_RESULT_PASS.
              // NOTE(review): the meaning of the result-code argument is defined by the callee - confirm there.
20766         {
20767                 de::MovePtr<tcu::TestCaseGroup> computeAndroidTests     (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
20768
20769                 computeAndroidTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20770                 computeAndroidTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20771
20772                 computeTests->addChild(computeAndroidTests.release());
20773         }
20774
20775         computeTests->addChild(create8BitStorageComputeGroup(testCtx));
20776         computeTests->addChild(create16BitStorageComputeGroup(testCtx));
20777         computeTests->addChild(createFloatControlsComputeGroup(testCtx));
20778         computeTests->addChild(createUboMatrixPaddingComputeGroup(testCtx));
20779         computeTests->addChild(createCompositeInsertComputeGroup(testCtx));
20780         computeTests->addChild(createVariableInitComputeGroup(testCtx));
20781         computeTests->addChild(createConditionalBranchComputeGroup(testCtx));
20782         computeTests->addChild(createIndexingComputeGroup(testCtx));
20783         computeTests->addChild(createVariablePointersComputeGroup(testCtx));
20784         computeTests->addChild(createPhysicalPointersComputeGroup(testCtx));
20785         computeTests->addChild(createImageSamplerComputeGroup(testCtx));
20786         computeTests->addChild(createOpNameGroup(testCtx));
20787         computeTests->addChild(createOpMemberNameGroup(testCtx));
20788         computeTests->addChild(createPointerParameterComputeGroup(testCtx));
20789         computeTests->addChild(createFloat16Group(testCtx));
20790 #ifndef CTS_USES_VULKANSC
20791         computeTests->addChild(createFloat32Group(testCtx));
20792 #endif // CTS_USES_VULKANSC
20793         computeTests->addChild(createBoolGroup(testCtx));
20794         computeTests->addChild(createWorkgroupMemoryComputeGroup(testCtx));
20795         computeTests->addChild(createSpirvIdsAbuseGroup(testCtx));
20796 #ifndef CTS_USES_VULKANSC
20797         computeTests->addChild(createSignedIntCompareGroup(testCtx));
20798         computeTests->addChild(createSignedOpTestsGroup(testCtx));
20799 #endif // CTS_USES_VULKANSC
20800         computeTests->addChild(createUnusedVariableComputeTests(testCtx));
20801 #ifndef CTS_USES_VULKANSC
20802         computeTests->addChild(createPtrAccessChainGroup(testCtx));
20803         computeTests->addChild(createVectorShuffleGroup(testCtx));
20804 #endif // CTS_USES_VULKANSC
20805         computeTests->addChild(createHlslComputeGroup(testCtx));
20806         computeTests->addChild(createEmptyStructComputeGroup(testCtx));
20807         computeTests->addChild(create64bitCompareComputeGroup(testCtx));
20808 #ifndef CTS_USES_VULKANSC
20809         computeTests->addChild(createOpArrayLengthComputeGroup(testCtx));
20810 #endif // CTS_USES_VULKANSC
20811         computeTests->addChild(createPhysicalStorageBufferTestGroup(testCtx));
20812         computeTests->addChild(createOpMulExtendedGroup(testCtx));
20813
              // ---- Children of the "graphics" group ----
20814         graphicsTests->addChild(createCrossStageInterfaceTests(testCtx));
20815         graphicsTests->addChild(createSpivVersionCheckTests(testCtx, !testComputePipeline));
20816         graphicsTests->addChild(createOpNopTests(testCtx));
20817         graphicsTests->addChild(createOpSourceTests(testCtx));
20818         graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
20819         graphicsTests->addChild(createOpModuleProcessedTests(testCtx));
20820         graphicsTests->addChild(createOpLineTests(testCtx));
20821         graphicsTests->addChild(createOpNoLineTests(testCtx));
20822         graphicsTests->addChild(createOpConstantNullTests(testCtx));
20823         graphicsTests->addChild(createOpConstantCompositeTests(testCtx));
20824         graphicsTests->addChild(createMemoryAccessTests(testCtx));
20825         graphicsTests->addChild(createOpUndefTests(testCtx));
20826         graphicsTests->addChild(createSelectionBlockOrderTests(testCtx));
20827         graphicsTests->addChild(createModuleTests(testCtx));
20828         graphicsTests->addChild(createUnusedVariableTests(testCtx));
20829         graphicsTests->addChild(createSwitchBlockOrderTests(testCtx));
20830         graphicsTests->addChild(createOpPhiTests(testCtx));
20831         graphicsTests->addChild(createNoContractionTests(testCtx));
20832         graphicsTests->addChild(createOpQuantizeTests(testCtx));
20833         graphicsTests->addChild(createLoopTests(testCtx));
20834         graphicsTests->addChild(createSpecConstantTests(testCtx));
20835         graphicsTests->addChild(createSpecConstantOpQuantizeToF16Group(testCtx));
20836         graphicsTests->addChild(createBarrierTests(testCtx));
20837         graphicsTests->addChild(createDecorationGroupTests(testCtx));
20838         graphicsTests->addChild(createFRemTests(testCtx));
20839         graphicsTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
20840         graphicsTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
20841
              // "graphics.android": graphics counterpart of the compute "android" group above,
              // again using QP_TEST_RESULT_QUALITY_WARNING for the SRem/SMod tests.
20842         {
20843                 de::MovePtr<tcu::TestCaseGroup> graphicsAndroidTests    (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
20844
20845                 graphicsAndroidTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20846                 graphicsAndroidTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20847
20848                 graphicsTests->addChild(graphicsAndroidTests.release());
20849         }
20850
20851         graphicsTests->addChild(createOpNameTests(testCtx));
20852         graphicsTests->addChild(createOpNameAbuseTests(testCtx));
20853         graphicsTests->addChild(createOpMemberNameAbuseTests(testCtx));
20854
20855         graphicsTests->addChild(create8BitStorageGraphicsGroup(testCtx));
20856         graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
20857         graphicsTests->addChild(createFloatControlsGraphicsGroup(testCtx));
20858         graphicsTests->addChild(createUboMatrixPaddingGraphicsGroup(testCtx));
20859         graphicsTests->addChild(createCompositeInsertGraphicsGroup(testCtx));
20860         graphicsTests->addChild(createVariableInitGraphicsGroup(testCtx));
20861         graphicsTests->addChild(createConditionalBranchGraphicsGroup(testCtx));
20862         graphicsTests->addChild(createIndexingGraphicsGroup(testCtx));
20863         graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));
20864         graphicsTests->addChild(createImageSamplerGraphicsGroup(testCtx));
20865         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpSConvert", "sconvert"));
20866         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpUConvert", "uconvert"));
20867         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpFConvert", "fconvert"));
20868         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertSToF", "convertstof"));
20869         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToS", "convertftos"));
20870         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertUToF", "convertutof"));
20871         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToU", "convertftou"));
20872         graphicsTests->addChild(createPointerParameterGraphicsGroup(testCtx));
20873         graphicsTests->addChild(createVaryingNameGraphicsGroup(testCtx));
20874         graphicsTests->addChild(createFloat16Tests(testCtx));
20875 #ifndef CTS_USES_VULKANSC
20876         graphicsTests->addChild(createFloat32Tests(testCtx));
20877 #endif // CTS_USES_VULKANSC
20878         graphicsTests->addChild(createSpirvIdsAbuseTests(testCtx));
20879         graphicsTests->addChild(create64bitCompareGraphicsGroup(testCtx));
20880         graphicsTests->addChild(createEarlyFragmentTests(testCtx));
20881         graphicsTests->addChild(createEarlyAndLateFragmentTests(testCtx));
20882         graphicsTests->addChild(createOpExecutionModeTests(testCtx));
20883
              // ---- Assemble the root: "compute" and "graphics" first, then groups attached directly to it ----
20884         instructionTests->addChild(computeTests.release());
20885         instructionTests->addChild(graphicsTests.release());
20886 #ifndef CTS_USES_VULKANSC
20887         instructionTests->addChild(createSpirvVersion1p4Group(testCtx));
20888         instructionTests->addChild(createFunctionParamsGroup(testCtx));
20889 #endif // CTS_USES_VULKANSC
20890         instructionTests->addChild(createQueryGroup(testCtx));
20891         instructionTests->addChild(createTrinaryMinMaxGroup(testCtx));
20892         instructionTests->addChild(createTerminateInvocationGroup(testCtx));
20893
              // Give up MovePtr ownership and return the raw pointer to the caller.
20894         return instructionTests.release();
20895 }
20896
20897 } // SpirVAssembly
20898 } // vkt