external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsmInstructionTests.cpp

   1 /*-------------------------------------------------------------------------
   2  * Vulkan Conformance Tests
   3  * ------------------------
   4  *
   5  * Copyright (c) 2015 Google Inc.
   6  * Copyright (c) 2016 The Khronos Group Inc.
   7  *
   8  * Licensed under the Apache License, Version 2.0 (the "License");
   9  * you may not use this file except in compliance with the License.
  10  * You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  *
  20  *//*!
  21  * \file
  22  * \brief SPIR-V Assembly Tests for Instructions (special opcode/operand)
  23  *//*--------------------------------------------------------------------*/
  24
  25 #include "vktSpvAsmInstructionTests.hpp"
  26
  27 #include "tcuCommandLine.hpp"
  28 #include "tcuFormatUtil.hpp"
  29 #include "tcuFloat.hpp"
  30 #include "tcuFloatFormat.hpp"
  31 #include "tcuRGBA.hpp"
  32 #include "tcuStringTemplate.hpp"
  33 #include "tcuTestLog.hpp"
  34 #include "tcuVectorUtil.hpp"
  35 #include "tcuInterval.hpp"
  36
  37 #include "vkDefs.hpp"
  38 #include "vkDeviceUtil.hpp"
  39 #include "vkMemUtil.hpp"
  40 #include "vkPlatform.hpp"
  41 #include "vkPrograms.hpp"
  42 #include "vkQueryUtil.hpp"
  43 #include "vkRef.hpp"
  44 #include "vkRefUtil.hpp"
  45 #include "vkStrUtil.hpp"
  46 #include "vkTypeUtil.hpp"
  47
  48 #include "deStringUtil.hpp"
  49 #include "deUniquePtr.hpp"
  50 #include "deMath.h"
  51 #include "deRandom.hpp"
  52 #include "tcuStringTemplate.hpp"
  53
  54 #include "vktSpvAsmCrossStageInterfaceTests.hpp"
  55 #include "vktSpvAsm8bitStorageTests.hpp"
  56 #include "vktSpvAsm16bitStorageTests.hpp"
  57 #include "vktSpvAsmUboMatrixPaddingTests.hpp"
  58 #include "vktSpvAsmConditionalBranchTests.hpp"
  59 #include "vktSpvAsmIndexingTests.hpp"
  60 #include "vktSpvAsmImageSamplerTests.hpp"
  61 #include "vktSpvAsmComputeShaderCase.hpp"
  62 #include "vktSpvAsmComputeShaderTestUtil.hpp"
  63 #include "vktSpvAsmFloatControlsTests.hpp"
  64 #include "vktSpvAsmFromHlslTests.hpp"
  65 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
  66 #include "vktSpvAsmVariablePointersTests.hpp"
  67 #include "vktSpvAsmVariableInitTests.hpp"
  68 #include "vktSpvAsmPointerParameterTests.hpp"
  69 #include "vktSpvAsmSpirvVersion1p4Tests.hpp"
  70 #include "vktSpvAsmSpirvVersionTests.hpp"
  71 #include "vktTestCaseUtil.hpp"
  72 #include "vktSpvAsmLoopDepLenTests.hpp"
  73 #include "vktSpvAsmLoopDepInfTests.hpp"
  74 #include "vktSpvAsmCompositeInsertTests.hpp"
  75 #include "vktSpvAsmVaryingNameTests.hpp"
  76 #include "vktSpvAsmWorkgroupMemoryTests.hpp"
  77 #include "vktSpvAsmSignedIntCompareTests.hpp"
  78 #include "vktSpvAsmPtrAccessChainTests.hpp"
  79 #include "vktSpvAsm64bitCompareTests.hpp"
  80
  81 #include <cmath>
  82 #include <limits>
  83 #include <map>
  84 #include <string>
  85 #include <sstream>
  86 #include <utility>
  87 #include <stack>
  88
  89 namespace vkt
  90 {
  91 namespace SpirVAssembly
  92 {
  93
  94 namespace
  95 {
  96
  97 using namespace vk;
  98 using std::map;
  99 using std::string;
 100 using std::vector;
 101 using tcu::IVec3;
 102 using tcu::IVec4;
 103 using tcu::RGBA;
 104 using tcu::TestLog;
 105 using tcu::TestStatus;
 106 using tcu::Vec4;
 107 using de::UniquePtr;
 108 using tcu::StringTemplate;
 109 using tcu::Vec4;
 110
 111 const bool TEST_WITH_NAN        = true;	// Named flag value; presumably selects test variants that include NaN inputs (usage not visible here - confirm).
 112 const bool TEST_WITHOUT_NAN     = false;	// Counterpart of TEST_WITH_NAN; presumably selects variants without NaN inputs (confirm against callers).
 113
 114 const string loadScalarF16FromUint =
             // SPIR-V assembly template defining function %ld_arg_${var}, which takes
             // an %i32 index and returns one %f16 read from the variable %${var}.
             // ${var} is a placeholder to be substituted before assembly (presumably
             // via tcu::StringTemplate). Types, constants and %${var} itself are not
             // declared here and must come from the surrounding shader text.
             // Each u32 word is reinterpreted as a %v2f16, so the word index is
             // index / %c_u32_2 and the component is index & %c_u32_1.
 115         "%ld_arg_${var} = OpFunction %f16 None %f16_i32_fn\n"
 116         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 117         "%ld_arg_${var}_entry = OpLabel\n"
             // Unsigned view of the index, used for the word-index division.
 118         "%ld_arg_${var}_conv = OpBitcast %u32 %ld_arg_${var}_param\n"
 119         "%ld_arg_${var}_div = OpUDiv %u32 %ld_arg_${var}_conv %c_u32_2\n"
             // Selects which half of the loaded word is returned.
 120         "%ld_arg_${var}_and_low = OpBitwiseAnd %u32 %ld_arg_${var}_param %c_u32_1\n"
 121         "%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_div\n"
 122         "%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
 123         "%ld_arg_${var}_unpack = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
 124         "%ld_arg_${var}_ex = OpVectorExtractDynamic %f16 %ld_arg_${var}_unpack %ld_arg_${var}_and_low\n"
 125         "OpReturnValue %ld_arg_${var}_ex\n"
 126         "OpFunctionEnd\n";
 127
 128 const string loadV2F16FromUint =
             // SPIR-V assembly template defining function %ld_arg_${var}, which takes
             // an %i32 index and returns a %v2f16. It loads the single u32 addressed by
             // the access chain (%${var}, %c_u32_0, index) and bitcasts it to %v2f16.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 129         "%ld_arg_${var} = OpFunction %v2f16 None %v2f16_i32_fn\n"
 130         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 131         "%ld_arg_${var}_entry = OpLabel\n"
 132         "%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param\n"
 133         "%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
 134         "%ld_arg_${var}_cast = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
 135         "OpReturnValue %ld_arg_${var}_cast\n"
 136         "OpFunctionEnd\n";
 137
 138 const string loadV3F16FromUints =
             // SPIR-V assembly template defining function %ld_arg_${var}, which takes
             // an %i32 index and returns a %v3f16 read from the variable %${var}.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 139         // Since we allocate a vec4 worth of values, this case is almost the
 140         // same as the vec4 case (loadV4F16FromUints): two u32 words are loaded and
             // bitcast to %v2f16; only the result type and the shuffle mask differ.
 141         "%ld_arg_${var} = OpFunction %v3f16 None %v3f16_i32_fn\n"
 142         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 143         "%ld_arg_${var}_entry = OpLabel\n"
 144         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 145         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 146         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 147         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 148         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 149         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
             // Mask 0 1 2 keeps both halves of the first word and the first half of
             // the second word; the last loaded half is dropped.
 150         "%ld_arg_${var}_shuffle = OpVectorShuffle %v3f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2\n"
 151         "OpReturnValue %ld_arg_${var}_shuffle\n"
 152         "OpFunctionEnd\n";
 153
 154 const string loadV4F16FromUints =
             // SPIR-V assembly template defining function %ld_arg_${var}, which takes
             // an %i32 index and returns a %v4f16 read from the variable %${var}.
             // Two u32 words of the indexed element (sub-indices %c_u32_0 and
             // %c_u32_1) are loaded, each is bitcast to %v2f16, and the two pairs are
             // concatenated with OpVectorShuffle mask 0 1 2 3.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 155         "%ld_arg_${var} = OpFunction %v4f16 None %v4f16_i32_fn\n"
 156         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 157         "%ld_arg_${var}_entry = OpLabel\n"
 158         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 159         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 160         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 161         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 162         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 163         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
 164         "%ld_arg_${var}_shuffle = OpVectorShuffle %v4f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2 3\n"
 165         "OpReturnValue %ld_arg_${var}_shuffle\n"
 166         "OpFunctionEnd\n";
 167
 168 const string loadM2x2F16FromUints =
             // SPIR-V assembly template defining function %ld_arg_${var}, which takes
             // an %i32 index and returns a %m2x2f16 read from the variable %${var}.
             // Two u32 words of the indexed element (sub-indices %c_u32_0 and
             // %c_u32_1) are loaded; each word is bitcast to one %v2f16 and the two
             // vectors become the constituents of the matrix.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 169         "%ld_arg_${var} = OpFunction %m2x2f16 None %m2x2f16_i32_fn\n"
 170         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 171         "%ld_arg_${var}_entry = OpLabel\n"
 172         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 173         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 174         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 175         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 176         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 177         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
 178         "%ld_arg_${var}_cons = OpCompositeConstruct %m2x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1\n"
 179         "OpReturnValue %ld_arg_${var}_cons\n"
 180         "OpFunctionEnd\n";
 181
 182 const string loadM2x3F16FromUints =
             // SPIR-V assembly template defining function %ld_arg_${var}, which takes
             // an %i32 index and returns a %m2x3f16 read from the variable %${var}.
             // Four u32 words of the indexed element (sub-indices %c_u32_0 ..
             // %c_u32_3) are loaded and bitcast to %v2f16. Words are consumed in
             // pairs: each pair is shuffled with mask 0 1 2 into one %v3f16, which
             // drops the last half of the pair's second word.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 183         "%ld_arg_${var} = OpFunction %m2x3f16 None %m2x3f16_i32_fn\n"
 184         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 185         "%ld_arg_${var}_entry = OpLabel\n"
 186         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 187         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 188         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 189         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 190         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 191         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 192         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 193         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 194         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 195         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 196         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 197         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 198         "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
 199         "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
 200         "%ld_arg_${var}_mat = OpCompositeConstruct %m2x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
 201         "OpReturnValue %ld_arg_${var}_mat\n"
 202         "OpFunctionEnd\n";
 203
 204 const string loadM2x4F16FromUints =
             // SPIR-V assembly template defining function %ld_arg_${var}, which takes
             // an %i32 index and returns a %m2x4f16 read from the variable %${var}.
             // Four u32 words of the indexed element (sub-indices %c_u32_0 ..
             // %c_u32_3) are loaded and bitcast to %v2f16. Words are consumed in
             // pairs: each pair is concatenated with mask 0 1 2 3 into one %v4f16.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 205         "%ld_arg_${var} = OpFunction %m2x4f16 None %m2x4f16_i32_fn\n"
 206         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 207         "%ld_arg_${var}_entry = OpLabel\n"
 208         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 209         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 210         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 211         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 212         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 213         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 214         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 215         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 216         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 217         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 218         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 219         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 220         "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
 221         "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
 222         "%ld_arg_${var}_mat = OpCompositeConstruct %m2x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
 223         "OpReturnValue %ld_arg_${var}_mat\n"
 224         "OpFunctionEnd\n";
 225
 226 const string loadM3x2F16FromUints =
             // SPIR-V assembly template defining function %ld_arg_${var}, which takes
             // an %i32 index and returns a %m3x2f16 read from the variable %${var}.
             // Three u32 words of the indexed element (sub-indices %c_u32_0 ..
             // %c_u32_2) are loaded; each word is bitcast to one %v2f16 and the three
             // vectors become the constituents of the matrix.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 227         "%ld_arg_${var} = OpFunction %m3x2f16 None %m3x2f16_i32_fn\n"
 228         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 229         "%ld_arg_${var}_entry = OpLabel\n"
 230         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 231         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 232         "%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 233         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 234         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 235         "%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
 236         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 237         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
 238         "%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
 239         "%ld_arg_${var}_mat = OpCompositeConstruct %m3x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2\n"
 240         "OpReturnValue %ld_arg_${var}_mat\n"
 241         "OpFunctionEnd\n";
 242
 243 const string loadM3x3F16FromUints =
             // SPIR-V assembly template defining function %ld_arg_${var}, which takes
             // an %i32 index and returns a %m3x3f16 read from the variable %${var}.
             // Six u32 words of the indexed element (sub-indices %c_u32_0 ..
             // %c_u32_5) are loaded and bitcast to %v2f16. Words are consumed in
             // pairs: each pair is shuffled with mask 0 1 2 into one %v3f16, which
             // drops the last half of the pair's second word.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 244         "%ld_arg_${var} = OpFunction %m3x3f16 None %m3x3f16_i32_fn\n"
 245         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 246         "%ld_arg_${var}_entry = OpLabel\n"
 247         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 248         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 249         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 250         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 251         "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
 252         "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
 253         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 254         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 255         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 256         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 257         "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
 258         "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
 259         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 260         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 261         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 262         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 263         "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
 264         "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
 265         "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
 266         "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
 267         "%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
 268         "%ld_arg_${var}_mat = OpCompositeConstruct %m3x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
 269         "OpReturnValue %ld_arg_${var}_mat\n"
 270         "OpFunctionEnd\n";
 271
 272 const string loadM3x4F16FromUints =
             // SPIR-V assembly template defining function %ld_arg_${var}, which takes
             // an %i32 index and returns a %m3x4f16 read from the variable %${var}.
             // Six u32 words of the indexed element (sub-indices %c_u32_0 ..
             // %c_u32_5) are loaded and bitcast to %v2f16. Words are consumed in
             // pairs: each pair is concatenated with mask 0 1 2 3 into one %v4f16.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 273         "%ld_arg_${var} = OpFunction %m3x4f16 None %m3x4f16_i32_fn\n"
 274         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 275         "%ld_arg_${var}_entry = OpLabel\n"
 276         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 277         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 278         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 279         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 280         "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
 281         "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
 282         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 283         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 284         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 285         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 286         "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
 287         "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
 288         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 289         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 290         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 291         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 292         "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
 293         "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
 294         "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
 295         "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
 296         "%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
 297         "%ld_arg_${var}_mat = OpCompositeConstruct %m3x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
 298         "OpReturnValue %ld_arg_${var}_mat\n"
 299         "OpFunctionEnd\n";
 300
 301 const string loadM4x2F16FromUints =
             // SPIR-V assembly template defining function %ld_arg_${var}, which takes
             // an %i32 index and returns a %m4x2f16 read from the variable %${var}.
             // Four u32 words of the indexed element (sub-indices %c_u32_0 ..
             // %c_u32_3) are loaded; each word is bitcast to one %v2f16 and the four
             // vectors become the constituents of the matrix.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 302         "%ld_arg_${var} = OpFunction %m4x2f16 None %m4x2f16_i32_fn\n"
 303         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 304         "%ld_arg_${var}_entry = OpLabel\n"
 305         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 306         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 307         "%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 308         "%ld_arg_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 309         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 310         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 311         "%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
 312         "%ld_arg_${var}_ld3 = OpLoad %u32 %ld_arg_${var}_gep3\n"
 313         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 314         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
 315         "%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
 316         "%ld_arg_${var}_bc3 = OpBitcast %v2f16 %ld_arg_${var}_ld3\n"
 317         "%ld_arg_${var}_mat = OpCompositeConstruct %m4x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2 %ld_arg_${var}_bc3\n"
 318         "OpReturnValue %ld_arg_${var}_mat\n"
 319         "OpFunctionEnd\n";
 320
 321 const string loadM4x3F16FromUints =
             // SPIR-V assembly template defining function %ld_arg_${var}, which takes
             // an %i32 index and returns a %m4x3f16 read from the variable %${var}.
             // Eight u32 words of the indexed element (sub-indices %c_u32_0 ..
             // %c_u32_7) are loaded and bitcast to %v2f16. Words are consumed in
             // pairs: each pair is shuffled with mask 0 1 2 into one %v3f16, which
             // drops the last half of the pair's second word.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 322         "%ld_arg_${var} = OpFunction %m4x3f16 None %m4x3f16_i32_fn\n"
 323         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 324         "%ld_arg_${var}_entry = OpLabel\n"
 325         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 326         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 327         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 328         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 329         "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
 330         "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
 331         "%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
 332         "%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
 333         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 334         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 335         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 336         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 337         "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
 338         "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
 339         "%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
 340         "%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
 341         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 342         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 343         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 344         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 345         "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
 346         "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
 347         "%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
 348         "%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
 349         "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
 350         "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
 351         "%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
 352         "%ld_arg_${var}_vec3 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2\n"
 353         "%ld_arg_${var}_mat = OpCompositeConstruct %m4x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
 354         "OpReturnValue %ld_arg_${var}_mat\n"
 355         "OpFunctionEnd\n";
 356
 357 const string loadM4x4F16FromUints =
             // SPIR-V assembly template defining function %ld_arg_${var}, which takes
             // an %i32 index and returns a %m4x4f16 read from the variable %${var}.
             // Eight u32 words of the indexed element (sub-indices %c_u32_0 ..
             // %c_u32_7) are loaded and bitcast to %v2f16. Words are consumed in
             // pairs: each pair is concatenated with mask 0 1 2 3 into one %v4f16.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 358         "%ld_arg_${var} = OpFunction %m4x4f16 None %m4x4f16_i32_fn\n"
 359         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 360         "%ld_arg_${var}_entry = OpLabel\n"
 361         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 362         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 363         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 364         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 365         "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
 366         "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
 367         "%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
 368         "%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
 369         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 370         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 371         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 372         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 373         "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
 374         "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
 375         "%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
 376         "%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
 377         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 378         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 379         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 380         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 381         "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
 382         "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
 383         "%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
 384         "%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
 385         "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
 386         "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
 387         "%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
 388         "%ld_arg_${var}_vec3 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2 3\n"
 389         "%ld_arg_${var}_mat = OpCompositeConstruct %m4x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
 390         "OpReturnValue %ld_arg_${var}_mat\n"
 391         "OpFunctionEnd\n";
 392
 393 const string storeScalarF16AsUint =
             // SPIR-V assembly template defining function %st_fn_${var}, which takes
             // an %f16 value and an %i32 index, returns %void, and writes the value
             // into one half of the u32 word at index / %c_u32_2 of %${var}.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 394         // This version is sensitive to the initial value in the output buffer.
 395         // The infrastructure sets all output buffer bits to one before invoking
 396         // the shader, so this version uses OpAtomicAnd to clear exactly the bits
 397         // that must be zero while leaving the other half of the word untouched.
 398         "%st_fn_${var} = OpFunction %void None %void_f16_i32_fn\n"
 399         "%st_fn_${var}_param1 = OpFunctionParameter %f16\n"
 400         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 401         "%st_fn_${var}_entry = OpLabel\n"
             // Low bit of the index picks the half (vector component) to populate.
 402         "%st_fn_${var}_and_low = OpBitwiseAnd %u32 %st_fn_${var}_param2 %c_u32_1\n"
 403         "%st_fn_${var}_zero_vec = OpBitcast %v2f16 %c_u32_0\n"
 404         "%st_fn_${var}_insert = OpVectorInsertDynamic %v2f16 %st_fn_${var}_zero_vec %st_fn_${var}_param1 %st_fn_${var}_and_low\n"
 405         "%st_fn_${var}_odd = OpIEqual %bool %st_fn_${var}_and_low %c_u32_1\n"
 406         // Or 16 bits of ones into the half that was not populated with the result.
 407         "%st_fn_${var}_sel = OpSelect %u32 %st_fn_${var}_odd %c_u32_low_ones %c_u32_high_ones\n"
 408         "%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_insert\n"
 409         "%st_fn_${var}_or = OpBitwiseOr %u32 %st_fn_${var}_cast %st_fn_${var}_sel\n"
 410         "%st_fn_${var}_conv = OpBitcast %u32 %st_fn_${var}_param2\n"
 411         "%st_fn_${var}_div = OpUDiv %u32 %st_fn_${var}_conv %c_u32_2\n"
 412         "%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_div\n"
             // OpAtomicAnd operands: pointer, scope (%c_u32_1), semantics (%c_u32_0), value.
 413         "%st_fn_${var}_and = OpAtomicAnd %u32 %st_fn_${var}_gep %c_u32_1 %c_u32_0 %st_fn_${var}_or\n"
 414         "OpReturn\n"
 415         "OpFunctionEnd\n";
 416
 417 const string storeV2F16AsUint =
             // SPIR-V assembly template defining function %st_fn_${var}, which takes
             // a %v2f16 value and an %i32 index and returns %void. The vector is
             // bitcast to one u32 and stored through the access chain
             // (%${var}, %c_u32_0, index).
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 418         "%st_fn_${var} = OpFunction %void None %void_v2f16_i32_fn\n"
 419         "%st_fn_${var}_param1 = OpFunctionParameter %v2f16\n"
 420         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 421         "%st_fn_${var}_entry = OpLabel\n"
 422         "%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_param1\n"
 423         "%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2\n"
 424         "OpStore %st_fn_${var}_gep %st_fn_${var}_cast\n"
 425         "OpReturn\n"
 426         "OpFunctionEnd\n";
 427
 428 const string storeV3F16AsUints =
             // SPIR-V assembly template defining function %st_fn_${var}, which takes
             // a %v3f16 value and an %i32 index, returns %void, and stores the value
             // as two u32 words (sub-indices %c_u32_0 and %c_u32_1) of the indexed
             // element of %${var}.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 429         // Since we allocate a vec4 worth of values, this case can be treated
 430         // almost the same as a vec4 case. We will store some extra data that
 431         // should not be compared.
 432         "%st_fn_${var} = OpFunction %void None %void_v3f16_i32_fn\n"
 433         "%st_fn_${var}_param1 = OpFunctionParameter %v3f16\n"
 434         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 435         "%st_fn_${var}_entry = OpLabel\n"
 436         "%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
             // Both shuffle operands are the same 3-component vector, so index 3
             // selects component 0 of the second operand; that is the extra data.
 437         "%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
 438         "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
 439         "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
 440         "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
 441         "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
 442         "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
 443         "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
 444         "OpReturn\n"
 445         "OpFunctionEnd\n";
 446
 447 const string storeV4F16AsUints =
             // SPIR-V assembly template defining function %st_fn_${var}, which takes
             // a %v4f16 value and an %i32 index and returns %void. The vector is
             // split into components (0 1) and (2 3); each %v2f16 pair is bitcast to
             // u32 and stored to sub-indices %c_u32_0 and %c_u32_1 of the indexed
             // element of %${var}.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 448         "%st_fn_${var} = OpFunction %void None %void_v4f16_i32_fn\n"
 449         "%st_fn_${var}_param1 = OpFunctionParameter %v4f16\n"
 450         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 451         "%st_fn_${var}_entry = OpLabel\n"
 452         "%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
 453         "%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
 454         "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
 455         "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
 456         "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
 457         "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
 458         "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
 459         "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
 460         "OpReturn\n"
 461         "OpFunctionEnd\n";
 462
 463 const string storeM2x2F16AsUints =
             // SPIR-V assembly template defining function %st_fn_${var}, which takes
             // a %m2x2f16 value and an %i32 index and returns %void. Constituents 0
             // and 1 of the matrix are extracted as %v2f16, bitcast to u32 and stored
             // to sub-indices %c_u32_0 and %c_u32_1 of the indexed element of %${var}.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 464         "%st_fn_${var} = OpFunction %void None %void_m2x2f16_i32_fn\n"
 465         "%st_fn_${var}_param1 = OpFunctionParameter %m2x2f16\n"
 466         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 467         "%st_fn_${var}_entry = OpLabel\n"
 468         "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
 469         "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
 470         "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
 471         "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
 472         "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
 473         "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
 474         "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
 475         "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
 476         "OpReturn\n"
 477         "OpFunctionEnd\n";
 478
 479 const string storeM2x3F16AsUints =
             // SPIR-V assembly template defining function %st_fn_${var}, which takes
             // a %m2x3f16 value and an %i32 index and returns %void. Each of the two
             // %v3f16 constituents is split into %v2f16 pairs (0 1) and (2 3), bitcast
             // to u32, and stored to sub-indices %c_u32_0 .. %c_u32_3 of the indexed
             // element of %${var}.
             // ${var} is a placeholder substituted before assembly; all other
             // referenced ids are declared outside this string.
 480         // In the extracted elements for 01 and 11 the second element doesn't
 481         // matter. (Shuffle index 3 on a pair of identical 3-component operands
             // selects component 0 of the second operand.)
 482         "%st_fn_${var} = OpFunction %void None %void_m2x3f16_i32_fn\n"
 483         "%st_fn_${var}_param1 = OpFunctionParameter %m2x3f16\n"
 484         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 485         "%st_fn_${var}_entry = OpLabel\n"
 486         "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
 487         "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
 488         "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
 489         "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
 490         "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
 491         "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
 492         "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
 493         "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
 494         "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
 495         "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
 496         "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
 497         "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
 498         "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
 499         "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
 500         "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
 501         "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
 502         "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
 503         "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
 504         "OpReturn\n"
 505         "OpFunctionEnd\n";
 506
// SPIR-V assembly template defining %st_fn_${var}, a function that takes a
// %m2x4f16 matrix and an %i32 index and writes the matrix out as 32-bit uints.
// ${var} is a placeholder that has to be replaced with the name of the
// destination variable before the text is assembled.
// Each of the two %v4f16 columns is split into its (0, 1) and (2, 3) component
// pairs; every %v2f16 pair is bitcast to %u32 and stored through the access
// chain %${var} -> %c_u32_0 -> <index parameter> -> %c_u32_0 .. %c_u32_3.
const string storeM2x4F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m2x4f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m2x4f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 532
// SPIR-V assembly template defining %st_fn_${var}, a function that takes a
// %m3x2f16 matrix and an %i32 index and writes the matrix out as 32-bit uints.
// ${var} is a placeholder that has to be replaced with the name of the
// destination variable before the text is assembled.
// Each of the three %v2f16 columns is bitcast directly to one %u32 and stored
// through the access chain
// %${var} -> %c_u32_0 -> <index parameter> -> %c_u32_0 .. %c_u32_2.
const string storeM3x2F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m3x2f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m3x2f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
        "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
        "%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
        "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
        "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
        "OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 552
// SPIR-V assembly template defining %st_fn_${var}, a function that takes a
// %m3x3f16 matrix and an %i32 index and writes the matrix out as 32-bit uints.
// ${var} is a placeholder that has to be replaced with the name of the
// destination variable before the text is assembled.
// Each of the three %v3f16 columns is split into two %v2f16 halves; every half
// is bitcast to %u32 and stored through the access chain
// %${var} -> %c_u32_0 -> <index parameter> -> %c_u32_0 .. %c_u32_5.
const string storeM3x3F16AsUints =
        // The second element of the second half (eleN1) of each broken down vec3
        // doesn't matter: shuffle indices "2 3" pick component 2 of the first
        // operand and component 0 of the second one, which is only padding.
        "%st_fn_${var} = OpFunction %void None %void_m3x3f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m3x3f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
        "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
        "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
        "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
        "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 588
// SPIR-V assembly template defining %st_fn_${var}, a function that takes a
// %m3x4f16 matrix and an %i32 index and writes the matrix out as 32-bit uints.
// ${var} is a placeholder that has to be replaced with the name of the
// destination variable before the text is assembled.
// Each of the three %v4f16 columns is split into its (0, 1) and (2, 3)
// component pairs; every %v2f16 pair is bitcast to %u32 and stored through the
// access chain %${var} -> %c_u32_0 -> <index parameter> -> %c_u32_0 .. %c_u32_5.
const string storeM3x4F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m3x4f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m3x4f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
        "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
        "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
        "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
        "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 623
// SPIR-V assembly template defining %st_fn_${var}, a function that takes a
// %m4x2f16 matrix and an %i32 index and writes the matrix out as 32-bit uints.
// ${var} is a placeholder that has to be replaced with the name of the
// destination variable before the text is assembled.
// Each of the four %v2f16 columns is bitcast directly to one %u32 and stored
// through the access chain
// %${var} -> %c_u32_0 -> <index parameter> -> %c_u32_0 .. %c_u32_3.
const string storeM4x2F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m4x2f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m4x2f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_ex3 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 3\n"
        "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
        "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
        "%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
        "%st_fn_${var}_bc3 = OpBitcast %u32 %st_fn_${var}_ex3\n"
        "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
        "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
        "OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
        "OpStore %st_fn_${var}_gep3 %st_fn_${var}_bc3\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 647
// SPIR-V assembly template defining %st_fn_${var}, a function that takes a
// %m4x3f16 matrix and an %i32 index and writes the matrix out as 32-bit uints.
// ${var} is a placeholder that has to be replaced with the name of the
// destination variable before the text is assembled.
// Each of the four %v3f16 columns is split into two %v2f16 halves; every half
// is bitcast to %u32 and stored through the access chain
// %${var} -> %c_u32_0 -> <index parameter> -> %c_u32_0 .. %c_u32_7.
const string storeM4x3F16AsUints =
        // The last element of each decomposed vec3 doesn't matter: in the second
        // half (eleN1) shuffle indices "2 3" pick component 2 of the first operand
        // and component 0 of the second one, which is only padding.
        "%st_fn_${var} = OpFunction %void None %void_m4x3f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m4x3f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_ex3 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 3\n"
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
        "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
        "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
        "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
        "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
        "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
        "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
        "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
        "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
        "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
        "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
        "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
        "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 692
// SPIR-V assembly template defining %st_fn_${var}, a function that takes a
// %m4x4f16 matrix and an %i32 index and writes the matrix out as 32-bit uints.
// ${var} is a placeholder that has to be replaced with the name of the
// destination variable before the text is assembled.
// Each of the four %v4f16 columns is split into its (0, 1) and (2, 3)
// component pairs; every %v2f16 pair is bitcast to %u32 and stored through the
// access chain %${var} -> %c_u32_0 -> <index parameter> -> %c_u32_0 .. %c_u32_7.
const string storeM4x4F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m4x4f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m4x4f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_ex3 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 3\n"
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
        "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
        "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
        "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
        "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
        "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
        "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
        "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
        "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
        "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
        "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
        "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
        "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 736
 737 template<typename T>
 738 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, int offset = 0)
 739 {
 740         T* const typedPtr = (T*)dst;
 741         for (int ndx = 0; ndx < numValues; ndx++)
 742                 typedPtr[offset + ndx] = de::randomScalar<T>(rnd, minValue, maxValue);
 743 }
 744
 745 // Filter is a function that returns true if a value should pass, false otherwise.
 746 template<typename T, typename FilterT>
 747 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, FilterT filter, int offset = 0)
 748 {
 749         T* const typedPtr = (T*)dst;
 750         T value;
 751         for (int ndx = 0; ndx < numValues; ndx++)
 752         {
 753                 do
 754                         value = de::randomScalar<T>(rnd, minValue, maxValue);
 755                 while (!filter(value));
 756
 757                 typedPtr[offset + ndx] = value;
 758         }
 759 }
 760
 761 // Gets a 64-bit integer with a more logarithmic distribution
 762 deInt64 randomInt64LogDistributed (de::Random& rnd)
 763 {
 764         deInt64 val = rnd.getUint64();
 765         val &= (1ull << rnd.getInt(1, 63)) - 1;
 766         if (rnd.getBool())
 767                 val = -val;
 768         return val;
 769 }
 770
 771 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues)
 772 {
 773         for (int ndx = 0; ndx < numValues; ndx++)
 774                 dst[ndx] = randomInt64LogDistributed(rnd);
 775 }
 776
 777 template<typename FilterT>
 778 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues, FilterT filter)
 779 {
 780         for (int ndx = 0; ndx < numValues; ndx++)
 781         {
 782                 deInt64 value;
 783                 do {
 784                         value = randomInt64LogDistributed(rnd);
 785                 } while (!filter(value));
 786                 dst[ndx] = value;
 787         }
 788 }
 789
 790 inline bool filterNonNegative (const deInt64 value)
 791 {
 792         return value >= 0;
 793 }
 794
 795 inline bool filterPositive (const deInt64 value)
 796 {
 797         return value > 0;
 798 }
 799
 800 inline bool filterNotZero (const deInt64 value)
 801 {
 802         return value != 0;
 803 }
 804
 805 static void floorAll (vector<float>& values)
 806 {
 807         for (size_t i = 0; i < values.size(); i++)
 808                 values[i] = deFloatFloor(values[i]);
 809 }
 810
 811 static void floorAll (vector<Vec4>& values)
 812 {
 813         for (size_t i = 0; i < values.size(); i++)
 814                 values[i] = floor(values[i]);
 815 }
 816
 817 struct CaseParameter
 818 {
 819         const char*             name;
 820         string                  param;
 821
 822         CaseParameter   (const char* case_, const string& param_) : name(case_), param(param_) {}
 823 };
 824
 825 // Assembly code used for testing LocalSize, OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
 826 //
 827 // #version 430
 828 //
 829 // layout(std140, set = 0, binding = 0) readonly buffer Input {
 830 //   float elements[];
 831 // } input_data;
 832 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
 833 //   float elements[];
 834 // } output_data;
 835 //
 836 // layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 837 //
 838 // void main() {
 839 //   uint x = gl_GlobalInvocationID.x;
 840 //   output_data.elements[x] = -input_data.elements[x];
 841 // }
 842
 843 static string getAsmForLocalSizeTest(bool useLiteralLocalSize, bool useSpecConstantWorkgroupSize, IVec3 workGroupSize, deUint32 ndx)
 844 {
 845         std::ostringstream out;
 846         out << getComputeAsmShaderPreambleWithoutLocalSize();
 847
 848         if (useLiteralLocalSize)
 849         {
 850                 out << "OpExecutionMode %main LocalSize "
 851                         << workGroupSize.x() << " " << workGroupSize.y() << " " << workGroupSize.z() << "\n";
 852         }
 853
 854         out << "OpSource GLSL 430\n"
 855                 "OpName %main           \"main\"\n"
 856                 "OpName %id             \"gl_GlobalInvocationID\"\n"
 857                 "OpDecorate %id BuiltIn GlobalInvocationId\n";
 858
 859         if (useSpecConstantWorkgroupSize)
 860         {
 861                 out << "OpDecorate %spec_0 SpecId 100\n"
 862                         << "OpDecorate %spec_1 SpecId 101\n"
 863                         << "OpDecorate %spec_2 SpecId 102\n"
 864                         << "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n";
 865         }
 866
 867         out << getComputeAsmInputOutputBufferTraits()
 868                 << getComputeAsmCommonTypes()
 869                 << getComputeAsmInputOutputBuffer()
 870                 << "%id        = OpVariable %uvec3ptr Input\n"
 871                 << "%zero      = OpConstant %i32 0 \n";
 872
 873         if (useSpecConstantWorkgroupSize)
 874         {
 875                 out     << "%spec_0   = OpSpecConstant %u32 "<< workGroupSize.x() << "\n"
 876                         << "%spec_1   = OpSpecConstant %u32 "<< workGroupSize.y() << "\n"
 877                         << "%spec_2   = OpSpecConstant %u32 "<< workGroupSize.z() << "\n"
 878                         << "%gl_WorkGroupSize = OpSpecConstantComposite %uvec3 %spec_0 %spec_1 %spec_2\n";
 879         }
 880
 881         out << "%main      = OpFunction %void None %voidf\n"
 882                 << "%label     = OpLabel\n"
 883                 << "%idval     = OpLoad %uvec3 %id\n"
 884                 << "%ndx         = OpCompositeExtract %u32 %idval " << ndx << "\n"
 885
 886                         "%inloc     = OpAccessChain %f32ptr %indata %zero %ndx\n"
 887                         "%inval     = OpLoad %f32 %inloc\n"
 888                         "%neg       = OpFNegate %f32 %inval\n"
 889                         "%outloc    = OpAccessChain %f32ptr %outdata %zero %ndx\n"
 890                         "             OpStore %outloc %neg\n"
 891                         "             OpReturn\n"
 892                         "             OpFunctionEnd\n";
 893         return out.str();
 894 }
 895
 896 tcu::TestCaseGroup* createLocalSizeGroup (tcu::TestContext& testCtx)
 897 {
 898         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "localsize", ""));
 899         ComputeShaderSpec                               spec;
 900         de::Random                                              rnd                             (deStringHash(group->getName()));
 901         const deUint32                                  numElements             = 64u;
 902         vector<float>                                   positiveFloats  (numElements, 0);
 903         vector<float>                                   negativeFloats  (numElements, 0);
 904
 905         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
 906
 907         for (size_t ndx = 0; ndx < numElements; ++ndx)
 908                 negativeFloats[ndx] = -positiveFloats[ndx];
 909
 910         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
 911         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
 912
 913         spec.numWorkGroups = IVec3(numElements, 1, 1);
 914
 915         spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, 1, 1), 0u);
 916         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize", "", spec));
 917
 918         spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, 1, 1), 0u);
 919         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize", "", spec));
 920
 921         spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, 1, 1), 0u);
 922         group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize", "", spec));
 923
 924         spec.numWorkGroups = IVec3(1, 1, 1);
 925
 926         spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(numElements, 1, 1), 0u);
 927         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_x", "", spec));
 928
 929         spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(numElements, 1, 1), 0u);
 930         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_x", "", spec));
 931
 932         spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(numElements, 1, 1), 0u);
 933         group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_x", "", spec));
 934
 935         spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, numElements, 1), 1u);
 936         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_y", "", spec));
 937
 938         spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, numElements, 1), 1u);
 939         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_y", "", spec));
 940
 941         spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, numElements, 1), 1u);
 942         group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_y", "", spec));
 943
 944         spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, 1, numElements), 2u);
 945         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_z", "", spec));
 946
 947         spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, 1, numElements), 2u);
 948         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_z", "", spec));
 949
 950         spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, 1, numElements), 2u);
 951         group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_z", "", spec));
 952
 953         return group.release();
 954 }
 955
 956 tcu::TestCaseGroup* createOpNopGroup (tcu::TestContext& testCtx)
 957 {
 958         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnop", "Test the OpNop instruction"));
 959         ComputeShaderSpec                               spec;
 960         de::Random                                              rnd                             (deStringHash(group->getName()));
 961         const int                                               numElements             = 100;
 962         vector<float>                                   positiveFloats  (numElements, 0);
 963         vector<float>                                   negativeFloats  (numElements, 0);
 964
 965         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
 966
 967         for (size_t ndx = 0; ndx < numElements; ++ndx)
 968                 negativeFloats[ndx] = -positiveFloats[ndx];
 969
 970         spec.assembly =
 971                 string(getComputeAsmShaderPreamble()) +
 972
 973                 "OpSource GLSL 430\n"
 974                 "OpName %main           \"main\"\n"
 975                 "OpName %id             \"gl_GlobalInvocationID\"\n"
 976
 977                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
 978
 979                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
 980
 981                 + string(getComputeAsmInputOutputBuffer()) +
 982
 983                 "%id        = OpVariable %uvec3ptr Input\n"
 984                 "%zero      = OpConstant %i32 0\n"
 985
 986                 "%main      = OpFunction %void None %voidf\n"
 987                 "%label     = OpLabel\n"
 988                 "%idval     = OpLoad %uvec3 %id\n"
 989                 "%x         = OpCompositeExtract %u32 %idval 0\n"
 990
 991                 "             OpNop\n" // Inside a function body
 992
 993                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
 994                 "%inval     = OpLoad %f32 %inloc\n"
 995                 "%neg       = OpFNegate %f32 %inval\n"
 996                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
 997                 "             OpStore %outloc %neg\n"
 998                 "             OpReturn\n"
 999                 "             OpFunctionEnd\n";
1000         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1001         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1002         spec.numWorkGroups = IVec3(numElements, 1, 1);
1003
1004         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNop appearing at different places", spec));
1005
1006         return group.release();
1007 }
1008
1009 tcu::TestCaseGroup* createUnusedVariableComputeTests (tcu::TestContext& testCtx)
1010 {
1011         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "unused_variables", "Compute shaders with unused variables"));
1012         de::Random                                              rnd                             (deStringHash(group->getName()));
1013         const int                                               numElements             = 100;
1014         vector<float>                                   positiveFloats  (numElements, 0);
1015         vector<float>                                   negativeFloats  (numElements, 0);
1016
1017         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1018
1019         for (size_t ndx = 0; ndx < numElements; ++ndx)
1020                 negativeFloats[ndx] = -positiveFloats[ndx];
1021
1022         const VariableLocation                  testLocations[] =
1023         {
1024                 // Set          Binding
1025                 { 0,            5                       },
1026                 { 5,            5                       },
1027         };
1028
1029         for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
1030         {
1031                 const VariableLocation& location = testLocations[locationNdx];
1032
1033                 // Unused variable.
1034                 {
1035                         ComputeShaderSpec                               spec;
1036
1037                         spec.assembly =
1038                                 string(getComputeAsmShaderPreamble()) +
1039
1040                                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1041
1042                                 + getUnusedDecorations(location)
1043
1044                                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1045
1046                                 + getUnusedTypesAndConstants()
1047
1048                                 + string(getComputeAsmInputOutputBuffer())
1049
1050                                 + getUnusedBuffer() +
1051
1052                                 "%id        = OpVariable %uvec3ptr Input\n"
1053                                 "%zero      = OpConstant %i32 0\n"
1054
1055                                 "%main      = OpFunction %void None %voidf\n"
1056                                 "%label     = OpLabel\n"
1057                                 "%idval     = OpLoad %uvec3 %id\n"
1058                                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1059
1060                                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1061                                 "%inval     = OpLoad %f32 %inloc\n"
1062                                 "%neg       = OpFNegate %f32 %inval\n"
1063                                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1064                                 "             OpStore %outloc %neg\n"
1065                                 "             OpReturn\n"
1066                                 "             OpFunctionEnd\n";
1067                         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1068                         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1069                         spec.numWorkGroups = IVec3(numElements, 1, 1);
1070
1071                         std::string testName            = "variable_" + location.toString();
1072                         std::string testDescription     = "Unused variable test with " + location.toDescription();
1073
1074                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testDescription.c_str(), spec));
1075                 }
1076
1077                 // Unused function.
1078                 {
1079                         ComputeShaderSpec                               spec;
1080
1081                         spec.assembly =
1082                                 string(getComputeAsmShaderPreamble("", "", "", getUnusedEntryPoint())) +
1083
1084                                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1085
1086                                 + getUnusedDecorations(location)
1087
1088                                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1089
1090                                 + getUnusedTypesAndConstants() +
1091
1092                                 "%c_i32_0 = OpConstant %i32 0\n"
1093                                 "%c_i32_1 = OpConstant %i32 1\n"
1094
1095                                 + string(getComputeAsmInputOutputBuffer())
1096
1097                                 + getUnusedBuffer() +
1098
1099                                 "%id        = OpVariable %uvec3ptr Input\n"
1100                                 "%zero      = OpConstant %i32 0\n"
1101
1102                                 "%main      = OpFunction %void None %voidf\n"
1103                                 "%label     = OpLabel\n"
1104                                 "%idval     = OpLoad %uvec3 %id\n"
1105                                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1106
1107                                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1108                                 "%inval     = OpLoad %f32 %inloc\n"
1109                                 "%neg       = OpFNegate %f32 %inval\n"
1110                                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1111                                 "             OpStore %outloc %neg\n"
1112                                 "             OpReturn\n"
1113                                 "             OpFunctionEnd\n"
1114
1115                                 + getUnusedFunctionBody();
1116
1117                         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1118                         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1119                         spec.numWorkGroups = IVec3(numElements, 1, 1);
1120
1121                         std::string testName            = "function_" + location.toString();
1122                         std::string testDescription     = "Unused function test with " + location.toDescription();
1123
1124                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testDescription.c_str(), spec));
1125                 }
1126         }
1127
1128         return group.release();
1129 }
1130
1131 template<bool nanSupported>
1132 bool compareFUnord (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
1133 {
1134         if (outputAllocs.size() != 1)
1135                 return false;
1136
1137         vector<deUint8> input1Bytes;
1138         vector<deUint8> input2Bytes;
1139         vector<deUint8> expectedBytes;
1140
1141         inputs[0].getBytes(input1Bytes);
1142         inputs[1].getBytes(input2Bytes);
1143         expectedOutputs[0].getBytes(expectedBytes);
1144
1145         const deInt32* const    expectedOutputAsInt             = reinterpret_cast<const deInt32*>(&expectedBytes.front());
1146         const deInt32* const    outputAsInt                             = static_cast<const deInt32*>(outputAllocs[0]->getHostPtr());
1147         const float* const              input1AsFloat                   = reinterpret_cast<const float*>(&input1Bytes.front());
1148         const float* const              input2AsFloat                   = reinterpret_cast<const float*>(&input2Bytes.front());
1149         bool returnValue                                                                = true;
1150
1151         for (size_t idx = 0; idx < expectedBytes.size() / sizeof(deInt32); ++idx)
1152         {
1153                 if (!nanSupported && (tcu::Float32(input1AsFloat[idx]).isNaN() || tcu::Float32(input2AsFloat[idx]).isNaN()))
1154                         continue;
1155
1156                 if (outputAsInt[idx] != expectedOutputAsInt[idx])
1157                 {
1158                         log << TestLog::Message << "ERROR: Sub-case failed. inputs: " << input1AsFloat[idx] << "," << input2AsFloat[idx] << " output: " << outputAsInt[idx]<< " expected output: " << expectedOutputAsInt[idx] << TestLog::EndMessage;
1159                         returnValue = false;
1160                 }
1161         }
1162         return returnValue;
1163 }
1164
1165 typedef VkBool32 (*compareFuncType) (float, float);
1166
1167 struct OpFUnordCase
1168 {
1169         const char*             name;
1170         const char*             opCode;
1171         compareFuncType compareFunc;
1172
1173                                         OpFUnordCase                    (const char* _name, const char* _opCode, compareFuncType _compareFunc)
1174                                                 : name                          (_name)
1175                                                 , opCode                        (_opCode)
1176                                                 , compareFunc           (_compareFunc) {}
1177 };
1178
// Appends one OpFUnordCase to a vector named `cases` that must be in scope at
// the point of use. NAME becomes the case name (stringized), OPCODE is the
// opcode string, and OPERATOR is the C++ comparison operator wrapped into a
// local static function that serves as the reference comparison.
#define ADD_OPFUNORD_CASE(NAME, OPCODE, OPERATOR) \
do { \
	struct compare_##NAME \
	{ \
		static VkBool32 compare (float lhs, float rhs) \
		{ \
			if (lhs OPERATOR rhs) \
				return VK_TRUE; \
			return VK_FALSE; \
		} \
	}; \
	cases.push_back(OpFUnordCase(#NAME, OPCODE, compare_##NAME::compare)); \
} while (deGetFalse())
1184
1185 tcu::TestCaseGroup* createOpFUnordGroup (tcu::TestContext& testCtx, const bool testWithNan)
1186 {
1187         const string                                    nan                             = testWithNan ? "_nan" : "";
1188         const string                                    groupName               = "opfunord" + nan;
1189         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpFUnord* opcodes"));
1190         de::Random                                              rnd                             (deStringHash(group->getName()));
1191         const int                                               numElements             = 100;
1192         vector<OpFUnordCase>                    cases;
1193         string                                                  extensions              = testWithNan ? "OpExtension \"SPV_KHR_float_controls\"\n" : "";
1194         string                                                  capabilities    = testWithNan ? "OpCapability SignedZeroInfNanPreserve\n" : "";
1195         string                                                  exeModes                = testWithNan ? "OpExecutionMode %main SignedZeroInfNanPreserve 32\n" : "";
1196         const StringTemplate                    shaderTemplate  (
1197                 string(getComputeAsmShaderPreamble(capabilities, extensions, exeModes)) +
1198                 "OpSource GLSL 430\n"
1199                 "OpName %main           \"main\"\n"
1200                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1201
1202                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1203
1204                 "OpDecorate %buf BufferBlock\n"
1205                 "OpDecorate %buf2 BufferBlock\n"
1206                 "OpDecorate %indata1 DescriptorSet 0\n"
1207                 "OpDecorate %indata1 Binding 0\n"
1208                 "OpDecorate %indata2 DescriptorSet 0\n"
1209                 "OpDecorate %indata2 Binding 1\n"
1210                 "OpDecorate %outdata DescriptorSet 0\n"
1211                 "OpDecorate %outdata Binding 2\n"
1212                 "OpDecorate %f32arr ArrayStride 4\n"
1213                 "OpDecorate %i32arr ArrayStride 4\n"
1214                 "OpMemberDecorate %buf 0 Offset 0\n"
1215                 "OpMemberDecorate %buf2 0 Offset 0\n"
1216
1217                 + string(getComputeAsmCommonTypes()) +
1218
1219                 "%buf        = OpTypeStruct %f32arr\n"
1220                 "%bufptr     = OpTypePointer Uniform %buf\n"
1221                 "%indata1    = OpVariable %bufptr Uniform\n"
1222                 "%indata2    = OpVariable %bufptr Uniform\n"
1223
1224                 "%buf2       = OpTypeStruct %i32arr\n"
1225                 "%buf2ptr    = OpTypePointer Uniform %buf2\n"
1226                 "%outdata    = OpVariable %buf2ptr Uniform\n"
1227
1228                 "%id        = OpVariable %uvec3ptr Input\n"
1229                 "%zero      = OpConstant %i32 0\n"
1230                 "%consti1   = OpConstant %i32 1\n"
1231                 "%constf1   = OpConstant %f32 1.0\n"
1232
1233                 "%main      = OpFunction %void None %voidf\n"
1234                 "%label     = OpLabel\n"
1235                 "%idval     = OpLoad %uvec3 %id\n"
1236                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1237
1238                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
1239                 "%inval1    = OpLoad %f32 %inloc1\n"
1240                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
1241                 "%inval2    = OpLoad %f32 %inloc2\n"
1242                 "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
1243
1244                 "%result    = ${OPCODE} %bool %inval1 %inval2\n"
1245                 "%int_res   = OpSelect %i32 %result %consti1 %zero\n"
1246                 "             OpStore %outloc %int_res\n"
1247
1248                 "             OpReturn\n"
1249                 "             OpFunctionEnd\n");
1250
1251         ADD_OPFUNORD_CASE(equal, "OpFUnordEqual", ==);
1252         ADD_OPFUNORD_CASE(less, "OpFUnordLessThan", <);
1253         ADD_OPFUNORD_CASE(lessequal, "OpFUnordLessThanEqual", <=);
1254         ADD_OPFUNORD_CASE(greater, "OpFUnordGreaterThan", >);
1255         ADD_OPFUNORD_CASE(greaterequal, "OpFUnordGreaterThanEqual", >=);
1256         ADD_OPFUNORD_CASE(notequal, "OpFUnordNotEqual", !=);
1257
1258         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1259         {
1260                 map<string, string>                     specializations;
1261                 ComputeShaderSpec                       spec;
1262                 const float                                     NaN                             = std::numeric_limits<float>::quiet_NaN();
1263                 vector<float>                           inputFloats1    (numElements, 0);
1264                 vector<float>                           inputFloats2    (numElements, 0);
1265                 vector<deInt32>                         expectedInts    (numElements, 0);
1266
1267                 specializations["OPCODE"]       = cases[caseNdx].opCode;
1268                 spec.assembly                           = shaderTemplate.specialize(specializations);
1269
1270                 fillRandomScalars(rnd, 1.f, 100.f, &inputFloats1[0], numElements);
1271                 for (size_t ndx = 0; ndx < numElements; ++ndx)
1272                 {
1273                         switch (ndx % 6)
1274                         {
1275                                 case 0:         inputFloats2[ndx] = inputFloats1[ndx] + 1.0f; break;
1276                                 case 1:         inputFloats2[ndx] = inputFloats1[ndx] - 1.0f; break;
1277                                 case 2:         inputFloats2[ndx] = inputFloats1[ndx]; break;
1278                                 case 3:         inputFloats2[ndx] = NaN; break;
1279                                 case 4:         inputFloats2[ndx] = inputFloats1[ndx];  inputFloats1[ndx] = NaN; break;
1280                                 case 5:         inputFloats2[ndx] = NaN;                                inputFloats1[ndx] = NaN; break;
1281                         }
1282                         expectedInts[ndx] = tcu::Float32(inputFloats1[ndx]).isNaN() || tcu::Float32(inputFloats2[ndx]).isNaN() || cases[caseNdx].compareFunc(inputFloats1[ndx], inputFloats2[ndx]);
1283                 }
1284
1285                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1286                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1287                 spec.outputs.push_back(BufferSp(new Int32Buffer(expectedInts)));
1288                 spec.numWorkGroups      = IVec3(numElements, 1, 1);
1289                 spec.verifyIO           = testWithNan ? &compareFUnord<true> : &compareFUnord<false>;
1290
1291                 if (testWithNan)
1292                 {
1293                         spec.extensions.push_back("VK_KHR_shader_float_controls");
1294                         spec.requestedVulkanFeatures.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat32 = DE_TRUE;
1295                 }
1296
1297                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1298         }
1299
1300         return group.release();
1301 }
1302
1303 struct OpAtomicCase
1304 {
1305         const char*             name;
1306         const char*             assembly;
1307         const char*             retValAssembly;
1308         OpAtomicType    opAtomic;
1309         deInt32                 numOutputElements;
1310
1311                                         OpAtomicCase(const char* _name, const char* _assembly, const char* _retValAssembly, OpAtomicType _opAtomic, deInt32 _numOutputElements)
1312                                                 : name                          (_name)
1313                                                 , assembly                      (_assembly)
1314                                                 , retValAssembly        (_retValAssembly)
1315                                                 , opAtomic                      (_opAtomic)
1316                                                 , numOutputElements     (_numOutputElements) {}
1317 };
1318
1319 tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStorageBuffer, int numElements = 65535, bool verifyReturnValues = false, bool volatileAtomic = false)
1320 {
1321         std::string                                             groupName                       ("opatomic");
1322         if (useStorageBuffer)
1323                 groupName += "_storage_buffer";
1324         if (verifyReturnValues)
1325                 groupName += "_return_values";
1326         if (volatileAtomic)
1327                 groupName += "_volatile";
1328         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpAtomic* opcodes"));
1329         vector<OpAtomicCase>                    cases;
1330
1331         const StringTemplate                    shaderTemplate  (
1332
1333                 string("OpCapability Shader\n") +
1334                 (volatileAtomic ? "OpCapability VulkanMemoryModelKHR\n" : "") +
1335                 (useStorageBuffer ? "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n" : "") +
1336                 (volatileAtomic ? "OpExtension \"SPV_KHR_vulkan_memory_model\"\n" : "") +
1337                 (volatileAtomic ? "OpMemoryModel Logical VulkanKHR\n" : "OpMemoryModel Logical GLSL450\n") +
1338                 "OpEntryPoint GLCompute %main \"main\" %id\n"
1339                 "OpExecutionMode %main LocalSize 1 1 1\n" +
1340
1341                 "OpSource GLSL 430\n"
1342                 "OpName %main           \"main\"\n"
1343                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1344
1345                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1346
1347                 "OpDecorate %buf ${BLOCK_DECORATION}\n"
1348                 "OpDecorate %indata DescriptorSet 0\n"
1349                 "OpDecorate %indata Binding 0\n"
1350                 "OpDecorate %i32arr ArrayStride 4\n"
1351                 "OpMemberDecorate %buf 0 Offset 0\n"
1352
1353                 "OpDecorate %sumbuf ${BLOCK_DECORATION}\n"
1354                 "OpDecorate %sum DescriptorSet 0\n"
1355                 "OpDecorate %sum Binding 1\n"
1356                 "OpMemberDecorate %sumbuf 0 Offset 0\n"
1357
1358                 "${RETVAL_BUF_DECORATE}"
1359
1360                 + getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
1361
1362                 "%buf       = OpTypeStruct %i32arr\n"
1363                 "%bufptr    = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
1364                 "%indata    = OpVariable %bufptr ${BLOCK_POINTER_TYPE}\n"
1365
1366                 "%sumbuf    = OpTypeStruct %i32arr\n"
1367                 "%sumbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %sumbuf\n"
1368                 "%sum       = OpVariable %sumbufptr ${BLOCK_POINTER_TYPE}\n"
1369
1370                 "${RETVAL_BUF_DECL}"
1371
1372                 "%id        = OpVariable %uvec3ptr Input\n"
1373                 "%minusone  = OpConstant %i32 -1\n"
1374                 "%zero      = OpConstant %i32 0\n"
1375                 "%one       = OpConstant %u32 1\n"
1376                 "%two       = OpConstant %i32 2\n"
1377                 "%five      = OpConstant %i32 5\n"
1378                 "%volbit    = OpConstant %i32 32768\n"
1379
1380                 "%main      = OpFunction %void None %voidf\n"
1381                 "%label     = OpLabel\n"
1382                 "%idval     = OpLoad %uvec3 %id\n"
1383                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1384
1385                 "%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
1386                 "%inval     = OpLoad %i32 %inloc\n"
1387
1388                 "%outloc    = OpAccessChain %i32ptr %sum %zero ${INDEX}\n"
1389                 "${INSTRUCTION}"
1390                 "${RETVAL_ASSEMBLY}"
1391
1392                 "             OpReturn\n"
1393                 "             OpFunctionEnd\n");
1394
1395         #define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS) \
1396         do { \
1397                 DE_ASSERT((NUM_OUTPUT_ELEMENTS) == 1 || (NUM_OUTPUT_ELEMENTS) == numElements); \
1398                 cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)); \
1399         } while (deGetFalse())
1400         #define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, 1)
1401         #define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, numElements)
1402
1403         ADD_OPATOMIC_CASE_1(iadd,       "%retv      = OpAtomicIAdd %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1404                                                                 "             OpStore %retloc %retv\n", OPATOMIC_IADD );
1405         ADD_OPATOMIC_CASE_1(isub,       "%retv      = OpAtomicISub %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1406                                                                 "             OpStore %retloc %retv\n", OPATOMIC_ISUB );
1407         ADD_OPATOMIC_CASE_1(iinc,       "%retv      = OpAtomicIIncrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1408                                                                 "             OpStore %retloc %retv\n", OPATOMIC_IINC );
1409         ADD_OPATOMIC_CASE_1(idec,       "%retv      = OpAtomicIDecrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1410                                                                 "             OpStore %retloc %retv\n", OPATOMIC_IDEC );
1411         if (!verifyReturnValues)
1412         {
1413                 ADD_OPATOMIC_CASE_N(load,       "%inval2    = OpAtomicLoad %i32 %inloc ${SCOPE} ${SEMANTICS}\n"
1414                                                                         "             OpStore %outloc %inval2\n", "", OPATOMIC_LOAD );
1415                 ADD_OPATOMIC_CASE_N(store,      "             OpAtomicStore %outloc ${SCOPE} ${SEMANTICS} %inval\n", "", OPATOMIC_STORE );
1416         }
1417
1418         ADD_OPATOMIC_CASE_N(compex, "%even      = OpSMod %i32 %inval %two\n"
1419                                                                 "             OpStore %outloc %even\n"
1420                                                                 "%retv      = OpAtomicCompareExchange %i32 %outloc ${SCOPE} ${SEMANTICS} ${SEMANTICS} %minusone %zero\n",
1421                                                                 "                         OpStore %retloc %retv\n", OPATOMIC_COMPEX );
1422
1423
1424         #undef ADD_OPATOMIC_CASE
1425         #undef ADD_OPATOMIC_CASE_1
1426         #undef ADD_OPATOMIC_CASE_N
1427
1428         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1429         {
1430                 map<string, string>                     specializations;
1431                 ComputeShaderSpec                       spec;
1432                 vector<deInt32>                         inputInts               (numElements, 0);
1433                 vector<deInt32>                         expected                (cases[caseNdx].numOutputElements, -1);
1434
1435                 if (volatileAtomic)
1436                 {
1437                         spec.extensions.push_back("VK_KHR_vulkan_memory_model");
1438                         // volatile, queuefamily scope
1439                         specializations["SEMANTICS"] = "%volbit";
1440                         specializations["SCOPE"] = "%five";
1441                 }
1442                 else
1443                 {
1444                         // non-volatile, device scope
1445                         specializations["SEMANTICS"] = "%zero";
1446                         specializations["SCOPE"] = "%one";
1447                 }
1448                 specializations["INDEX"]                                = (cases[caseNdx].numOutputElements == 1) ? "%zero" : "%x";
1449                 specializations["INSTRUCTION"]                  = cases[caseNdx].assembly;
1450                 specializations["BLOCK_DECORATION"]             = useStorageBuffer ? "Block" : "BufferBlock";
1451                 specializations["BLOCK_POINTER_TYPE"]   = useStorageBuffer ? "StorageBuffer" : "Uniform";
1452
1453                 if (verifyReturnValues)
1454                 {
1455                         const StringTemplate blockDecoration    (
1456                                 "\n"
1457                                 "OpDecorate %retbuf ${BLOCK_DECORATION}\n"
1458                                 "OpDecorate %ret DescriptorSet 0\n"
1459                                 "OpDecorate %ret Binding 2\n"
1460                                 "OpMemberDecorate %retbuf 0 Offset 0\n\n");
1461
1462                         const StringTemplate blockDeclaration   (
1463                                 "\n"
1464                                 "%retbuf    = OpTypeStruct %i32arr\n"
1465                                 "%retbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %retbuf\n"
1466                                 "%ret       = OpVariable %retbufptr ${BLOCK_POINTER_TYPE}\n\n");
1467
1468                         specializations["RETVAL_ASSEMBLY"] =
1469                                 "%retloc    = OpAccessChain %i32ptr %ret %zero %x\n"
1470                                 + std::string(cases[caseNdx].retValAssembly);
1471
1472                         specializations["RETVAL_BUF_DECORATE"]  = blockDecoration.specialize(specializations);
1473                         specializations["RETVAL_BUF_DECL"]              = blockDeclaration.specialize(specializations);
1474                 }
1475                 else
1476                 {
1477                         specializations["RETVAL_ASSEMBLY"]              = "";
1478                         specializations["RETVAL_BUF_DECORATE"]  = "";
1479                         specializations["RETVAL_BUF_DECL"]              = "";
1480                 }
1481
1482                 spec.assembly                                                   = shaderTemplate.specialize(specializations);
1483
1484                 // Specialize one more time, to catch things that were in a template parameter
1485                 const StringTemplate                                    assemblyTemplate(spec.assembly);
1486                 spec.assembly                                                   = assemblyTemplate.specialize(specializations);
1487
1488                 if (useStorageBuffer)
1489                         spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
1490
1491                 spec.inputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_INPUT)));
1492                 spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_EXPECTED)));
1493                 if (verifyReturnValues)
1494                         spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_ATOMIC_RET)));
1495                 spec.numWorkGroups = IVec3(numElements, 1, 1);
1496
1497                 if (verifyReturnValues)
1498                 {
1499                         switch (cases[caseNdx].opAtomic)
1500                         {
1501                                 case OPATOMIC_IADD:
1502                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IADD>;
1503                                         break;
1504                                 case OPATOMIC_ISUB:
1505                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_ISUB>;
1506                                         break;
1507                                 case OPATOMIC_IINC:
1508                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IINC>;
1509                                         break;
1510                                 case OPATOMIC_IDEC:
1511                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IDEC>;
1512                                         break;
1513                                 case OPATOMIC_COMPEX:
1514                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_COMPEX>;
1515                                         break;
1516                                 default:
1517                                         DE_FATAL("Unsupported OpAtomic type for return value verification");
1518                         }
1519                 }
1520                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1521         }
1522
1523         return group.release();
1524 }
1525
1526 tcu::TestCaseGroup* createOpLineGroup (tcu::TestContext& testCtx)
1527 {
1528         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opline", "Test the OpLine instruction"));
1529         ComputeShaderSpec                               spec;
1530         de::Random                                              rnd                             (deStringHash(group->getName()));
1531         const int                                               numElements             = 100;
1532         vector<float>                                   positiveFloats  (numElements, 0);
1533         vector<float>                                   negativeFloats  (numElements, 0);
1534
1535         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1536
1537         for (size_t ndx = 0; ndx < numElements; ++ndx)
1538                 negativeFloats[ndx] = -positiveFloats[ndx];
1539
1540         spec.assembly =
1541                 string(getComputeAsmShaderPreamble()) +
1542
1543                 "%fname1 = OpString \"negateInputs.comp\"\n"
1544                 "%fname2 = OpString \"negateInputs\"\n"
1545
1546                 "OpSource GLSL 430\n"
1547                 "OpName %main           \"main\"\n"
1548                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1549
1550                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1551
1552                 + string(getComputeAsmInputOutputBufferTraits()) +
1553
1554                 "OpLine %fname1 0 0\n" // At the earliest possible position
1555
1556                 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1557
1558                 "OpLine %fname1 0 1\n" // Multiple OpLines in sequence
1559                 "OpLine %fname2 1 0\n" // Different filenames
1560                 "OpLine %fname1 1000 100000\n"
1561
1562                 "%id        = OpVariable %uvec3ptr Input\n"
1563                 "%zero      = OpConstant %i32 0\n"
1564
1565                 "OpLine %fname1 1 1\n" // Before a function
1566
1567                 "%main      = OpFunction %void None %voidf\n"
1568                 "%label     = OpLabel\n"
1569
1570                 "OpLine %fname1 1 1\n" // In a function
1571
1572                 "%idval     = OpLoad %uvec3 %id\n"
1573                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1574                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1575                 "%inval     = OpLoad %f32 %inloc\n"
1576                 "%neg       = OpFNegate %f32 %inval\n"
1577                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1578                 "             OpStore %outloc %neg\n"
1579                 "             OpReturn\n"
1580                 "             OpFunctionEnd\n";
1581         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1582         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1583         spec.numWorkGroups = IVec3(numElements, 1, 1);
1584
1585         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpLine appearing at different places", spec));
1586
1587         return group.release();
1588 }
1589
1590 bool veryfiBinaryShader (const ProgramBinary& binary)
1591 {
1592         const size_t    paternCount                     = 3u;
1593         bool paternsCheck[paternCount]          =
1594         {
1595                 false, false, false
1596         };
1597         const string patersns[paternCount]      =
1598         {
1599                 "VULKAN CTS",
1600                 "Negative values",
1601                 "Date: 2017/09/21"
1602         };
1603         size_t                  paternNdx               = 0u;
1604
1605         for (size_t ndx = 0u; ndx < binary.getSize(); ++ndx)
1606         {
1607                 if (false == paternsCheck[paternNdx] &&
1608                         patersns[paternNdx][0] == static_cast<char>(binary.getBinary()[ndx]) &&
1609                         deMemoryEqual((const char*)&binary.getBinary()[ndx], &patersns[paternNdx][0], patersns[paternNdx].length()))
1610                 {
1611                         paternsCheck[paternNdx]= true;
1612                         paternNdx++;
1613                         if (paternNdx == paternCount)
1614                                 break;
1615                 }
1616         }
1617
1618         for (size_t ndx = 0u; ndx < paternCount; ++ndx)
1619         {
1620                 if (!paternsCheck[ndx])
1621                         return false;
1622         }
1623
1624         return true;
1625 }
1626
1627 tcu::TestCaseGroup* createOpModuleProcessedGroup (tcu::TestContext& testCtx)
1628 {
1629         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "Test the OpModuleProcessed instruction"));
1630         ComputeShaderSpec                               spec;
1631         de::Random                                              rnd                             (deStringHash(group->getName()));
1632         const int                                               numElements             = 10;
1633         vector<float>                                   positiveFloats  (numElements, 0);
1634         vector<float>                                   negativeFloats  (numElements, 0);
1635
1636         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1637
1638         for (size_t ndx = 0; ndx < numElements; ++ndx)
1639                 negativeFloats[ndx] = -positiveFloats[ndx];
1640
1641         spec.assembly =
1642                 string(getComputeAsmShaderPreamble()) +
1643                 "%fname = OpString \"negateInputs.comp\"\n"
1644
1645                 "OpSource GLSL 430\n"
1646                 "OpName %main           \"main\"\n"
1647                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1648                 "OpModuleProcessed \"VULKAN CTS\"\n"                                    //OpModuleProcessed;
1649                 "OpModuleProcessed \"Negative values\"\n"
1650                 "OpModuleProcessed \"Date: 2017/09/21\"\n"
1651                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1652
1653                 + string(getComputeAsmInputOutputBufferTraits())
1654
1655                 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1656
1657                 "OpLine %fname 0 1\n"
1658
1659                 "OpLine %fname 1000 1\n"
1660
1661                 "%id        = OpVariable %uvec3ptr Input\n"
1662                 "%zero      = OpConstant %i32 0\n"
1663                 "%main      = OpFunction %void None %voidf\n"
1664
1665                 "%label     = OpLabel\n"
1666                 "%idval     = OpLoad %uvec3 %id\n"
1667                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1668
1669                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1670                 "%inval     = OpLoad %f32 %inloc\n"
1671                 "%neg       = OpFNegate %f32 %inval\n"
1672                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1673                 "             OpStore %outloc %neg\n"
1674                 "             OpReturn\n"
1675                 "             OpFunctionEnd\n";
1676         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1677         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1678         spec.numWorkGroups = IVec3(numElements, 1, 1);
1679         spec.verifyBinary = veryfiBinaryShader;
1680         spec.spirvVersion = SPIRV_VERSION_1_3;
1681
1682         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpModuleProcessed Tests", spec));
1683
1684         return group.release();
1685 }
1686
1687 tcu::TestCaseGroup* createOpNoLineGroup (tcu::TestContext& testCtx)
1688 {
1689         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnoline", "Test the OpNoLine instruction"));
1690         ComputeShaderSpec                               spec;
1691         de::Random                                              rnd                             (deStringHash(group->getName()));
1692         const int                                               numElements             = 100;
1693         vector<float>                                   positiveFloats  (numElements, 0);
1694         vector<float>                                   negativeFloats  (numElements, 0);
1695
1696         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1697
1698         for (size_t ndx = 0; ndx < numElements; ++ndx)
1699                 negativeFloats[ndx] = -positiveFloats[ndx];
1700
1701         spec.assembly =
1702                 string(getComputeAsmShaderPreamble()) +
1703
1704                 "%fname = OpString \"negateInputs.comp\"\n"
1705
1706                 "OpSource GLSL 430\n"
1707                 "OpName %main           \"main\"\n"
1708                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1709
1710                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1711
1712                 + string(getComputeAsmInputOutputBufferTraits()) +
1713
1714                 "OpNoLine\n" // At the earliest possible position, without preceding OpLine
1715
1716                 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1717
1718                 "OpLine %fname 0 1\n"
1719                 "OpNoLine\n" // Immediately following a preceding OpLine
1720
1721                 "OpLine %fname 1000 1\n"
1722
1723                 "%id        = OpVariable %uvec3ptr Input\n"
1724                 "%zero      = OpConstant %i32 0\n"
1725
1726                 "OpNoLine\n" // Contents after the previous OpLine
1727
1728                 "%main      = OpFunction %void None %voidf\n"
1729                 "%label     = OpLabel\n"
1730                 "%idval     = OpLoad %uvec3 %id\n"
1731                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1732
1733                 "OpNoLine\n" // Multiple OpNoLine
1734                 "OpNoLine\n"
1735                 "OpNoLine\n"
1736
1737                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1738                 "%inval     = OpLoad %f32 %inloc\n"
1739                 "%neg       = OpFNegate %f32 %inval\n"
1740                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1741                 "             OpStore %outloc %neg\n"
1742                 "             OpReturn\n"
1743                 "             OpFunctionEnd\n";
1744         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1745         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1746         spec.numWorkGroups = IVec3(numElements, 1, 1);
1747
1748         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNoLine appearing at different places", spec));
1749
1750         return group.release();
1751 }
1752
1753 // Compare instruction for the contraction compute case.
1754 // Returns true if the output is what is expected from the test case.
1755 bool compareNoContractCase(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1756 {
1757         if (outputAllocs.size() != 1)
1758                 return false;
1759
1760         // Only size is needed because we are not comparing the exact values.
1761         size_t byteSize = expectedOutputs[0].getByteSize();
1762
1763         const float*    outputAsFloat   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
1764
1765         for(size_t i = 0; i < byteSize / sizeof(float); ++i) {
1766                 if (outputAsFloat[i] != 0.f &&
1767                         outputAsFloat[i] != -ldexp(1, -24)) {
1768                         return false;
1769                 }
1770         }
1771
1772         return true;
1773 }
1774
1775 tcu::TestCaseGroup* createNoContractionGroup (tcu::TestContext& testCtx)
1776 {
1777         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
1778         vector<CaseParameter>                   cases;
1779         const int                                               numElements             = 100;
1780         vector<float>                                   inputFloats1    (numElements, 0);
1781         vector<float>                                   inputFloats2    (numElements, 0);
1782         vector<float>                                   outputFloats    (numElements, 0);
1783         const StringTemplate                    shaderTemplate  (
1784                 string(getComputeAsmShaderPreamble()) +
1785
1786                 "OpName %main           \"main\"\n"
1787                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1788
1789                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1790
1791                 "${DECORATION}\n"
1792
1793                 "OpDecorate %buf BufferBlock\n"
1794                 "OpDecorate %indata1 DescriptorSet 0\n"
1795                 "OpDecorate %indata1 Binding 0\n"
1796                 "OpDecorate %indata2 DescriptorSet 0\n"
1797                 "OpDecorate %indata2 Binding 1\n"
1798                 "OpDecorate %outdata DescriptorSet 0\n"
1799                 "OpDecorate %outdata Binding 2\n"
1800                 "OpDecorate %f32arr ArrayStride 4\n"
1801                 "OpMemberDecorate %buf 0 Offset 0\n"
1802
1803                 + string(getComputeAsmCommonTypes()) +
1804
1805                 "%buf        = OpTypeStruct %f32arr\n"
1806                 "%bufptr     = OpTypePointer Uniform %buf\n"
1807                 "%indata1    = OpVariable %bufptr Uniform\n"
1808                 "%indata2    = OpVariable %bufptr Uniform\n"
1809                 "%outdata    = OpVariable %bufptr Uniform\n"
1810
1811                 "%id         = OpVariable %uvec3ptr Input\n"
1812                 "%zero       = OpConstant %i32 0\n"
1813                 "%c_f_m1     = OpConstant %f32 -1.\n"
1814
1815                 "%main       = OpFunction %void None %voidf\n"
1816                 "%label      = OpLabel\n"
1817                 "%idval      = OpLoad %uvec3 %id\n"
1818                 "%x          = OpCompositeExtract %u32 %idval 0\n"
1819                 "%inloc1     = OpAccessChain %f32ptr %indata1 %zero %x\n"
1820                 "%inval1     = OpLoad %f32 %inloc1\n"
1821                 "%inloc2     = OpAccessChain %f32ptr %indata2 %zero %x\n"
1822                 "%inval2     = OpLoad %f32 %inloc2\n"
1823                 "%mul        = OpFMul %f32 %inval1 %inval2\n"
1824                 "%add        = OpFAdd %f32 %mul %c_f_m1\n"
1825                 "%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
1826                 "              OpStore %outloc %add\n"
1827                 "              OpReturn\n"
1828                 "              OpFunctionEnd\n");
1829
1830         cases.push_back(CaseParameter("multiplication", "OpDecorate %mul NoContraction"));
1831         cases.push_back(CaseParameter("addition",               "OpDecorate %add NoContraction"));
1832         cases.push_back(CaseParameter("both",                   "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"));
1833
1834         for (size_t ndx = 0; ndx < numElements; ++ndx)
1835         {
1836                 inputFloats1[ndx]       = 1.f + std::ldexp(1.f, -23); // 1 + 2^-23.
1837                 inputFloats2[ndx]       = 1.f - std::ldexp(1.f, -23); // 1 - 2^-23.
1838                 // Result for (1 + 2^-23) * (1 - 2^-23) - 1. With NoContraction, the multiplication will be
1839                 // conducted separately and the result is rounded to 1, or 0x1.fffffcp-1
1840                 // So the final result will be 0.f or 0x1p-24.
1841                 // If the operation is combined into a precise fused multiply-add, then the result would be
1842                 // 2^-46 (0xa8800000).
1843                 outputFloats[ndx]       = 0.f;
1844         }
1845
1846         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1847         {
1848                 map<string, string>             specializations;
1849                 ComputeShaderSpec               spec;
1850
1851                 specializations["DECORATION"] = cases[caseNdx].param;
1852                 spec.assembly = shaderTemplate.specialize(specializations);
1853                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1854                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1855                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
1856                 spec.numWorkGroups = IVec3(numElements, 1, 1);
1857                 // Check against the two possible answers based on rounding mode.
1858                 spec.verifyIO = &compareNoContractCase;
1859
1860                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1861         }
1862         return group.release();
1863 }
1864
1865 bool compareFRem(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1866 {
1867         if (outputAllocs.size() != 1)
1868                 return false;
1869
1870         vector<deUint8> expectedBytes;
1871         expectedOutputs[0].getBytes(expectedBytes);
1872
1873         const float*    expectedOutputAsFloat   = reinterpret_cast<const float*>(&expectedBytes.front());
1874         const float*    outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
1875
1876         for (size_t idx = 0; idx < expectedBytes.size() / sizeof(float); ++idx)
1877         {
1878                 const float f0 = expectedOutputAsFloat[idx];
1879                 const float f1 = outputAsFloat[idx];
1880                 // \todo relative error needs to be fairly high because FRem may be implemented as
1881                 // (roughly) frac(a/b)*b, so LSB errors can be magnified. But this should be fine for now.
1882                 if (deFloatAbs((f1 - f0) / f0) > 0.02)
1883                         return false;
1884         }
1885
1886         return true;
1887 }
1888
1889 tcu::TestCaseGroup* createOpFRemGroup (tcu::TestContext& testCtx)
1890 {
1891         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opfrem", "Test the OpFRem instruction"));
1892         ComputeShaderSpec                               spec;
1893         de::Random                                              rnd                             (deStringHash(group->getName()));
1894         const int                                               numElements             = 200;
1895         vector<float>                                   inputFloats1    (numElements, 0);
1896         vector<float>                                   inputFloats2    (numElements, 0);
1897         vector<float>                                   outputFloats    (numElements, 0);
1898
1899         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
1900         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats2[0], numElements);
1901
1902         for (size_t ndx = 0; ndx < numElements; ++ndx)
1903         {
1904                 // Guard against divisors near zero.
1905                 if (std::fabs(inputFloats2[ndx]) < 1e-3)
1906                         inputFloats2[ndx] = 8.f;
1907
1908                 // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
1909                 outputFloats[ndx] = std::fmod(inputFloats1[ndx], inputFloats2[ndx]);
1910         }
1911
1912         spec.assembly =
1913                 string(getComputeAsmShaderPreamble()) +
1914
1915                 "OpName %main           \"main\"\n"
1916                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1917
1918                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1919
1920                 "OpDecorate %buf BufferBlock\n"
1921                 "OpDecorate %indata1 DescriptorSet 0\n"
1922                 "OpDecorate %indata1 Binding 0\n"
1923                 "OpDecorate %indata2 DescriptorSet 0\n"
1924                 "OpDecorate %indata2 Binding 1\n"
1925                 "OpDecorate %outdata DescriptorSet 0\n"
1926                 "OpDecorate %outdata Binding 2\n"
1927                 "OpDecorate %f32arr ArrayStride 4\n"
1928                 "OpMemberDecorate %buf 0 Offset 0\n"
1929
1930                 + string(getComputeAsmCommonTypes()) +
1931
1932                 "%buf        = OpTypeStruct %f32arr\n"
1933                 "%bufptr     = OpTypePointer Uniform %buf\n"
1934                 "%indata1    = OpVariable %bufptr Uniform\n"
1935                 "%indata2    = OpVariable %bufptr Uniform\n"
1936                 "%outdata    = OpVariable %bufptr Uniform\n"
1937
1938                 "%id        = OpVariable %uvec3ptr Input\n"
1939                 "%zero      = OpConstant %i32 0\n"
1940
1941                 "%main      = OpFunction %void None %voidf\n"
1942                 "%label     = OpLabel\n"
1943                 "%idval     = OpLoad %uvec3 %id\n"
1944                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1945                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
1946                 "%inval1    = OpLoad %f32 %inloc1\n"
1947                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
1948                 "%inval2    = OpLoad %f32 %inloc2\n"
1949                 "%rem       = OpFRem %f32 %inval1 %inval2\n"
1950                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1951                 "             OpStore %outloc %rem\n"
1952                 "             OpReturn\n"
1953                 "             OpFunctionEnd\n";
1954
1955         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1956         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1957         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
1958         spec.numWorkGroups = IVec3(numElements, 1, 1);
1959         spec.verifyIO = &compareFRem;
1960
1961         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
1962
1963         return group.release();
1964 }
1965
1966 bool compareNMin (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1967 {
1968         if (outputAllocs.size() != 1)
1969                 return false;
1970
1971         const BufferSp&                 expectedOutput                  (expectedOutputs[0].getBuffer());
1972         std::vector<deUint8>    data;
1973         expectedOutput->getBytes(data);
1974
1975         const float* const              expectedOutputAsFloat   = reinterpret_cast<const float*>(&data.front());
1976         const float* const              outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
1977
1978         for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
1979         {
1980                 const float f0 = expectedOutputAsFloat[idx];
1981                 const float f1 = outputAsFloat[idx];
1982
1983                 // For NMin, we accept NaN as output if both inputs were NaN.
1984                 // Otherwise the NaN is the wrong choise, as on architectures that
1985                 // do not handle NaN, those are huge values.
1986                 if (!(tcu::Float32(f1).isNaN() && tcu::Float32(f0).isNaN()) && deFloatAbs(f1 - f0) > 0.00001f)
1987                         return false;
1988         }
1989
1990         return true;
1991 }
1992
1993 tcu::TestCaseGroup* createOpNMinGroup (tcu::TestContext& testCtx)
1994 {
1995         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnmin", "Test the OpNMin instruction"));
1996         ComputeShaderSpec                               spec;
1997         de::Random                                              rnd                             (deStringHash(group->getName()));
1998         const int                                               numElements             = 200;
1999         vector<float>                                   inputFloats1    (numElements, 0);
2000         vector<float>                                   inputFloats2    (numElements, 0);
2001         vector<float>                                   outputFloats    (numElements, 0);
2002
2003         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2004         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2005
2006         // Make the first case a full-NAN case.
2007         inputFloats1[0] = TCU_NAN;
2008         inputFloats2[0] = TCU_NAN;
2009
2010         for (size_t ndx = 0; ndx < numElements; ++ndx)
2011         {
2012                 // By default, pick the smallest
2013                 outputFloats[ndx] = std::min(inputFloats1[ndx], inputFloats2[ndx]);
2014
2015                 // Make half of the cases NaN cases
2016                 if ((ndx & 1) == 0)
2017                 {
2018                         // Alternate between the NaN operand
2019                         if ((ndx & 2) == 0)
2020                         {
2021                                 outputFloats[ndx] = inputFloats2[ndx];
2022                                 inputFloats1[ndx] = TCU_NAN;
2023                         }
2024                         else
2025                         {
2026                                 outputFloats[ndx] = inputFloats1[ndx];
2027                                 inputFloats2[ndx] = TCU_NAN;
2028                         }
2029                 }
2030         }
2031
2032         spec.assembly =
2033                 "OpCapability Shader\n"
2034                 "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
2035                 "OpMemoryModel Logical GLSL450\n"
2036                 "OpEntryPoint GLCompute %main \"main\" %id\n"
2037                 "OpExecutionMode %main LocalSize 1 1 1\n"
2038
2039                 "OpName %main           \"main\"\n"
2040                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2041
2042                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2043
2044                 "OpDecorate %buf BufferBlock\n"
2045                 "OpDecorate %indata1 DescriptorSet 0\n"
2046                 "OpDecorate %indata1 Binding 0\n"
2047                 "OpDecorate %indata2 DescriptorSet 0\n"
2048                 "OpDecorate %indata2 Binding 1\n"
2049                 "OpDecorate %outdata DescriptorSet 0\n"
2050                 "OpDecorate %outdata Binding 2\n"
2051                 "OpDecorate %f32arr ArrayStride 4\n"
2052                 "OpMemberDecorate %buf 0 Offset 0\n"
2053
2054                 + string(getComputeAsmCommonTypes()) +
2055
2056                 "%buf        = OpTypeStruct %f32arr\n"
2057                 "%bufptr     = OpTypePointer Uniform %buf\n"
2058                 "%indata1    = OpVariable %bufptr Uniform\n"
2059                 "%indata2    = OpVariable %bufptr Uniform\n"
2060                 "%outdata    = OpVariable %bufptr Uniform\n"
2061
2062                 "%id        = OpVariable %uvec3ptr Input\n"
2063                 "%zero      = OpConstant %i32 0\n"
2064
2065                 "%main      = OpFunction %void None %voidf\n"
2066                 "%label     = OpLabel\n"
2067                 "%idval     = OpLoad %uvec3 %id\n"
2068                 "%x         = OpCompositeExtract %u32 %idval 0\n"
2069                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2070                 "%inval1    = OpLoad %f32 %inloc1\n"
2071                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2072                 "%inval2    = OpLoad %f32 %inloc2\n"
2073                 "%rem       = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
2074                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2075                 "             OpStore %outloc %rem\n"
2076                 "             OpReturn\n"
2077                 "             OpFunctionEnd\n";
2078
2079         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2080         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2081         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2082         spec.numWorkGroups = IVec3(numElements, 1, 1);
2083         spec.verifyIO = &compareNMin;
2084
2085         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2086
2087         return group.release();
2088 }
2089
2090 bool compareNMax (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2091 {
2092         if (outputAllocs.size() != 1)
2093                 return false;
2094
2095         const BufferSp&                 expectedOutput                  = expectedOutputs[0].getBuffer();
2096         std::vector<deUint8>    data;
2097         expectedOutput->getBytes(data);
2098
2099         const float* const              expectedOutputAsFloat   = reinterpret_cast<const float*>(&data.front());
2100         const float* const              outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2101
2102         for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2103         {
2104                 const float f0 = expectedOutputAsFloat[idx];
2105                 const float f1 = outputAsFloat[idx];
2106
2107                 // For NMax, NaN is considered acceptable result, since in
2108                 // architectures that do not handle NaNs, those are huge values.
2109                 if (!tcu::Float32(f1).isNaN() && deFloatAbs(f1 - f0) > 0.00001f)
2110                         return false;
2111         }
2112
2113         return true;
2114 }
2115
2116 tcu::TestCaseGroup* createOpNMaxGroup (tcu::TestContext& testCtx)
2117 {
2118         de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnmax", "Test the OpNMax instruction"));
2119         ComputeShaderSpec                               spec;
2120         de::Random                                              rnd                             (deStringHash(group->getName()));
2121         const int                                               numElements             = 200;
2122         vector<float>                                   inputFloats1    (numElements, 0);
2123         vector<float>                                   inputFloats2    (numElements, 0);
2124         vector<float>                                   outputFloats    (numElements, 0);
2125
2126         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2127         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2128
2129         // Make the first case a full-NAN case.
2130         inputFloats1[0] = TCU_NAN;
2131         inputFloats2[0] = TCU_NAN;
2132
2133         for (size_t ndx = 0; ndx < numElements; ++ndx)
2134         {
2135                 // By default, pick the biggest
2136                 outputFloats[ndx] = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2137
2138                 // Make half of the cases NaN cases
2139                 if ((ndx & 1) == 0)
2140                 {
2141                         // Alternate between the NaN operand
2142                         if ((ndx & 2) == 0)
2143                         {
2144                                 outputFloats[ndx] = inputFloats2[ndx];
2145                                 inputFloats1[ndx] = TCU_NAN;
2146                         }
2147                         else
2148                         {
2149                                 outputFloats[ndx] = inputFloats1[ndx];
2150                                 inputFloats2[ndx] = TCU_NAN;
2151                         }
2152                 }
2153         }
2154
2155         spec.assembly =
2156                 "OpCapability Shader\n"
2157                 "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
2158                 "OpMemoryModel Logical GLSL450\n"
2159                 "OpEntryPoint GLCompute %main \"main\" %id\n"
2160                 "OpExecutionMode %main LocalSize 1 1 1\n"
2161
2162                 "OpName %main           \"main\"\n"
2163                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2164
2165                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2166
2167                 "OpDecorate %buf BufferBlock\n"
2168                 "OpDecorate %indata1 DescriptorSet 0\n"
2169                 "OpDecorate %indata1 Binding 0\n"
2170                 "OpDecorate %indata2 DescriptorSet 0\n"
2171                 "OpDecorate %indata2 Binding 1\n"
2172                 "OpDecorate %outdata DescriptorSet 0\n"
2173                 "OpDecorate %outdata Binding 2\n"
2174                 "OpDecorate %f32arr ArrayStride 4\n"
2175                 "OpMemberDecorate %buf 0 Offset 0\n"
2176
2177                 + string(getComputeAsmCommonTypes()) +
2178
2179                 "%buf        = OpTypeStruct %f32arr\n"
2180                 "%bufptr     = OpTypePointer Uniform %buf\n"
2181                 "%indata1    = OpVariable %bufptr Uniform\n"
2182                 "%indata2    = OpVariable %bufptr Uniform\n"
2183                 "%outdata    = OpVariable %bufptr Uniform\n"
2184
2185                 "%id        = OpVariable %uvec3ptr Input\n"
2186                 "%zero      = OpConstant %i32 0\n"
2187
2188                 "%main      = OpFunction %void None %voidf\n"
2189                 "%label     = OpLabel\n"
2190                 "%idval     = OpLoad %uvec3 %id\n"
2191                 "%x         = OpCompositeExtract %u32 %idval 0\n"
2192                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2193                 "%inval1    = OpLoad %f32 %inloc1\n"
2194                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2195                 "%inval2    = OpLoad %f32 %inloc2\n"
2196                 "%rem       = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
2197                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2198                 "             OpStore %outloc %rem\n"
2199                 "             OpReturn\n"
2200                 "             OpFunctionEnd\n";
2201
2202         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2203         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2204         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2205         spec.numWorkGroups = IVec3(numElements, 1, 1);
2206         spec.verifyIO = &compareNMax;
2207
2208         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2209
2210         return group.release();
2211 }
2212
2213 bool compareNClamp (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2214 {
2215         if (outputAllocs.size() != 1)
2216                 return false;
2217
2218         const BufferSp&                 expectedOutput                  = expectedOutputs[0].getBuffer();
2219         std::vector<deUint8>    data;
2220         expectedOutput->getBytes(data);
2221
2222         const float* const              expectedOutputAsFloat   = reinterpret_cast<const float*>(&data.front());
2223         const float* const              outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2224
2225         for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float) / 2; ++idx)
2226         {
2227                 const float e0 = expectedOutputAsFloat[idx * 2];
2228                 const float e1 = expectedOutputAsFloat[idx * 2 + 1];
2229                 const float res = outputAsFloat[idx];
2230
2231                 // For NClamp, we have two possible outcomes based on
2232                 // whether NaNs are handled or not.
2233                 // If either min or max value is NaN, the result is undefined,
2234                 // so this test doesn't stress those. If the clamped value is
2235                 // NaN, and NaNs are handled, the result is min; if NaNs are not
2236                 // handled, they are big values that result in max.
2237                 // If all three parameters are NaN, the result should be NaN.
2238                 if (!((tcu::Float32(e0).isNaN() && tcu::Float32(res).isNaN()) ||
2239                          (deFloatAbs(e0 - res) < 0.00001f) ||
2240                          (deFloatAbs(e1 - res) < 0.00001f)))
2241                         return false;
2242         }
2243
2244         return true;
2245 }
2246
2247 tcu::TestCaseGroup* createOpNClampGroup (tcu::TestContext& testCtx)
2248 {
2249         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnclamp", "Test the OpNClamp instruction"));
2250         ComputeShaderSpec                               spec;
2251         de::Random                                              rnd                             (deStringHash(group->getName()));
2252         const int                                               numElements             = 200;
2253         vector<float>                                   inputFloats1    (numElements, 0);
2254         vector<float>                                   inputFloats2    (numElements, 0);
2255         vector<float>                                   inputFloats3    (numElements, 0);
2256         vector<float>                                   outputFloats    (numElements * 2, 0);
2257
2258         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2259         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2260         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats3[0], numElements);
2261
2262         for (size_t ndx = 0; ndx < numElements; ++ndx)
2263         {
2264                 // Results are only defined if max value is bigger than min value.
2265                 if (inputFloats2[ndx] > inputFloats3[ndx])
2266                 {
2267                         float t = inputFloats2[ndx];
2268                         inputFloats2[ndx] = inputFloats3[ndx];
2269                         inputFloats3[ndx] = t;
2270                 }
2271
2272                 // By default, do the clamp, setting both possible answers
2273                 float defaultRes = std::min(std::max(inputFloats1[ndx], inputFloats2[ndx]), inputFloats3[ndx]);
2274
2275                 float maxResA = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2276                 float maxResB = maxResA;
2277
2278                 // Alternate between the NaN cases
2279                 if (ndx & 1)
2280                 {
2281                         inputFloats1[ndx] = TCU_NAN;
2282                         // If NaN is handled, the result should be same as the clamp minimum.
2283                         // If NaN is not handled, the result should clamp to the clamp maximum.
2284                         maxResA = inputFloats2[ndx];
2285                         maxResB = inputFloats3[ndx];
2286                 }
2287                 else
2288                 {
2289                         // Not a NaN case - only one legal result.
2290                         maxResA = defaultRes;
2291                         maxResB = defaultRes;
2292                 }
2293
2294                 outputFloats[ndx * 2] = maxResA;
2295                 outputFloats[ndx * 2 + 1] = maxResB;
2296         }
2297
2298         // Make the first case a full-NAN case.
2299         inputFloats1[0] = TCU_NAN;
2300         inputFloats2[0] = TCU_NAN;
2301         inputFloats3[0] = TCU_NAN;
2302         outputFloats[0] = TCU_NAN;
2303         outputFloats[1] = TCU_NAN;
2304
2305         spec.assembly =
2306                 "OpCapability Shader\n"
2307                 "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
2308                 "OpMemoryModel Logical GLSL450\n"
2309                 "OpEntryPoint GLCompute %main \"main\" %id\n"
2310                 "OpExecutionMode %main LocalSize 1 1 1\n"
2311
2312                 "OpName %main           \"main\"\n"
2313                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2314
2315                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2316
2317                 "OpDecorate %buf BufferBlock\n"
2318                 "OpDecorate %indata1 DescriptorSet 0\n"
2319                 "OpDecorate %indata1 Binding 0\n"
2320                 "OpDecorate %indata2 DescriptorSet 0\n"
2321                 "OpDecorate %indata2 Binding 1\n"
2322                 "OpDecorate %indata3 DescriptorSet 0\n"
2323                 "OpDecorate %indata3 Binding 2\n"
2324                 "OpDecorate %outdata DescriptorSet 0\n"
2325                 "OpDecorate %outdata Binding 3\n"
2326                 "OpDecorate %f32arr ArrayStride 4\n"
2327                 "OpMemberDecorate %buf 0 Offset 0\n"
2328
2329                 + string(getComputeAsmCommonTypes()) +
2330
2331                 "%buf        = OpTypeStruct %f32arr\n"
2332                 "%bufptr     = OpTypePointer Uniform %buf\n"
2333                 "%indata1    = OpVariable %bufptr Uniform\n"
2334                 "%indata2    = OpVariable %bufptr Uniform\n"
2335                 "%indata3    = OpVariable %bufptr Uniform\n"
2336                 "%outdata    = OpVariable %bufptr Uniform\n"
2337
2338                 "%id        = OpVariable %uvec3ptr Input\n"
2339                 "%zero      = OpConstant %i32 0\n"
2340
2341                 "%main      = OpFunction %void None %voidf\n"
2342                 "%label     = OpLabel\n"
2343                 "%idval     = OpLoad %uvec3 %id\n"
2344                 "%x         = OpCompositeExtract %u32 %idval 0\n"
2345                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2346                 "%inval1    = OpLoad %f32 %inloc1\n"
2347                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2348                 "%inval2    = OpLoad %f32 %inloc2\n"
2349                 "%inloc3    = OpAccessChain %f32ptr %indata3 %zero %x\n"
2350                 "%inval3    = OpLoad %f32 %inloc3\n"
2351                 "%rem       = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
2352                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2353                 "             OpStore %outloc %rem\n"
2354                 "             OpReturn\n"
2355                 "             OpFunctionEnd\n";
2356
2357         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2358         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2359         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
2360         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2361         spec.numWorkGroups = IVec3(numElements, 1, 1);
2362         spec.verifyIO = &compareNClamp;
2363
2364         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2365
2366         return group.release();
2367 }
2368
2369 tcu::TestCaseGroup* createOpSRemComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2370 {
2371         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsrem", "Test the OpSRem instruction"));
2372         de::Random                                              rnd                             (deStringHash(group->getName()));
2373         const int                                               numElements             = 200;
2374
2375         const struct CaseParams
2376         {
2377                 const char*             name;
2378                 const char*             failMessage;            // customized status message
2379                 qpTestResult    failResult;                     // override status on failure
2380                 int                             op1Min, op1Max;         // operand ranges
2381                 int                             op2Min, op2Max;
2382         } cases[] =
2383         {
2384                 { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    0,              65536,  0,              100 },
2385                 { "all",                "Inconsistent results, but within specification",       negFailResult,                  -65536, 65536,  -100,   100 },  // see below
2386         };
2387         // If either operand is negative the result is undefined. Some implementations may still return correct values.
2388
2389         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2390         {
2391                 const CaseParams&       params          = cases[caseNdx];
2392                 ComputeShaderSpec       spec;
2393                 vector<deInt32>         inputInts1      (numElements, 0);
2394                 vector<deInt32>         inputInts2      (numElements, 0);
2395                 vector<deInt32>         outputInts      (numElements, 0);
2396
2397                 fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2398                 fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2399
2400                 for (int ndx = 0; ndx < numElements; ++ndx)
2401                 {
2402                         // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2403                         outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2404                 }
2405
2406                 spec.assembly =
2407                         string(getComputeAsmShaderPreamble()) +
2408
2409                         "OpName %main           \"main\"\n"
2410                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2411
2412                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2413
2414                         "OpDecorate %buf BufferBlock\n"
2415                         "OpDecorate %indata1 DescriptorSet 0\n"
2416                         "OpDecorate %indata1 Binding 0\n"
2417                         "OpDecorate %indata2 DescriptorSet 0\n"
2418                         "OpDecorate %indata2 Binding 1\n"
2419                         "OpDecorate %outdata DescriptorSet 0\n"
2420                         "OpDecorate %outdata Binding 2\n"
2421                         "OpDecorate %i32arr ArrayStride 4\n"
2422                         "OpMemberDecorate %buf 0 Offset 0\n"
2423
2424                         + string(getComputeAsmCommonTypes()) +
2425
2426                         "%buf        = OpTypeStruct %i32arr\n"
2427                         "%bufptr     = OpTypePointer Uniform %buf\n"
2428                         "%indata1    = OpVariable %bufptr Uniform\n"
2429                         "%indata2    = OpVariable %bufptr Uniform\n"
2430                         "%outdata    = OpVariable %bufptr Uniform\n"
2431
2432                         "%id        = OpVariable %uvec3ptr Input\n"
2433                         "%zero      = OpConstant %i32 0\n"
2434
2435                         "%main      = OpFunction %void None %voidf\n"
2436                         "%label     = OpLabel\n"
2437                         "%idval     = OpLoad %uvec3 %id\n"
2438                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2439                         "%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
2440                         "%inval1    = OpLoad %i32 %inloc1\n"
2441                         "%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
2442                         "%inval2    = OpLoad %i32 %inloc2\n"
2443                         "%rem       = OpSRem %i32 %inval1 %inval2\n"
2444                         "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
2445                         "             OpStore %outloc %rem\n"
2446                         "             OpReturn\n"
2447                         "             OpFunctionEnd\n";
2448
2449                 spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts1)));
2450                 spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts2)));
2451                 spec.outputs.push_back  (BufferSp(new Int32Buffer(outputInts)));
2452                 spec.numWorkGroups              = IVec3(numElements, 1, 1);
2453                 spec.failResult                 = params.failResult;
2454                 spec.failMessage                = params.failMessage;
2455
2456                 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2457         }
2458
2459         return group.release();
2460 }
2461
2462 tcu::TestCaseGroup* createOpSRemComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2463 {
2464         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsrem64", "Test the 64-bit OpSRem instruction"));
2465         de::Random                                              rnd                             (deStringHash(group->getName()));
2466         const int                                               numElements             = 200;
2467
2468         const struct CaseParams
2469         {
2470                 const char*             name;
2471                 const char*             failMessage;            // customized status message
2472                 qpTestResult    failResult;                     // override status on failure
2473                 bool                    positive;
2474         } cases[] =
2475         {
2476                 { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    true },
2477                 { "all",                "Inconsistent results, but within specification",       negFailResult,                  false },        // see below
2478         };
2479         // If either operand is negative the result is undefined. Some implementations may still return correct values.
2480
2481         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2482         {
2483                 const CaseParams&       params          = cases[caseNdx];
2484                 ComputeShaderSpec       spec;
2485                 vector<deInt64>         inputInts1      (numElements, 0);
2486                 vector<deInt64>         inputInts2      (numElements, 0);
2487                 vector<deInt64>         outputInts      (numElements, 0);
2488
2489                 if (params.positive)
2490                 {
2491                         fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2492                         fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2493                 }
2494                 else
2495                 {
2496                         fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2497                         fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2498                 }
2499
2500                 for (int ndx = 0; ndx < numElements; ++ndx)
2501                 {
2502                         // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2503                         outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2504                 }
2505
2506                 spec.assembly =
2507                         "OpCapability Int64\n"
2508
2509                         + string(getComputeAsmShaderPreamble()) +
2510
2511                         "OpName %main           \"main\"\n"
2512                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2513
2514                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2515
2516                         "OpDecorate %buf BufferBlock\n"
2517                         "OpDecorate %indata1 DescriptorSet 0\n"
2518                         "OpDecorate %indata1 Binding 0\n"
2519                         "OpDecorate %indata2 DescriptorSet 0\n"
2520                         "OpDecorate %indata2 Binding 1\n"
2521                         "OpDecorate %outdata DescriptorSet 0\n"
2522                         "OpDecorate %outdata Binding 2\n"
2523                         "OpDecorate %i64arr ArrayStride 8\n"
2524                         "OpMemberDecorate %buf 0 Offset 0\n"
2525
2526                         + string(getComputeAsmCommonTypes())
2527                         + string(getComputeAsmCommonInt64Types()) +
2528
2529                         "%buf        = OpTypeStruct %i64arr\n"
2530                         "%bufptr     = OpTypePointer Uniform %buf\n"
2531                         "%indata1    = OpVariable %bufptr Uniform\n"
2532                         "%indata2    = OpVariable %bufptr Uniform\n"
2533                         "%outdata    = OpVariable %bufptr Uniform\n"
2534
2535                         "%id        = OpVariable %uvec3ptr Input\n"
2536                         "%zero      = OpConstant %i64 0\n"
2537
2538                         "%main      = OpFunction %void None %voidf\n"
2539                         "%label     = OpLabel\n"
2540                         "%idval     = OpLoad %uvec3 %id\n"
2541                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2542                         "%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
2543                         "%inval1    = OpLoad %i64 %inloc1\n"
2544                         "%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
2545                         "%inval2    = OpLoad %i64 %inloc2\n"
2546                         "%rem       = OpSRem %i64 %inval1 %inval2\n"
2547                         "%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
2548                         "             OpStore %outloc %rem\n"
2549                         "             OpReturn\n"
2550                         "             OpFunctionEnd\n";
2551
2552                 spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts1)));
2553                 spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts2)));
2554                 spec.outputs.push_back  (BufferSp(new Int64Buffer(outputInts)));
2555                 spec.numWorkGroups              = IVec3(numElements, 1, 1);
2556                 spec.failResult                 = params.failResult;
2557                 spec.failMessage                = params.failMessage;
2558
2559                 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2560
2561                 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2562         }
2563
2564         return group.release();
2565 }
2566
2567 tcu::TestCaseGroup* createOpSModComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2568 {
2569         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsmod", "Test the OpSMod instruction"));
2570         de::Random                                              rnd                             (deStringHash(group->getName()));
2571         const int                                               numElements             = 200;
2572
2573         const struct CaseParams
2574         {
2575                 const char*             name;
2576                 const char*             failMessage;            // customized status message
2577                 qpTestResult    failResult;                     // override status on failure
2578                 int                             op1Min, op1Max;         // operand ranges
2579                 int                             op2Min, op2Max;
2580         } cases[] =
2581         {
2582                 { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    0,              65536,  0,              100 },
2583                 { "all",                "Inconsistent results, but within specification",       negFailResult,                  -65536, 65536,  -100,   100 },  // see below
2584         };
2585         // If either operand is negative the result is undefined. Some implementations may still return correct values.
2586
2587         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2588         {
2589                 const CaseParams&       params          = cases[caseNdx];
2590
2591                 ComputeShaderSpec       spec;
2592                 vector<deInt32>         inputInts1      (numElements, 0);
2593                 vector<deInt32>         inputInts2      (numElements, 0);
2594                 vector<deInt32>         outputInts      (numElements, 0);
2595
2596                 fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2597                 fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2598
2599                 for (int ndx = 0; ndx < numElements; ++ndx)
2600                 {
2601                         deInt32 rem = inputInts1[ndx] % inputInts2[ndx];
2602                         if (rem == 0)
2603                         {
2604                                 outputInts[ndx] = 0;
2605                         }
2606                         else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2607                         {
2608                                 // They have the same sign
2609                                 outputInts[ndx] = rem;
2610                         }
2611                         else
2612                         {
2613                                 // They have opposite sign.  The remainder operation takes the
2614                                 // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2615                                 // of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
2616                                 // the result has the correct sign and that it is still
2617                                 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2618                                 //
2619                                 // See also http://mathforum.org/library/drmath/view/52343.html
2620                                 outputInts[ndx] = rem + inputInts2[ndx];
2621                         }
2622                 }
2623
2624                 spec.assembly =
2625                         string(getComputeAsmShaderPreamble()) +
2626
2627                         "OpName %main           \"main\"\n"
2628                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2629
2630                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2631
2632                         "OpDecorate %buf BufferBlock\n"
2633                         "OpDecorate %indata1 DescriptorSet 0\n"
2634                         "OpDecorate %indata1 Binding 0\n"
2635                         "OpDecorate %indata2 DescriptorSet 0\n"
2636                         "OpDecorate %indata2 Binding 1\n"
2637                         "OpDecorate %outdata DescriptorSet 0\n"
2638                         "OpDecorate %outdata Binding 2\n"
2639                         "OpDecorate %i32arr ArrayStride 4\n"
2640                         "OpMemberDecorate %buf 0 Offset 0\n"
2641
2642                         + string(getComputeAsmCommonTypes()) +
2643
2644                         "%buf        = OpTypeStruct %i32arr\n"
2645                         "%bufptr     = OpTypePointer Uniform %buf\n"
2646                         "%indata1    = OpVariable %bufptr Uniform\n"
2647                         "%indata2    = OpVariable %bufptr Uniform\n"
2648                         "%outdata    = OpVariable %bufptr Uniform\n"
2649
2650                         "%id        = OpVariable %uvec3ptr Input\n"
2651                         "%zero      = OpConstant %i32 0\n"
2652
2653                         "%main      = OpFunction %void None %voidf\n"
2654                         "%label     = OpLabel\n"
2655                         "%idval     = OpLoad %uvec3 %id\n"
2656                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2657                         "%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
2658                         "%inval1    = OpLoad %i32 %inloc1\n"
2659                         "%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
2660                         "%inval2    = OpLoad %i32 %inloc2\n"
2661                         "%rem       = OpSMod %i32 %inval1 %inval2\n"
2662                         "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
2663                         "             OpStore %outloc %rem\n"
2664                         "             OpReturn\n"
2665                         "             OpFunctionEnd\n";
2666
2667                 spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts1)));
2668                 spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts2)));
2669                 spec.outputs.push_back  (BufferSp(new Int32Buffer(outputInts)));
2670                 spec.numWorkGroups              = IVec3(numElements, 1, 1);
2671                 spec.failResult                 = params.failResult;
2672                 spec.failMessage                = params.failMessage;
2673
2674                 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2675         }
2676
2677         return group.release();
2678 }
2679
2680 tcu::TestCaseGroup* createOpSModComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2681 {
2682         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsmod64", "Test the OpSMod instruction"));
2683         de::Random                                              rnd                             (deStringHash(group->getName()));
2684         const int                                               numElements             = 200;
2685
2686         const struct CaseParams
2687         {
2688                 const char*             name;
2689                 const char*             failMessage;            // customized status message
2690                 qpTestResult    failResult;                     // override status on failure
2691                 bool                    positive;
2692         } cases[] =
2693         {
2694                 { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    true },
2695                 { "all",                "Inconsistent results, but within specification",       negFailResult,                  false },        // see below
2696         };
2697         // If either operand is negative the result is undefined. Some implementations may still return correct values.
2698
2699         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2700         {
2701                 const CaseParams&       params          = cases[caseNdx];
2702
2703                 ComputeShaderSpec       spec;
2704                 vector<deInt64>         inputInts1      (numElements, 0);
2705                 vector<deInt64>         inputInts2      (numElements, 0);
2706                 vector<deInt64>         outputInts      (numElements, 0);
2707
2708
2709                 if (params.positive)
2710                 {
2711                         fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2712                         fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2713                 }
2714                 else
2715                 {
2716                         fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2717                         fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2718                 }
2719
2720                 for (int ndx = 0; ndx < numElements; ++ndx)
2721                 {
2722                         deInt64 rem = inputInts1[ndx] % inputInts2[ndx];
2723                         if (rem == 0)
2724                         {
2725                                 outputInts[ndx] = 0;
2726                         }
2727                         else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2728                         {
2729                                 // They have the same sign
2730                                 outputInts[ndx] = rem;
2731                         }
2732                         else
2733                         {
2734                                 // They have opposite sign.  The remainder operation takes the
2735                                 // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2736                                 // of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
2737                                 // the result has the correct sign and that it is still
2738                                 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2739                                 //
2740                                 // See also http://mathforum.org/library/drmath/view/52343.html
2741                                 outputInts[ndx] = rem + inputInts2[ndx];
2742                         }
2743                 }
2744
2745                 spec.assembly =
2746                         "OpCapability Int64\n"
2747
2748                         + string(getComputeAsmShaderPreamble()) +
2749
2750                         "OpName %main           \"main\"\n"
2751                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2752
2753                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2754
2755                         "OpDecorate %buf BufferBlock\n"
2756                         "OpDecorate %indata1 DescriptorSet 0\n"
2757                         "OpDecorate %indata1 Binding 0\n"
2758                         "OpDecorate %indata2 DescriptorSet 0\n"
2759                         "OpDecorate %indata2 Binding 1\n"
2760                         "OpDecorate %outdata DescriptorSet 0\n"
2761                         "OpDecorate %outdata Binding 2\n"
2762                         "OpDecorate %i64arr ArrayStride 8\n"
2763                         "OpMemberDecorate %buf 0 Offset 0\n"
2764
2765                         + string(getComputeAsmCommonTypes())
2766                         + string(getComputeAsmCommonInt64Types()) +
2767
2768                         "%buf        = OpTypeStruct %i64arr\n"
2769                         "%bufptr     = OpTypePointer Uniform %buf\n"
2770                         "%indata1    = OpVariable %bufptr Uniform\n"
2771                         "%indata2    = OpVariable %bufptr Uniform\n"
2772                         "%outdata    = OpVariable %bufptr Uniform\n"
2773
2774                         "%id        = OpVariable %uvec3ptr Input\n"
2775                         "%zero      = OpConstant %i64 0\n"
2776
2777                         "%main      = OpFunction %void None %voidf\n"
2778                         "%label     = OpLabel\n"
2779                         "%idval     = OpLoad %uvec3 %id\n"
2780                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2781                         "%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
2782                         "%inval1    = OpLoad %i64 %inloc1\n"
2783                         "%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
2784                         "%inval2    = OpLoad %i64 %inloc2\n"
2785                         "%rem       = OpSMod %i64 %inval1 %inval2\n"
2786                         "%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
2787                         "             OpStore %outloc %rem\n"
2788                         "             OpReturn\n"
2789                         "             OpFunctionEnd\n";
2790
2791                 spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts1)));
2792                 spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts2)));
2793                 spec.outputs.push_back  (BufferSp(new Int64Buffer(outputInts)));
2794                 spec.numWorkGroups              = IVec3(numElements, 1, 1);
2795                 spec.failResult                 = params.failResult;
2796                 spec.failMessage                = params.failMessage;
2797
2798                 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2799
2800                 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2801         }
2802
2803         return group.release();
2804 }
2805
2806 // Copy contents in the input buffer to the output buffer.
2807 tcu::TestCaseGroup* createOpCopyMemoryGroup (tcu::TestContext& testCtx)
2808 {
2809         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opcopymemory", "Test the OpCopyMemory instruction"));
2810         de::Random                                              rnd                             (deStringHash(group->getName()));
2811         const int                                               numElements             = 100;
2812
2813         // The following case adds vec4(0., 0.5, 1.5, 2.5) to each of the elements in the input buffer and writes output to the output buffer.
2814         ComputeShaderSpec                               spec1;
2815         vector<Vec4>                                    inputFloats1    (numElements);
2816         vector<Vec4>                                    outputFloats1   (numElements);
2817
2818         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats1[0], numElements * 4);
2819
2820         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
2821         floorAll(inputFloats1);
2822
2823         for (size_t ndx = 0; ndx < numElements; ++ndx)
2824                 outputFloats1[ndx] = inputFloats1[ndx] + Vec4(0.f, 0.5f, 1.5f, 2.5f);
2825
2826         spec1.assembly =
2827                 string(getComputeAsmShaderPreamble()) +
2828
2829                 "OpName %main           \"main\"\n"
2830                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2831
2832                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2833                 "OpDecorate %vec4arr ArrayStride 16\n"
2834
2835                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2836
2837                 "%vec4       = OpTypeVector %f32 4\n"
2838                 "%vec4ptr_u  = OpTypePointer Uniform %vec4\n"
2839                 "%vec4ptr_f  = OpTypePointer Function %vec4\n"
2840                 "%vec4arr    = OpTypeRuntimeArray %vec4\n"
2841                 "%buf        = OpTypeStruct %vec4arr\n"
2842                 "%bufptr     = OpTypePointer Uniform %buf\n"
2843                 "%indata     = OpVariable %bufptr Uniform\n"
2844                 "%outdata    = OpVariable %bufptr Uniform\n"
2845
2846                 "%id         = OpVariable %uvec3ptr Input\n"
2847                 "%zero       = OpConstant %i32 0\n"
2848                 "%c_f_0      = OpConstant %f32 0.\n"
2849                 "%c_f_0_5    = OpConstant %f32 0.5\n"
2850                 "%c_f_1_5    = OpConstant %f32 1.5\n"
2851                 "%c_f_2_5    = OpConstant %f32 2.5\n"
2852                 "%c_vec4     = OpConstantComposite %vec4 %c_f_0 %c_f_0_5 %c_f_1_5 %c_f_2_5\n"
2853
2854                 "%main       = OpFunction %void None %voidf\n"
2855                 "%label      = OpLabel\n"
2856                 "%v_vec4     = OpVariable %vec4ptr_f Function\n"
2857                 "%idval      = OpLoad %uvec3 %id\n"
2858                 "%x          = OpCompositeExtract %u32 %idval 0\n"
2859                 "%inloc      = OpAccessChain %vec4ptr_u %indata %zero %x\n"
2860                 "%outloc     = OpAccessChain %vec4ptr_u %outdata %zero %x\n"
2861                 "              OpCopyMemory %v_vec4 %inloc\n"
2862                 "%v_vec4_val = OpLoad %vec4 %v_vec4\n"
2863                 "%add        = OpFAdd %vec4 %v_vec4_val %c_vec4\n"
2864                 "              OpStore %outloc %add\n"
2865                 "              OpReturn\n"
2866                 "              OpFunctionEnd\n";
2867
2868         spec1.inputs.push_back(BufferSp(new Vec4Buffer(inputFloats1)));
2869         spec1.outputs.push_back(BufferSp(new Vec4Buffer(outputFloats1)));
2870         spec1.numWorkGroups = IVec3(numElements, 1, 1);
2871
2872         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector", "OpCopyMemory elements of vector type", spec1));
2873
2874         // The following case copies a float[100] variable from the input buffer to the output buffer.
2875         ComputeShaderSpec                               spec2;
2876         vector<float>                                   inputFloats2    (numElements);
2877         vector<float>                                   outputFloats2   (numElements);
2878
2879         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats2[0], numElements);
2880
2881         for (size_t ndx = 0; ndx < numElements; ++ndx)
2882                 outputFloats2[ndx] = inputFloats2[ndx];
2883
2884         spec2.assembly =
2885                 string(getComputeAsmShaderPreamble()) +
2886
2887                 "OpName %main           \"main\"\n"
2888                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2889
2890                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2891                 "OpDecorate %f32arr100 ArrayStride 4\n"
2892
2893                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2894
2895                 "%hundred        = OpConstant %u32 100\n"
2896                 "%f32arr100      = OpTypeArray %f32 %hundred\n"
2897                 "%f32arr100ptr_f = OpTypePointer Function %f32arr100\n"
2898                 "%f32arr100ptr_u = OpTypePointer Uniform %f32arr100\n"
2899                 "%buf            = OpTypeStruct %f32arr100\n"
2900                 "%bufptr         = OpTypePointer Uniform %buf\n"
2901                 "%indata         = OpVariable %bufptr Uniform\n"
2902                 "%outdata        = OpVariable %bufptr Uniform\n"
2903
2904                 "%id             = OpVariable %uvec3ptr Input\n"
2905                 "%zero           = OpConstant %i32 0\n"
2906
2907                 "%main           = OpFunction %void None %voidf\n"
2908                 "%label          = OpLabel\n"
2909                 "%var            = OpVariable %f32arr100ptr_f Function\n"
2910                 "%inarr          = OpAccessChain %f32arr100ptr_u %indata %zero\n"
2911                 "%outarr         = OpAccessChain %f32arr100ptr_u %outdata %zero\n"
2912                 "                  OpCopyMemory %var %inarr\n"
2913                 "                  OpCopyMemory %outarr %var\n"
2914                 "                  OpReturn\n"
2915                 "                  OpFunctionEnd\n";
2916
2917         spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2918         spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
2919         spec2.numWorkGroups = IVec3(1, 1, 1);
2920
2921         group->addChild(new SpvAsmComputeShaderCase(testCtx, "array", "OpCopyMemory elements of array type", spec2));
2922
2923         // The following case copies a struct{vec4, vec4, vec4, vec4} variable from the input buffer to the output buffer.
2924         ComputeShaderSpec                               spec3;
2925         vector<float>                                   inputFloats3    (16);
2926         vector<float>                                   outputFloats3   (16);
2927
2928         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats3[0], 16);
2929
2930         for (size_t ndx = 0; ndx < 16; ++ndx)
2931                 outputFloats3[ndx] = inputFloats3[ndx];
2932
2933         spec3.assembly =
2934                 string(getComputeAsmShaderPreamble()) +
2935
2936                 "OpName %main           \"main\"\n"
2937                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2938
2939                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2940                 //"OpMemberDecorate %buf 0 Offset 0\n"  - exists in getComputeAsmInputOutputBufferTraits
2941                 "OpMemberDecorate %buf 1 Offset 16\n"
2942                 "OpMemberDecorate %buf 2 Offset 32\n"
2943                 "OpMemberDecorate %buf 3 Offset 48\n"
2944
2945                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2946
2947                 "%vec4      = OpTypeVector %f32 4\n"
2948                 "%buf       = OpTypeStruct %vec4 %vec4 %vec4 %vec4\n"
2949                 "%bufptr    = OpTypePointer Uniform %buf\n"
2950                 "%indata    = OpVariable %bufptr Uniform\n"
2951                 "%outdata   = OpVariable %bufptr Uniform\n"
2952                 "%vec4stptr = OpTypePointer Function %buf\n"
2953
2954                 "%id        = OpVariable %uvec3ptr Input\n"
2955                 "%zero      = OpConstant %i32 0\n"
2956
2957                 "%main      = OpFunction %void None %voidf\n"
2958                 "%label     = OpLabel\n"
2959                 "%var       = OpVariable %vec4stptr Function\n"
2960                 "             OpCopyMemory %var %indata\n"
2961                 "             OpCopyMemory %outdata %var\n"
2962                 "             OpReturn\n"
2963                 "             OpFunctionEnd\n";
2964
2965         spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
2966         spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
2967         spec3.numWorkGroups = IVec3(1, 1, 1);
2968
2969         group->addChild(new SpvAsmComputeShaderCase(testCtx, "struct", "OpCopyMemory elements of struct type", spec3));
2970
2971         // The following case negates multiple float variables from the input buffer and stores the results to the output buffer.
2972         ComputeShaderSpec                               spec4;
2973         vector<float>                                   inputFloats4    (numElements);
2974         vector<float>                                   outputFloats4   (numElements);
2975
2976         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats4[0], numElements);
2977
2978         for (size_t ndx = 0; ndx < numElements; ++ndx)
2979                 outputFloats4[ndx] = -inputFloats4[ndx];
2980
2981         spec4.assembly =
2982                 string(getComputeAsmShaderPreamble()) +
2983
2984                 "OpName %main           \"main\"\n"
2985                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2986
2987                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2988
2989                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
2990
2991                 "%f32ptr_f  = OpTypePointer Function %f32\n"
2992                 "%id        = OpVariable %uvec3ptr Input\n"
2993                 "%zero      = OpConstant %i32 0\n"
2994
2995                 "%main      = OpFunction %void None %voidf\n"
2996                 "%label     = OpLabel\n"
2997                 "%var       = OpVariable %f32ptr_f Function\n"
2998                 "%idval     = OpLoad %uvec3 %id\n"
2999                 "%x         = OpCompositeExtract %u32 %idval 0\n"
3000                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
3001                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
3002                 "             OpCopyMemory %var %inloc\n"
3003                 "%val       = OpLoad %f32 %var\n"
3004                 "%neg       = OpFNegate %f32 %val\n"
3005                 "             OpStore %outloc %neg\n"
3006                 "             OpReturn\n"
3007                 "             OpFunctionEnd\n";
3008
3009         spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3010         spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
3011         spec4.numWorkGroups = IVec3(numElements, 1, 1);
3012
3013         group->addChild(new SpvAsmComputeShaderCase(testCtx, "float", "OpCopyMemory elements of float type", spec4));
3014
3015         return group.release();
3016 }
3017
3018 tcu::TestCaseGroup* createOpCopyObjectGroup (tcu::TestContext& testCtx)
3019 {
3020         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opcopyobject", "Test the OpCopyObject instruction"));
3021         ComputeShaderSpec                               spec;
3022         de::Random                                              rnd                             (deStringHash(group->getName()));
3023         const int                                               numElements             = 100;
3024         vector<float>                                   inputFloats             (numElements, 0);
3025         vector<float>                                   outputFloats    (numElements, 0);
3026
3027         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
3028
3029         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3030         floorAll(inputFloats);
3031
3032         for (size_t ndx = 0; ndx < numElements; ++ndx)
3033                 outputFloats[ndx] = inputFloats[ndx] + 7.5f;
3034
3035         spec.assembly =
3036                 string(getComputeAsmShaderPreamble()) +
3037
3038                 "OpName %main           \"main\"\n"
3039                 "OpName %id             \"gl_GlobalInvocationID\"\n"
3040
3041                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3042
3043                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3044
3045                 "%fmat     = OpTypeMatrix %fvec3 3\n"
3046                 "%three    = OpConstant %u32 3\n"
3047                 "%farr     = OpTypeArray %f32 %three\n"
3048                 "%fst      = OpTypeStruct %f32 %f32\n"
3049
3050                 + string(getComputeAsmInputOutputBuffer()) +
3051
3052                 "%id            = OpVariable %uvec3ptr Input\n"
3053                 "%zero          = OpConstant %i32 0\n"
3054                 "%c_f           = OpConstant %f32 1.5\n"
3055                 "%c_fvec3       = OpConstantComposite %fvec3 %c_f %c_f %c_f\n"
3056                 "%c_fmat        = OpConstantComposite %fmat %c_fvec3 %c_fvec3 %c_fvec3\n"
3057                 "%c_farr        = OpConstantComposite %farr %c_f %c_f %c_f\n"
3058                 "%c_fst         = OpConstantComposite %fst %c_f %c_f\n"
3059
3060                 "%main          = OpFunction %void None %voidf\n"
3061                 "%label         = OpLabel\n"
3062                 "%c_f_copy      = OpCopyObject %f32   %c_f\n"
3063                 "%c_fvec3_copy  = OpCopyObject %fvec3 %c_fvec3\n"
3064                 "%c_fmat_copy   = OpCopyObject %fmat  %c_fmat\n"
3065                 "%c_farr_copy   = OpCopyObject %farr  %c_farr\n"
3066                 "%c_fst_copy    = OpCopyObject %fst   %c_fst\n"
3067                 "%fvec3_elem    = OpCompositeExtract %f32 %c_fvec3_copy 0\n"
3068                 "%fmat_elem     = OpCompositeExtract %f32 %c_fmat_copy 1 2\n"
3069                 "%farr_elem     = OpCompositeExtract %f32 %c_farr_copy 2\n"
3070                 "%fst_elem      = OpCompositeExtract %f32 %c_fst_copy 1\n"
3071                 // Add up. 1.5 * 5 = 7.5.
3072                 "%add1          = OpFAdd %f32 %c_f_copy %fvec3_elem\n"
3073                 "%add2          = OpFAdd %f32 %add1     %fmat_elem\n"
3074                 "%add3          = OpFAdd %f32 %add2     %farr_elem\n"
3075                 "%add4          = OpFAdd %f32 %add3     %fst_elem\n"
3076
3077                 "%idval         = OpLoad %uvec3 %id\n"
3078                 "%x             = OpCompositeExtract %u32 %idval 0\n"
3079                 "%inloc         = OpAccessChain %f32ptr %indata %zero %x\n"
3080                 "%outloc        = OpAccessChain %f32ptr %outdata %zero %x\n"
3081                 "%inval         = OpLoad %f32 %inloc\n"
3082                 "%add           = OpFAdd %f32 %add4 %inval\n"
3083                 "                 OpStore %outloc %add\n"
3084                 "                 OpReturn\n"
3085                 "                 OpFunctionEnd\n";
3086         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3087         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3088         spec.numWorkGroups = IVec3(numElements, 1, 1);
3089
3090         group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "OpCopyObject on different types", spec));
3091
3092         return group.release();
3093 }
3094 // Assembly code used for testing OpUnreachable is based on GLSL source code:
3095 //
3096 // #version 430
3097 //
3098 // layout(std140, set = 0, binding = 0) readonly buffer Input {
3099 //   float elements[];
3100 // } input_data;
3101 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
3102 //   float elements[];
3103 // } output_data;
3104 //
3105 // void not_called_func() {
3106 //   // place OpUnreachable here
3107 // }
3108 //
3109 // uint modulo4(uint val) {
3110 //   switch (val % uint(4)) {
3111 //     case 0:  return 3;
3112 //     case 1:  return 2;
3113 //     case 2:  return 1;
3114 //     case 3:  return 0;
3115 //     default: return 100; // place OpUnreachable here
3116 //   }
3117 // }
3118 //
3119 // uint const5() {
3120 //   return 5;
3121 //   // place OpUnreachable here
3122 // }
3123 //
3124 // void main() {
3125 //   uint x = gl_GlobalInvocationID.x;
3126 //   if (const5() > modulo4(1000)) {
3127 //     output_data.elements[x] = -input_data.elements[x];
3128 //   } else {
3129 //     // place OpUnreachable here
3130 //     output_data.elements[x] = input_data.elements[x];
3131 //   }
3132 // }
3133
3134 tcu::TestCaseGroup* createOpUnreachableGroup (tcu::TestContext& testCtx)
3135 {
3136         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opunreachable", "Test the OpUnreachable instruction"));
3137         ComputeShaderSpec                               spec;
3138         de::Random                                              rnd                             (deStringHash(group->getName()));
3139         const int                                               numElements             = 100;
3140         vector<float>                                   positiveFloats  (numElements, 0);
3141         vector<float>                                   negativeFloats  (numElements, 0);
3142
3143         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
3144
3145         for (size_t ndx = 0; ndx < numElements; ++ndx)
3146                 negativeFloats[ndx] = -positiveFloats[ndx];
3147
3148         spec.assembly =
3149                 string(getComputeAsmShaderPreamble()) +
3150
3151                 "OpSource GLSL 430\n"
3152                 "OpName %main            \"main\"\n"
3153                 "OpName %func_not_called_func \"not_called_func(\"\n"
3154                 "OpName %func_modulo4         \"modulo4(u1;\"\n"
3155                 "OpName %func_const5          \"const5(\"\n"
3156                 "OpName %id                   \"gl_GlobalInvocationID\"\n"
3157
3158                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3159
3160                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3161
3162                 "%u32ptr    = OpTypePointer Function %u32\n"
3163                 "%uintfuint = OpTypeFunction %u32 %u32ptr\n"
3164                 "%unitf     = OpTypeFunction %u32\n"
3165
3166                 "%id        = OpVariable %uvec3ptr Input\n"
3167                 "%zero      = OpConstant %u32 0\n"
3168                 "%one       = OpConstant %u32 1\n"
3169                 "%two       = OpConstant %u32 2\n"
3170                 "%three     = OpConstant %u32 3\n"
3171                 "%four      = OpConstant %u32 4\n"
3172                 "%five      = OpConstant %u32 5\n"
3173                 "%hundred   = OpConstant %u32 100\n"
3174                 "%thousand  = OpConstant %u32 1000\n"
3175
3176                 + string(getComputeAsmInputOutputBuffer()) +
3177
3178                 // Main()
3179                 "%main   = OpFunction %void None %voidf\n"
3180                 "%main_entry  = OpLabel\n"
3181                 "%v_thousand  = OpVariable %u32ptr Function %thousand\n"
3182                 "%idval       = OpLoad %uvec3 %id\n"
3183                 "%x           = OpCompositeExtract %u32 %idval 0\n"
3184                 "%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
3185                 "%inval       = OpLoad %f32 %inloc\n"
3186                 "%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
3187                 "%ret_const5  = OpFunctionCall %u32 %func_const5\n"
3188                 "%ret_modulo4 = OpFunctionCall %u32 %func_modulo4 %v_thousand\n"
3189                 "%cmp_gt      = OpUGreaterThan %bool %ret_const5 %ret_modulo4\n"
3190                 "               OpSelectionMerge %if_end None\n"
3191                 "               OpBranchConditional %cmp_gt %if_true %if_false\n"
3192                 "%if_true     = OpLabel\n"
3193                 "%negate      = OpFNegate %f32 %inval\n"
3194                 "               OpStore %outloc %negate\n"
3195                 "               OpBranch %if_end\n"
3196                 "%if_false    = OpLabel\n"
3197                 "               OpUnreachable\n" // Unreachable else branch for if statement
3198                 "%if_end      = OpLabel\n"
3199                 "               OpReturn\n"
3200                 "               OpFunctionEnd\n"
3201
3202                 // not_called_function()
3203                 "%func_not_called_func  = OpFunction %void None %voidf\n"
3204                 "%not_called_func_entry = OpLabel\n"
3205                 "                         OpUnreachable\n" // Unreachable entry block in not called static function
3206                 "                         OpFunctionEnd\n"
3207
3208                 // modulo4()
3209                 "%func_modulo4  = OpFunction %u32 None %uintfuint\n"
3210                 "%valptr        = OpFunctionParameter %u32ptr\n"
3211                 "%modulo4_entry = OpLabel\n"
3212                 "%val           = OpLoad %u32 %valptr\n"
3213                 "%modulo        = OpUMod %u32 %val %four\n"
3214                 "                 OpSelectionMerge %switch_merge None\n"
3215                 "                 OpSwitch %modulo %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
3216                 "%case0         = OpLabel\n"
3217                 "                 OpReturnValue %three\n"
3218                 "%case1         = OpLabel\n"
3219                 "                 OpReturnValue %two\n"
3220                 "%case2         = OpLabel\n"
3221                 "                 OpReturnValue %one\n"
3222                 "%case3         = OpLabel\n"
3223                 "                 OpReturnValue %zero\n"
3224                 "%default       = OpLabel\n"
3225                 "                 OpUnreachable\n" // Unreachable default case for switch statement
3226                 "%switch_merge  = OpLabel\n"
3227                 "                 OpUnreachable\n" // Unreachable merge block for switch statement
3228                 "                 OpFunctionEnd\n"
3229
3230                 // const5()
3231                 "%func_const5  = OpFunction %u32 None %unitf\n"
3232                 "%const5_entry = OpLabel\n"
3233                 "                OpReturnValue %five\n"
3234                 "%unreachable  = OpLabel\n"
3235                 "                OpUnreachable\n" // Unreachable block in function
3236                 "                OpFunctionEnd\n";
3237         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
3238         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
3239         spec.numWorkGroups = IVec3(numElements, 1, 1);
3240
3241         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpUnreachable appearing at different places", spec));
3242
3243         return group.release();
3244 }
3245
3246 // Assembly code used for testing decoration group is based on GLSL source code:
3247 //
3248 // #version 430
3249 //
3250 // layout(std140, set = 0, binding = 0) readonly buffer Input0 {
3251 //   float elements[];
3252 // } input_data0;
3253 // layout(std140, set = 0, binding = 1) readonly buffer Input1 {
3254 //   float elements[];
3255 // } input_data1;
3256 // layout(std140, set = 0, binding = 2) readonly buffer Input2 {
3257 //   float elements[];
3258 // } input_data2;
3259 // layout(std140, set = 0, binding = 3) readonly buffer Input3 {
3260 //   float elements[];
3261 // } input_data3;
3262 // layout(std140, set = 0, binding = 4) readonly buffer Input4 {
3263 //   float elements[];
3264 // } input_data4;
3265 // layout(std140, set = 0, binding = 5) writeonly buffer Output {
3266 //   float elements[];
3267 // } output_data;
3268 //
3269 // void main() {
3270 //   uint x = gl_GlobalInvocationID.x;
3271 //   output_data.elements[x] = input_data0.elements[x] + input_data1.elements[x] + input_data2.elements[x] + input_data3.elements[x] + input_data4.elements[x];
3272 // }
3273 tcu::TestCaseGroup* createDecorationGroupGroup (tcu::TestContext& testCtx)
3274 {
3275         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "decoration_group", "Test the OpDecorationGroup & OpGroupDecorate instruction"));
3276         ComputeShaderSpec                               spec;
3277         de::Random                                              rnd                             (deStringHash(group->getName()));
3278         const int                                               numElements             = 100;
3279         vector<float>                                   inputFloats0    (numElements, 0);
3280         vector<float>                                   inputFloats1    (numElements, 0);
3281         vector<float>                                   inputFloats2    (numElements, 0);
3282         vector<float>                                   inputFloats3    (numElements, 0);
3283         vector<float>                                   inputFloats4    (numElements, 0);
3284         vector<float>                                   outputFloats    (numElements, 0);
3285
3286         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats0[0], numElements);
3287         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats1[0], numElements);
3288         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats2[0], numElements);
3289         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats3[0], numElements);
3290         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats4[0], numElements);
3291
3292         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3293         floorAll(inputFloats0);
3294         floorAll(inputFloats1);
3295         floorAll(inputFloats2);
3296         floorAll(inputFloats3);
3297         floorAll(inputFloats4);
3298
3299         for (size_t ndx = 0; ndx < numElements; ++ndx)
3300                 outputFloats[ndx] = inputFloats0[ndx] + inputFloats1[ndx] + inputFloats2[ndx] + inputFloats3[ndx] + inputFloats4[ndx];
3301
3302         spec.assembly =
3303                 string(getComputeAsmShaderPreamble()) +
3304
3305                 "OpSource GLSL 430\n"
3306                 "OpName %main \"main\"\n"
3307                 "OpName %id \"gl_GlobalInvocationID\"\n"
3308
3309                 // Not using group decoration on variable.
3310                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3311                 // Not using group decoration on type.
3312                 "OpDecorate %f32arr ArrayStride 4\n"
3313
3314                 "OpDecorate %groups BufferBlock\n"
3315                 "OpDecorate %groupm Offset 0\n"
3316                 "%groups = OpDecorationGroup\n"
3317                 "%groupm = OpDecorationGroup\n"
3318
3319                 // Group decoration on multiple structs.
3320                 "OpGroupDecorate %groups %outbuf %inbuf0 %inbuf1 %inbuf2 %inbuf3 %inbuf4\n"
3321                 // Group decoration on multiple struct members.
3322                 "OpGroupMemberDecorate %groupm %outbuf 0 %inbuf0 0 %inbuf1 0 %inbuf2 0 %inbuf3 0 %inbuf4 0\n"
3323
3324                 "OpDecorate %group1 DescriptorSet 0\n"
3325                 "OpDecorate %group3 DescriptorSet 0\n"
3326                 "OpDecorate %group3 NonWritable\n"
3327                 "OpDecorate %group3 Restrict\n"
3328                 "%group0 = OpDecorationGroup\n"
3329                 "%group1 = OpDecorationGroup\n"
3330                 "%group3 = OpDecorationGroup\n"
3331
3332                 // Applying the same decoration group multiple times.
3333                 "OpGroupDecorate %group1 %outdata\n"
3334                 "OpGroupDecorate %group1 %outdata\n"
3335                 "OpGroupDecorate %group1 %outdata\n"
3336                 "OpDecorate %outdata DescriptorSet 0\n"
3337                 "OpDecorate %outdata Binding 5\n"
3338                 // Applying decoration group containing nothing.
3339                 "OpGroupDecorate %group0 %indata0\n"
3340                 "OpDecorate %indata0 DescriptorSet 0\n"
3341                 "OpDecorate %indata0 Binding 0\n"
3342                 // Applying decoration group containing one decoration.
3343                 "OpGroupDecorate %group1 %indata1\n"
3344                 "OpDecorate %indata1 Binding 1\n"
3345                 // Applying decoration group containing multiple decorations.
3346                 "OpGroupDecorate %group3 %indata2 %indata3\n"
3347                 "OpDecorate %indata2 Binding 2\n"
3348                 "OpDecorate %indata3 Binding 3\n"
3349                 // Applying multiple decoration groups (with overlapping).
3350                 "OpGroupDecorate %group0 %indata4\n"
3351                 "OpGroupDecorate %group1 %indata4\n"
3352                 "OpGroupDecorate %group3 %indata4\n"
3353                 "OpDecorate %indata4 Binding 4\n"
3354
3355                 + string(getComputeAsmCommonTypes()) +
3356
3357                 "%id   = OpVariable %uvec3ptr Input\n"
3358                 "%zero = OpConstant %i32 0\n"
3359
3360                 "%outbuf    = OpTypeStruct %f32arr\n"
3361                 "%outbufptr = OpTypePointer Uniform %outbuf\n"
3362                 "%outdata   = OpVariable %outbufptr Uniform\n"
3363                 "%inbuf0    = OpTypeStruct %f32arr\n"
3364                 "%inbuf0ptr = OpTypePointer Uniform %inbuf0\n"
3365                 "%indata0   = OpVariable %inbuf0ptr Uniform\n"
3366                 "%inbuf1    = OpTypeStruct %f32arr\n"
3367                 "%inbuf1ptr = OpTypePointer Uniform %inbuf1\n"
3368                 "%indata1   = OpVariable %inbuf1ptr Uniform\n"
3369                 "%inbuf2    = OpTypeStruct %f32arr\n"
3370                 "%inbuf2ptr = OpTypePointer Uniform %inbuf2\n"
3371                 "%indata2   = OpVariable %inbuf2ptr Uniform\n"
3372                 "%inbuf3    = OpTypeStruct %f32arr\n"
3373                 "%inbuf3ptr = OpTypePointer Uniform %inbuf3\n"
3374                 "%indata3   = OpVariable %inbuf3ptr Uniform\n"
3375                 "%inbuf4    = OpTypeStruct %f32arr\n"
3376                 "%inbufptr  = OpTypePointer Uniform %inbuf4\n"
3377                 "%indata4   = OpVariable %inbufptr Uniform\n"
3378
3379                 "%main   = OpFunction %void None %voidf\n"
3380                 "%label  = OpLabel\n"
3381                 "%idval  = OpLoad %uvec3 %id\n"
3382                 "%x      = OpCompositeExtract %u32 %idval 0\n"
3383                 "%inloc0 = OpAccessChain %f32ptr %indata0 %zero %x\n"
3384                 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
3385                 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
3386                 "%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
3387                 "%inloc4 = OpAccessChain %f32ptr %indata4 %zero %x\n"
3388                 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3389                 "%inval0 = OpLoad %f32 %inloc0\n"
3390                 "%inval1 = OpLoad %f32 %inloc1\n"
3391                 "%inval2 = OpLoad %f32 %inloc2\n"
3392                 "%inval3 = OpLoad %f32 %inloc3\n"
3393                 "%inval4 = OpLoad %f32 %inloc4\n"
3394                 "%add0   = OpFAdd %f32 %inval0 %inval1\n"
3395                 "%add1   = OpFAdd %f32 %add0 %inval2\n"
3396                 "%add2   = OpFAdd %f32 %add1 %inval3\n"
3397                 "%add    = OpFAdd %f32 %add2 %inval4\n"
3398                 "          OpStore %outloc %add\n"
3399                 "          OpReturn\n"
3400                 "          OpFunctionEnd\n";
3401         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats0)));
3402         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
3403         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
3404         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3405         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3406         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3407         spec.numWorkGroups = IVec3(numElements, 1, 1);
3408
3409         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "decoration group cases", spec));
3410
3411         return group.release();
3412 }
3413
3414 struct SpecConstantTwoIntCase
3415 {
3416         const char*             caseName;
3417         const char*             scDefinition0;
3418         const char*             scDefinition1;
3419         const char*             scResultType;
3420         const char*             scOperation;
3421         deInt32                 scActualValue0;
3422         deInt32                 scActualValue1;
3423         const char*             resultOperation;
3424         vector<deInt32> expectedOutput;
3425         deInt32                 scActualValueLength;
3426
3427                                         SpecConstantTwoIntCase (const char* name,
3428                                                                                         const char* definition0,
3429                                                                                         const char* definition1,
3430                                                                                         const char* resultType,
3431                                                                                         const char* operation,
3432                                                                                         deInt32 value0,
3433                                                                                         deInt32 value1,
3434                                                                                         const char* resultOp,
3435                                                                                         const vector<deInt32>& output,
3436                                                                                         const deInt32   valueLength = sizeof(deInt32))
3437                                                 : caseName                              (name)
3438                                                 , scDefinition0                 (definition0)
3439                                                 , scDefinition1                 (definition1)
3440                                                 , scResultType                  (resultType)
3441                                                 , scOperation                   (operation)
3442                                                 , scActualValue0                (value0)
3443                                                 , scActualValue1                (value1)
3444                                                 , resultOperation               (resultOp)
3445                                                 , expectedOutput                (output)
3446                                                 , scActualValueLength   (valueLength)
3447                                                 {}
3448 };
3449
3450 tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
3451 {
3452         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
3453         vector<SpecConstantTwoIntCase>  cases;
3454         de::Random                                              rnd                             (deStringHash(group->getName()));
3455         const int                                               numElements             = 100;
3456         const deInt32                                   p1AsFloat16             = 0x3c00; // +1(fp16) == 0 01111 0000000000 == 0011 1100 0000 0000
3457         vector<deInt32>                                 inputInts               (numElements, 0);
3458         vector<deInt32>                                 outputInts1             (numElements, 0);
3459         vector<deInt32>                                 outputInts2             (numElements, 0);
3460         vector<deInt32>                                 outputInts3             (numElements, 0);
3461         vector<deInt32>                                 outputInts4             (numElements, 0);
3462         const StringTemplate                    shaderTemplate  (
3463                 "${CAPABILITIES:opt}"
3464                 + string(getComputeAsmShaderPreamble()) +
3465
3466                 "OpName %main           \"main\"\n"
3467                 "OpName %id             \"gl_GlobalInvocationID\"\n"
3468
3469                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3470                 "OpDecorate %sc_0  SpecId 0\n"
3471                 "OpDecorate %sc_1  SpecId 1\n"
3472                 "OpDecorate %i32arr ArrayStride 4\n"
3473
3474                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3475
3476                 "${OPTYPE_DEFINITIONS:opt}"
3477                 "%buf     = OpTypeStruct %i32arr\n"
3478                 "%bufptr  = OpTypePointer Uniform %buf\n"
3479                 "%indata    = OpVariable %bufptr Uniform\n"
3480                 "%outdata   = OpVariable %bufptr Uniform\n"
3481
3482                 "%id        = OpVariable %uvec3ptr Input\n"
3483                 "%zero      = OpConstant %i32 0\n"
3484
3485                 "%sc_0      = OpSpecConstant${SC_DEF0}\n"
3486                 "%sc_1      = OpSpecConstant${SC_DEF1}\n"
3487                 "%sc_final  = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n"
3488
3489                 "%main      = OpFunction %void None %voidf\n"
3490                 "%label     = OpLabel\n"
3491                 "${TYPE_CONVERT:opt}"
3492                 "%idval     = OpLoad %uvec3 %id\n"
3493                 "%x         = OpCompositeExtract %u32 %idval 0\n"
3494                 "%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
3495                 "%inval     = OpLoad %i32 %inloc\n"
3496                 "%final     = ${GEN_RESULT}\n"
3497                 "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
3498                 "             OpStore %outloc %final\n"
3499                 "             OpReturn\n"
3500                 "             OpFunctionEnd\n");
3501
3502         fillRandomScalars(rnd, -65536, 65536, &inputInts[0], numElements);
3503
3504         for (size_t ndx = 0; ndx < numElements; ++ndx)
3505         {
3506                 outputInts1[ndx] = inputInts[ndx] + 42;
3507                 outputInts2[ndx] = inputInts[ndx];
3508                 outputInts3[ndx] = inputInts[ndx] - 11200;
3509                 outputInts4[ndx] = inputInts[ndx] + 1;
3510         }
3511
3512         const char addScToInput[]               = "OpIAdd %i32 %inval %sc_final";
3513         const char addSc32ToInput[]             = "OpIAdd %i32 %inval %sc_final32";
3514         const char selectTrueUsingSc[]  = "OpSelect %i32 %sc_final %inval %zero";
3515         const char selectFalseUsingSc[] = "OpSelect %i32 %sc_final %zero %inval";
3516
3517         cases.push_back(SpecConstantTwoIntCase("iadd",                                  " %i32 0",              " %i32 0",              "%i32",         "IAdd                 %sc_0 %sc_1",                     62,             -20,    addScToInput,           outputInts1));
3518         cases.push_back(SpecConstantTwoIntCase("isub",                                  " %i32 0",              " %i32 0",              "%i32",         "ISub                 %sc_0 %sc_1",                     100,    58,             addScToInput,           outputInts1));
3519         cases.push_back(SpecConstantTwoIntCase("imul",                                  " %i32 0",              " %i32 0",              "%i32",         "IMul                 %sc_0 %sc_1",                     -2,             -21,    addScToInput,           outputInts1));
3520         cases.push_back(SpecConstantTwoIntCase("sdiv",                                  " %i32 0",              " %i32 0",              "%i32",         "SDiv                 %sc_0 %sc_1",                     -126,   -3,             addScToInput,           outputInts1));
3521         cases.push_back(SpecConstantTwoIntCase("udiv",                                  " %i32 0",              " %i32 0",              "%i32",         "UDiv                 %sc_0 %sc_1",                     126,    3,              addScToInput,           outputInts1));
3522         cases.push_back(SpecConstantTwoIntCase("srem",                                  " %i32 0",              " %i32 0",              "%i32",         "SRem                 %sc_0 %sc_1",                     7,              3,              addScToInput,           outputInts4));
3523         cases.push_back(SpecConstantTwoIntCase("smod",                                  " %i32 0",              " %i32 0",              "%i32",         "SMod                 %sc_0 %sc_1",                     7,              3,              addScToInput,           outputInts4));
3524         cases.push_back(SpecConstantTwoIntCase("umod",                                  " %i32 0",              " %i32 0",              "%i32",         "UMod                 %sc_0 %sc_1",                     342,    50,             addScToInput,           outputInts1));
3525         cases.push_back(SpecConstantTwoIntCase("bitwiseand",                    " %i32 0",              " %i32 0",              "%i32",         "BitwiseAnd           %sc_0 %sc_1",                     42,             63,             addScToInput,           outputInts1));
3526         cases.push_back(SpecConstantTwoIntCase("bitwiseor",                             " %i32 0",              " %i32 0",              "%i32",         "BitwiseOr            %sc_0 %sc_1",                     34,             8,              addScToInput,           outputInts1));
3527         cases.push_back(SpecConstantTwoIntCase("bitwisexor",                    " %i32 0",              " %i32 0",              "%i32",         "BitwiseXor           %sc_0 %sc_1",                     18,             56,             addScToInput,           outputInts1));
3528         cases.push_back(SpecConstantTwoIntCase("shiftrightlogical",             " %i32 0",              " %i32 0",              "%i32",         "ShiftRightLogical    %sc_0 %sc_1",                     168,    2,              addScToInput,           outputInts1));
3529         cases.push_back(SpecConstantTwoIntCase("shiftrightarithmetic",  " %i32 0",              " %i32 0",              "%i32",         "ShiftRightArithmetic %sc_0 %sc_1",                     168,    2,              addScToInput,           outputInts1));
3530         cases.push_back(SpecConstantTwoIntCase("shiftleftlogical",              " %i32 0",              " %i32 0",              "%i32",         "ShiftLeftLogical     %sc_0 %sc_1",                     21,             1,              addScToInput,           outputInts1));
3531         cases.push_back(SpecConstantTwoIntCase("slessthan",                             " %i32 0",              " %i32 0",              "%bool",        "SLessThan            %sc_0 %sc_1",                     -20,    -10,    selectTrueUsingSc,      outputInts2));
3532         cases.push_back(SpecConstantTwoIntCase("ulessthan",                             " %i32 0",              " %i32 0",              "%bool",        "ULessThan            %sc_0 %sc_1",                     10,             20,             selectTrueUsingSc,      outputInts2));
3533         cases.push_back(SpecConstantTwoIntCase("sgreaterthan",                  " %i32 0",              " %i32 0",              "%bool",        "SGreaterThan         %sc_0 %sc_1",                     -1000,  50,             selectFalseUsingSc,     outputInts2));
3534         cases.push_back(SpecConstantTwoIntCase("ugreaterthan",                  " %i32 0",              " %i32 0",              "%bool",        "UGreaterThan         %sc_0 %sc_1",                     10,             5,              selectTrueUsingSc,      outputInts2));
3535         cases.push_back(SpecConstantTwoIntCase("slessthanequal",                " %i32 0",              " %i32 0",              "%bool",        "SLessThanEqual       %sc_0 %sc_1",                     -10,    -10,    selectTrueUsingSc,      outputInts2));
3536         cases.push_back(SpecConstantTwoIntCase("ulessthanequal",                " %i32 0",              " %i32 0",              "%bool",        "ULessThanEqual       %sc_0 %sc_1",                     50,             100,    selectTrueUsingSc,      outputInts2));
3537         cases.push_back(SpecConstantTwoIntCase("sgreaterthanequal",             " %i32 0",              " %i32 0",              "%bool",        "SGreaterThanEqual    %sc_0 %sc_1",                     -1000,  50,             selectFalseUsingSc,     outputInts2));
3538         cases.push_back(SpecConstantTwoIntCase("ugreaterthanequal",             " %i32 0",              " %i32 0",              "%bool",        "UGreaterThanEqual    %sc_0 %sc_1",                     10,             10,             selectTrueUsingSc,      outputInts2));
3539         cases.push_back(SpecConstantTwoIntCase("iequal",                                " %i32 0",              " %i32 0",              "%bool",        "IEqual               %sc_0 %sc_1",                     42,             24,             selectFalseUsingSc,     outputInts2));
3540         cases.push_back(SpecConstantTwoIntCase("inotequal",                             " %i32 0",              " %i32 0",              "%bool",        "INotEqual            %sc_0 %sc_1",                     42,             24,             selectTrueUsingSc,      outputInts2));
3541         cases.push_back(SpecConstantTwoIntCase("logicaland",                    "True %bool",   "True %bool",   "%bool",        "LogicalAnd           %sc_0 %sc_1",                     0,              1,              selectFalseUsingSc,     outputInts2));
3542         cases.push_back(SpecConstantTwoIntCase("logicalor",                             "False %bool",  "False %bool",  "%bool",        "LogicalOr            %sc_0 %sc_1",                     1,              0,              selectTrueUsingSc,      outputInts2));
3543         cases.push_back(SpecConstantTwoIntCase("logicalequal",                  "True %bool",   "True %bool",   "%bool",        "LogicalEqual         %sc_0 %sc_1",                     0,              1,              selectFalseUsingSc,     outputInts2));
3544         cases.push_back(SpecConstantTwoIntCase("logicalnotequal",               "False %bool",  "False %bool",  "%bool",        "LogicalNotEqual      %sc_0 %sc_1",                     1,              0,              selectTrueUsingSc,      outputInts2));
3545         cases.push_back(SpecConstantTwoIntCase("snegate",                               " %i32 0",              " %i32 0",              "%i32",         "SNegate              %sc_0",                           -42,    0,              addScToInput,           outputInts1));
3546         cases.push_back(SpecConstantTwoIntCase("not",                                   " %i32 0",              " %i32 0",              "%i32",         "Not                  %sc_0",                           -43,    0,              addScToInput,           outputInts1));
3547         cases.push_back(SpecConstantTwoIntCase("logicalnot",                    "False %bool",  "False %bool",  "%bool",        "LogicalNot           %sc_0",                           1,              0,              selectFalseUsingSc,     outputInts2));
3548         cases.push_back(SpecConstantTwoIntCase("select",                                "False %bool",  " %i32 0",              "%i32",         "Select               %sc_0 %sc_1 %zero",       1,              42,             addScToInput,           outputInts1));
3549         cases.push_back(SpecConstantTwoIntCase("sconvert",                              " %i32 0",              " %i32 0",              "%i16",         "SConvert             %sc_0",                           -11200, 0,              addSc32ToInput,         outputInts3));
3550         // -969998336 stored as 32-bit two's complement is the binary representation of -11200 as IEEE-754 Float
3551         cases.push_back(SpecConstantTwoIntCase("fconvert",                              " %f32 0",              " %f32 0",              "%f64",         "FConvert             %sc_0",                           -969998336, 0,  addSc32ToInput,         outputInts3));
3552         cases.push_back(SpecConstantTwoIntCase("fconvert16",                    " %f16 0",              " %f16 0",              "%f32",         "FConvert             %sc_0",                           p1AsFloat16, 0, addSc32ToInput,         outputInts4, sizeof(deFloat16)));
3553
3554         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
3555         {
3556                 map<string, string>             specializations;
3557                 ComputeShaderSpec               spec;
3558
3559                 specializations["SC_DEF0"]                      = cases[caseNdx].scDefinition0;
3560                 specializations["SC_DEF1"]                      = cases[caseNdx].scDefinition1;
3561                 specializations["SC_RESULT_TYPE"]       = cases[caseNdx].scResultType;
3562                 specializations["SC_OP"]                        = cases[caseNdx].scOperation;
3563                 specializations["GEN_RESULT"]           = cases[caseNdx].resultOperation;
3564
3565                 // Special SPIR-V code for SConvert-case
3566                 if (strcmp(cases[caseNdx].caseName, "sconvert") == 0)
3567                 {
3568                         spec.requestedVulkanFeatures.coreFeatures.shaderInt16   = VK_TRUE;
3569                         specializations["CAPABILITIES"]                                                 = "OpCapability Int16\n";                                                       // Adds 16-bit integer capability
3570                         specializations["OPTYPE_DEFINITIONS"]                                   = "%i16 = OpTypeInt 16 1\n";                                            // Adds 16-bit integer type
3571                         specializations["TYPE_CONVERT"]                                                 = "%sc_final32 = OpSConvert %i32 %sc_final\n";          // Converts 16-bit integer to 32-bit integer
3572                 }
3573
3574                 // Special SPIR-V code for FConvert-case
3575                 if (strcmp(cases[caseNdx].caseName, "fconvert") == 0)
3576                 {
3577                         spec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3578                         specializations["CAPABILITIES"]                                                 = "OpCapability Float64\n";                                                     // Adds 64-bit float capability
3579                         specializations["OPTYPE_DEFINITIONS"]                                   = "%f64 = OpTypeFloat 64\n";                                            // Adds 64-bit float type
3580                         specializations["TYPE_CONVERT"]                                                 = "%sc_final32 = OpConvertFToS %i32 %sc_final\n";       // Converts 64-bit float to 32-bit integer
3581                 }
3582
3583                 // Special SPIR-V code for FConvert-case for 16-bit floats
3584                 if (strcmp(cases[caseNdx].caseName, "fconvert16") == 0)
3585                 {
3586                         spec.extensions.push_back("VK_KHR_shader_float16_int8");
3587                         spec.requestedVulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
3588                         specializations["CAPABILITIES"]                 = "OpCapability Float16\n";                                                     // Adds 16-bit float capability
3589                         specializations["OPTYPE_DEFINITIONS"]   = "%f16 = OpTypeFloat 16\n";                                            // Adds 16-bit float type
3590                         specializations["TYPE_CONVERT"]                 = "%sc_final32 = OpConvertFToS %i32 %sc_final\n";       // Converts 16-bit float to 32-bit integer
3591                 }
3592
3593                 spec.assembly = shaderTemplate.specialize(specializations);
3594                 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
3595                 spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
3596                 spec.numWorkGroups = IVec3(numElements, 1, 1);
3597                 spec.specConstants.append(&cases[caseNdx].scActualValue0, cases[caseNdx].scActualValueLength);
3598                 spec.specConstants.append(&cases[caseNdx].scActualValue1, cases[caseNdx].scActualValueLength);
3599
3600                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName, cases[caseNdx].caseName, spec));
3601         }
3602
3603         ComputeShaderSpec                               spec;
3604
3605         spec.assembly =
3606                 string(getComputeAsmShaderPreamble()) +
3607
3608                 "OpName %main           \"main\"\n"
3609                 "OpName %id             \"gl_GlobalInvocationID\"\n"
3610
3611                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3612                 "OpDecorate %sc_0  SpecId 0\n"
3613                 "OpDecorate %sc_1  SpecId 1\n"
3614                 "OpDecorate %sc_2  SpecId 2\n"
3615                 "OpDecorate %i32arr ArrayStride 4\n"
3616
3617                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3618
3619                 "%ivec3       = OpTypeVector %i32 3\n"
3620                 "%buf         = OpTypeStruct %i32arr\n"
3621                 "%bufptr      = OpTypePointer Uniform %buf\n"
3622                 "%indata      = OpVariable %bufptr Uniform\n"
3623                 "%outdata     = OpVariable %bufptr Uniform\n"
3624
3625                 "%id          = OpVariable %uvec3ptr Input\n"
3626                 "%zero        = OpConstant %i32 0\n"
3627                 "%ivec3_0     = OpConstantComposite %ivec3 %zero %zero %zero\n"
3628                 "%vec3_undef  = OpUndef %ivec3\n"
3629
3630                 "%sc_0        = OpSpecConstant %i32 0\n"
3631                 "%sc_1        = OpSpecConstant %i32 0\n"
3632                 "%sc_2        = OpSpecConstant %i32 0\n"
3633                 "%sc_vec3_0   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_0        %ivec3_0     0\n"                                                 // (sc_0, 0, 0)
3634                 "%sc_vec3_1   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_1        %ivec3_0     1\n"                                                 // (0, sc_1, 0)
3635                 "%sc_vec3_2   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_2        %ivec3_0     2\n"                                                 // (0, 0, sc_2)
3636                 "%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0   %vec3_undef  0          0xFFFFFFFF 2\n"   // (sc_0, ???,  0)
3637                 "%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1          0\n"   // (???,  sc_1, 0)
3638                 "%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle    %vec3_undef  %sc_vec3_2   5          0xFFFFFFFF 5\n"   // (sc_2, ???,  sc_2)
3639                 "%sc_vec3_01  = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n"                                             // (0,    sc_0, sc_1)
3640                 "%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n"                                             // (sc_2, sc_0, sc_1)
3641                 "%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"                                                 // sc_2
3642                 "%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"                                                 // sc_0
3643                 "%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"                                                 // sc_1
3644                 "%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"                                                              // (sc_2 - sc_0)
3645                 "%sc_final    = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n"                                                              // (sc_2 - sc_0) * sc_1
3646
3647                 "%main      = OpFunction %void None %voidf\n"
3648                 "%label     = OpLabel\n"
3649                 "%idval     = OpLoad %uvec3 %id\n"
3650                 "%x         = OpCompositeExtract %u32 %idval 0\n"
3651                 "%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
3652                 "%inval     = OpLoad %i32 %inloc\n"
3653                 "%final     = OpIAdd %i32 %inval %sc_final\n"
3654                 "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
3655                 "             OpStore %outloc %final\n"
3656                 "             OpReturn\n"
3657                 "             OpFunctionEnd\n";
3658         spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
3659         spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts3)));
3660         spec.numWorkGroups = IVec3(numElements, 1, 1);
3661         spec.specConstants.append<deInt32>(123);
3662         spec.specConstants.append<deInt32>(56);
3663         spec.specConstants.append<deInt32>(-77);
3664
3665         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", "VectorShuffle, CompositeExtract, & CompositeInsert", spec));
3666
3667         return group.release();
3668 }
3669
3670 void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestContext& testCtx)
3671 {
3672         ComputeShaderSpec       specInt;
3673         ComputeShaderSpec       specFloat;
3674         ComputeShaderSpec       specFloat16;
3675         ComputeShaderSpec       specVec3;
3676         ComputeShaderSpec       specMat4;
3677         ComputeShaderSpec       specArray;
3678         ComputeShaderSpec       specStruct;
3679         de::Random                      rnd                             (deStringHash(group->getName()));
3680         const int                       numElements             = 100;
3681         vector<float>           inputFloats             (numElements, 0);
3682         vector<float>           outputFloats    (numElements, 0);
3683         vector<deFloat16>       inputFloats16   (numElements, 0);
3684         vector<deFloat16>       outputFloats16  (numElements, 0);
3685
3686         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
3687
3688         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3689         floorAll(inputFloats);
3690
3691         for (size_t ndx = 0; ndx < numElements; ++ndx)
3692         {
3693                 // Just check if the value is positive or not
3694                 outputFloats[ndx] = (inputFloats[ndx] > 0) ? 1.0f : -1.0f;
3695         }
3696
3697         for (size_t ndx = 0; ndx < numElements; ++ndx)
3698         {
3699                 inputFloats16[ndx] = tcu::Float16(inputFloats[ndx]).bits();
3700                 outputFloats16[ndx] = tcu::Float16(outputFloats[ndx]).bits();
3701         }
3702
3703         // All of the tests are of the form:
3704         //
3705         // testtype r
3706         //
3707         // if (inputdata > 0)
3708         //   r = 1
3709         // else
3710         //   r = -1
3711         //
3712         // return (float)r
3713
3714         specFloat.assembly =
3715                 string(getComputeAsmShaderPreamble()) +
3716
3717                 "OpSource GLSL 430\n"
3718                 "OpName %main \"main\"\n"
3719                 "OpName %id \"gl_GlobalInvocationID\"\n"
3720
3721                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3722
3723                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3724
3725                 "%id = OpVariable %uvec3ptr Input\n"
3726                 "%zero       = OpConstant %i32 0\n"
3727                 "%float_0    = OpConstant %f32 0.0\n"
3728                 "%float_1    = OpConstant %f32 1.0\n"
3729                 "%float_n1   = OpConstant %f32 -1.0\n"
3730
3731                 "%main     = OpFunction %void None %voidf\n"
3732                 "%entry    = OpLabel\n"
3733                 "%idval    = OpLoad %uvec3 %id\n"
3734                 "%x        = OpCompositeExtract %u32 %idval 0\n"
3735                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
3736                 "%inval    = OpLoad %f32 %inloc\n"
3737
3738                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
3739                 "            OpSelectionMerge %cm None\n"
3740                 "            OpBranchConditional %comp %tb %fb\n"
3741                 "%tb       = OpLabel\n"
3742                 "            OpBranch %cm\n"
3743                 "%fb       = OpLabel\n"
3744                 "            OpBranch %cm\n"
3745                 "%cm       = OpLabel\n"
3746                 "%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
3747
3748                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
3749                 "            OpStore %outloc %res\n"
3750                 "            OpReturn\n"
3751
3752                 "            OpFunctionEnd\n";
3753         specFloat.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3754         specFloat.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3755         specFloat.numWorkGroups = IVec3(numElements, 1, 1);
3756
3757         specFloat16.assembly =
3758                 "OpCapability Shader\n"
3759                 "OpCapability StorageUniformBufferBlock16\n"
3760                 "OpCapability Float16\n"
3761                 "OpExtension \"SPV_KHR_16bit_storage\"\n"
3762                 "OpMemoryModel Logical GLSL450\n"
3763                 "OpEntryPoint GLCompute %main \"main\" %id\n"
3764                 "OpExecutionMode %main LocalSize 1 1 1\n"
3765
3766                 "OpSource GLSL 430\n"
3767                 "OpName %main \"main\"\n"
3768                 "OpName %id \"gl_GlobalInvocationID\"\n"
3769
3770                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3771
3772                 "OpDecorate %buf BufferBlock\n"
3773                 "OpDecorate %indata DescriptorSet 0\n"
3774                 "OpDecorate %indata Binding 0\n"
3775                 "OpDecorate %outdata DescriptorSet 0\n"
3776                 "OpDecorate %outdata Binding 1\n"
3777                 "OpDecorate %f16arr ArrayStride 2\n"
3778                 "OpMemberDecorate %buf 0 Offset 0\n"
3779
3780                 "%f16      = OpTypeFloat 16\n"
3781                 "%f16ptr   = OpTypePointer Uniform %f16\n"
3782                 "%f16arr   = OpTypeRuntimeArray %f16\n"
3783
3784                 + string(getComputeAsmCommonTypes()) +
3785
3786                 "%buf      = OpTypeStruct %f16arr\n"
3787                 "%bufptr   = OpTypePointer Uniform %buf\n"
3788                 "%indata   = OpVariable %bufptr Uniform\n"
3789                 "%outdata  = OpVariable %bufptr Uniform\n"
3790
3791                 "%id       = OpVariable %uvec3ptr Input\n"
3792                 "%zero     = OpConstant %i32 0\n"
3793                 "%float_0  = OpConstant %f32 0.0\n"
3794                 "%float_1  = OpConstant %f32 1.0\n"
3795                 "%float_n1 = OpConstant %f32 -1.0\n"
3796
3797                 "%main     = OpFunction %void None %voidf\n"
3798                 "%entry    = OpLabel\n"
3799                 "%idval    = OpLoad %uvec3 %id\n"
3800                 "%x        = OpCompositeExtract %u32 %idval 0\n"
3801                 "%inloc    = OpAccessChain %f16ptr %indata %zero %x\n"
3802                 "%inval    = OpLoad %f16 %inloc\n"
3803                 "%f32_inval = OpFConvert %f32 %inval\n"
3804
3805                 "%comp     = OpFOrdGreaterThan %bool %f32_inval %float_0\n"
3806                 "            OpSelectionMerge %cm None\n"
3807                 "            OpBranchConditional %comp %tb %fb\n"
3808                 "%tb       = OpLabel\n"
3809                 "            OpBranch %cm\n"
3810                 "%fb       = OpLabel\n"
3811                 "            OpBranch %cm\n"
3812                 "%cm       = OpLabel\n"
3813                 "%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
3814                 "%f16_res  = OpFConvert %f16 %res\n"
3815
3816                 "%outloc   = OpAccessChain %f16ptr %outdata %zero %x\n"
3817                 "            OpStore %outloc %f16_res\n"
3818                 "            OpReturn\n"
3819
3820                 "            OpFunctionEnd\n";
3821         specFloat16.inputs.push_back(BufferSp(new Float16Buffer(inputFloats16)));
3822         specFloat16.outputs.push_back(BufferSp(new Float16Buffer(outputFloats16)));
3823         specFloat16.numWorkGroups = IVec3(numElements, 1, 1);
3824         specFloat16.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
3825         specFloat16.requestedVulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
3826
3827         specMat4.assembly =
3828                 string(getComputeAsmShaderPreamble()) +
3829
3830                 "OpSource GLSL 430\n"
3831                 "OpName %main \"main\"\n"
3832                 "OpName %id \"gl_GlobalInvocationID\"\n"
3833
3834                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3835
3836                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3837
3838                 "%id = OpVariable %uvec3ptr Input\n"
3839                 "%v4f32      = OpTypeVector %f32 4\n"
3840                 "%mat4v4f32  = OpTypeMatrix %v4f32 4\n"
3841                 "%zero       = OpConstant %i32 0\n"
3842                 "%float_0    = OpConstant %f32 0.0\n"
3843                 "%float_1    = OpConstant %f32 1.0\n"
3844                 "%float_n1   = OpConstant %f32 -1.0\n"
3845                 "%m11        = OpConstantComposite %v4f32 %float_1 %float_0 %float_0 %float_0\n"
3846                 "%m12        = OpConstantComposite %v4f32 %float_0 %float_1 %float_0 %float_0\n"
3847                 "%m13        = OpConstantComposite %v4f32 %float_0 %float_0 %float_1 %float_0\n"
3848                 "%m14        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_1\n"
3849                 "%m1         = OpConstantComposite %mat4v4f32 %m11 %m12 %m13 %m14\n"
3850                 "%m21        = OpConstantComposite %v4f32 %float_n1 %float_0 %float_0 %float_0\n"
3851                 "%m22        = OpConstantComposite %v4f32 %float_0 %float_n1 %float_0 %float_0\n"
3852                 "%m23        = OpConstantComposite %v4f32 %float_0 %float_0 %float_n1 %float_0\n"
3853                 "%m24        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_n1\n"
3854                 "%m2         = OpConstantComposite %mat4v4f32 %m21 %m22 %m23 %m24\n"
3855
3856                 "%main     = OpFunction %void None %voidf\n"
3857                 "%entry    = OpLabel\n"
3858                 "%idval    = OpLoad %uvec3 %id\n"
3859                 "%x        = OpCompositeExtract %u32 %idval 0\n"
3860                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
3861                 "%inval    = OpLoad %f32 %inloc\n"
3862
3863                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
3864                 "            OpSelectionMerge %cm None\n"
3865                 "            OpBranchConditional %comp %tb %fb\n"
3866                 "%tb       = OpLabel\n"
3867                 "            OpBranch %cm\n"
3868                 "%fb       = OpLabel\n"
3869                 "            OpBranch %cm\n"
3870                 "%cm       = OpLabel\n"
3871                 "%mres     = OpPhi %mat4v4f32 %m1 %tb %m2 %fb\n"
3872                 "%res      = OpCompositeExtract %f32 %mres 2 2\n"
3873
3874                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
3875                 "            OpStore %outloc %res\n"
3876                 "            OpReturn\n"
3877
3878                 "            OpFunctionEnd\n";
3879         specMat4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3880         specMat4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3881         specMat4.numWorkGroups = IVec3(numElements, 1, 1);
3882
3883         specVec3.assembly =
3884                 string(getComputeAsmShaderPreamble()) +
3885
3886                 "OpSource GLSL 430\n"
3887                 "OpName %main \"main\"\n"
3888                 "OpName %id \"gl_GlobalInvocationID\"\n"
3889
3890                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3891
3892                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3893
3894                 "%id = OpVariable %uvec3ptr Input\n"
3895                 "%zero       = OpConstant %i32 0\n"
3896                 "%float_0    = OpConstant %f32 0.0\n"
3897                 "%float_1    = OpConstant %f32 1.0\n"
3898                 "%float_n1   = OpConstant %f32 -1.0\n"
3899                 "%v1         = OpConstantComposite %fvec3 %float_1 %float_1 %float_1\n"
3900                 "%v2         = OpConstantComposite %fvec3 %float_n1 %float_n1 %float_n1\n"
3901
3902                 "%main     = OpFunction %void None %voidf\n"
3903                 "%entry    = OpLabel\n"
3904                 "%idval    = OpLoad %uvec3 %id\n"
3905                 "%x        = OpCompositeExtract %u32 %idval 0\n"
3906                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
3907                 "%inval    = OpLoad %f32 %inloc\n"
3908
3909                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
3910                 "            OpSelectionMerge %cm None\n"
3911                 "            OpBranchConditional %comp %tb %fb\n"
3912                 "%tb       = OpLabel\n"
3913                 "            OpBranch %cm\n"
3914                 "%fb       = OpLabel\n"
3915                 "            OpBranch %cm\n"
3916                 "%cm       = OpLabel\n"
3917                 "%vres     = OpPhi %fvec3 %v1 %tb %v2 %fb\n"
3918                 "%res      = OpCompositeExtract %f32 %vres 2\n"
3919
3920                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
3921                 "            OpStore %outloc %res\n"
3922                 "            OpReturn\n"
3923
3924                 "            OpFunctionEnd\n";
3925         specVec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3926         specVec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3927         specVec3.numWorkGroups = IVec3(numElements, 1, 1);
3928
3929         specInt.assembly =
3930                 string(getComputeAsmShaderPreamble()) +
3931
3932                 "OpSource GLSL 430\n"
3933                 "OpName %main \"main\"\n"
3934                 "OpName %id \"gl_GlobalInvocationID\"\n"
3935
3936                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3937
3938                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3939
3940                 "%id = OpVariable %uvec3ptr Input\n"
3941                 "%zero       = OpConstant %i32 0\n"
3942                 "%float_0    = OpConstant %f32 0.0\n"
3943                 "%i1         = OpConstant %i32 1\n"
3944                 "%i2         = OpConstant %i32 -1\n"
3945
3946                 "%main     = OpFunction %void None %voidf\n"
3947                 "%entry    = OpLabel\n"
3948                 "%idval    = OpLoad %uvec3 %id\n"
3949                 "%x        = OpCompositeExtract %u32 %idval 0\n"
3950                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
3951                 "%inval    = OpLoad %f32 %inloc\n"
3952
3953                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
3954                 "            OpSelectionMerge %cm None\n"
3955                 "            OpBranchConditional %comp %tb %fb\n"
3956                 "%tb       = OpLabel\n"
3957                 "            OpBranch %cm\n"
3958                 "%fb       = OpLabel\n"
3959                 "            OpBranch %cm\n"
3960                 "%cm       = OpLabel\n"
3961                 "%ires     = OpPhi %i32 %i1 %tb %i2 %fb\n"
3962                 "%res      = OpConvertSToF %f32 %ires\n"
3963
3964                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
3965                 "            OpStore %outloc %res\n"
3966                 "            OpReturn\n"
3967
3968                 "            OpFunctionEnd\n";
3969         specInt.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3970         specInt.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3971         specInt.numWorkGroups = IVec3(numElements, 1, 1);
3972
3973         specArray.assembly =
3974                 string(getComputeAsmShaderPreamble()) +
3975
3976                 "OpSource GLSL 430\n"
3977                 "OpName %main \"main\"\n"
3978                 "OpName %id \"gl_GlobalInvocationID\"\n"
3979
3980                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3981
3982                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3983
3984                 "%id = OpVariable %uvec3ptr Input\n"
3985                 "%zero       = OpConstant %i32 0\n"
3986                 "%u7         = OpConstant %u32 7\n"
3987                 "%float_0    = OpConstant %f32 0.0\n"
3988                 "%float_1    = OpConstant %f32 1.0\n"
3989                 "%float_n1   = OpConstant %f32 -1.0\n"
3990                 "%f32a7      = OpTypeArray %f32 %u7\n"
3991                 "%a1         = OpConstantComposite %f32a7 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1\n"
3992                 "%a2         = OpConstantComposite %f32a7 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1\n"
3993                 "%main     = OpFunction %void None %voidf\n"
3994                 "%entry    = OpLabel\n"
3995                 "%idval    = OpLoad %uvec3 %id\n"
3996                 "%x        = OpCompositeExtract %u32 %idval 0\n"
3997                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
3998                 "%inval    = OpLoad %f32 %inloc\n"
3999
4000                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4001                 "            OpSelectionMerge %cm None\n"
4002                 "            OpBranchConditional %comp %tb %fb\n"
4003                 "%tb       = OpLabel\n"
4004                 "            OpBranch %cm\n"
4005                 "%fb       = OpLabel\n"
4006                 "            OpBranch %cm\n"
4007                 "%cm       = OpLabel\n"
4008                 "%ares     = OpPhi %f32a7 %a1 %tb %a2 %fb\n"
4009                 "%res      = OpCompositeExtract %f32 %ares 5\n"
4010
4011                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4012                 "            OpStore %outloc %res\n"
4013                 "            OpReturn\n"
4014
4015                 "            OpFunctionEnd\n";
4016         specArray.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4017         specArray.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4018         specArray.numWorkGroups = IVec3(numElements, 1, 1);
4019
4020         specStruct.assembly =
4021                 string(getComputeAsmShaderPreamble()) +
4022
4023                 "OpSource GLSL 430\n"
4024                 "OpName %main \"main\"\n"
4025                 "OpName %id \"gl_GlobalInvocationID\"\n"
4026
4027                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4028
4029                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4030
4031                 "%id = OpVariable %uvec3ptr Input\n"
4032                 "%zero       = OpConstant %i32 0\n"
4033                 "%float_0    = OpConstant %f32 0.0\n"
4034                 "%float_1    = OpConstant %f32 1.0\n"
4035                 "%float_n1   = OpConstant %f32 -1.0\n"
4036
4037                 "%v2f32      = OpTypeVector %f32 2\n"
4038                 "%Data2      = OpTypeStruct %f32 %v2f32\n"
4039                 "%Data       = OpTypeStruct %Data2 %f32\n"
4040
4041                 "%in1a       = OpConstantComposite %v2f32 %float_1 %float_1\n"
4042                 "%in1b       = OpConstantComposite %Data2 %float_1 %in1a\n"
4043                 "%s1         = OpConstantComposite %Data %in1b %float_1\n"
4044                 "%in2a       = OpConstantComposite %v2f32 %float_n1 %float_n1\n"
4045                 "%in2b       = OpConstantComposite %Data2 %float_n1 %in2a\n"
4046                 "%s2         = OpConstantComposite %Data %in2b %float_n1\n"
4047
4048                 "%main     = OpFunction %void None %voidf\n"
4049                 "%entry    = OpLabel\n"
4050                 "%idval    = OpLoad %uvec3 %id\n"
4051                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4052                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4053                 "%inval    = OpLoad %f32 %inloc\n"
4054
4055                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4056                 "            OpSelectionMerge %cm None\n"
4057                 "            OpBranchConditional %comp %tb %fb\n"
4058                 "%tb       = OpLabel\n"
4059                 "            OpBranch %cm\n"
4060                 "%fb       = OpLabel\n"
4061                 "            OpBranch %cm\n"
4062                 "%cm       = OpLabel\n"
4063                 "%sres     = OpPhi %Data %s1 %tb %s2 %fb\n"
4064                 "%res      = OpCompositeExtract %f32 %sres 0 0\n"
4065
4066                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4067                 "            OpStore %outloc %res\n"
4068                 "            OpReturn\n"
4069
4070                 "            OpFunctionEnd\n";
4071         specStruct.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4072         specStruct.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4073         specStruct.numWorkGroups = IVec3(numElements, 1, 1);
4074
4075         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_int", "OpPhi with int variables", specInt));
4076         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float", "OpPhi with float variables", specFloat));
4077         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float16", "OpPhi with 16bit float variables", specFloat16));
4078         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_vec3", "OpPhi with vec3 variables", specVec3));
4079         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_mat4", "OpPhi with mat4 variables", specMat4));
4080         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_array", "OpPhi with array variables", specArray));
4081         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_struct", "OpPhi with struct variables", specStruct));
4082 }
4083
// Produces `count` float constant definitions, one per line:
//   %cf5 = OpConstant %f32 5.0
//   %cf15 = OpConstant %f32 15.0
//   ...
// The i-th constant (starting from 0) is named after and holds the value
// i * 10 + 5. The list is terminated by one extra empty line.
string generateConstantDefinitions (int count)
{
	std::ostringstream	definitions;

	for (int ndx = 0; ndx < count; ++ndx)
	{
		const int constValue = ndx * 10 + 5;

		definitions << "%cf" << constValue;
		definitions << " = OpConstant %f32 " << constValue << ".0\n";
	}

	definitions << "\n";
	return definitions.str();
}
4092
// Produces the "<literal> <label>" operand pairs for `count` switch cases on
// a single line, e.g. " 0 %case0 1 %case1 2 %case2", followed by a newline.
// Every pair is preceded by one space so the result can be appended directly
// to the text that comes before it.
string generateSwitchCases (int count)
{
	std::ostringstream	operands;

	for (int caseNdx = 0; caseNdx < count; ++caseNdx)
	{
		operands << " " << caseNdx;
		operands << " %case" << caseNdx;
	}

	operands << "\n";
	return operands.str();
}
4101
// Produces `count` labelled blocks %case0, %case1, ... where each block does
// nothing but branch to %phi. The list is terminated by one extra empty line.
string generateSwitchTargets (int count)
{
	std::ostringstream	blocks;

	for (int caseNdx = 0; caseNdx < count; ++caseNdx)
	{
		blocks << "%case" << caseNdx << " = OpLabel\n";
		blocks << "            OpBranch %phi\n";
	}

	blocks << "\n";
	return blocks.str();
}
4110
// Produces the "<value> <parent block>" operand pairs of an OpPhi that picks
// constant %cf<i * 10 + 5> when control arrives from block %case<i>, for
// i in [0, count). All pairs are placed on one newline-terminated line and
// each pair is preceded by a single space.
string generateOpPhiParams (int count)
{
	std::ostringstream	operands;

	for (int caseNdx = 0; caseNdx < count; ++caseNdx)
	{
		const int constValue = caseNdx * 10 + 5;

		operands << " %cf" << constValue;
		operands << " %case" << caseNdx;
	}

	operands << "\n";
	return operands.str();
}
4119
// Returns `value` formatted as text using default stream formatting.
string generateIntWidth (int value)
{
	std::ostringstream	formatted;

	formatted << value;

	return formatted.str();
}
4126
// Returns a copy of `s` with the sequence "ABC" injected after some of its
// characters.
//
// For every character copied, `acc` is increased by `add`. An injection takes
// place only when `acc` has grown beyond `treshold`, in which case `treshold`
// is subtracted from `acc` again. Not injecting after every character keeps
// the result less uniform (and a lot shorter).
//
// `acc` is taken by reference, so the accumulated remainder carries over from
// one call to the next.
string expandOpPhiCase5 (const string& s, int &acc, int add, int treshold)
{
	std::ostringstream	expanded;

	for (const char* cursor = s.c_str(); *cursor != '\0'; ++cursor)
	{
		expanded << *cursor;
		acc += add;

		// Not enough accumulated yet; nothing is injected after this character.
		if (acc <= treshold)
			continue;

		acc -= treshold;
		expanded << "ABC";
	}

	return expanded.str();
}
4149
4150 // Calculate expected result based on the code string
4151 float calcOpPhiCase5 (float val, const string& s)
4152 {
4153         const char*             p               = s.c_str();
4154         float                   x[8];
4155         bool                    b[8];
4156         const float             tv[8]   = { 0.5f, 1.5f, 3.5f, 7.5f, 15.5f, 31.5f, 63.5f, 127.5f };
4157         const float             v               = deFloatAbs(val);
4158         float                   res             = 0;
4159         int                             depth   = -1;
4160         int                             skip    = 0;
4161
4162         for (int i = 7; i >= 0; --i)
4163                 x[i] = std::fmod((float)v, (float)(2 << i));
4164         for (int i = 7; i >= 0; --i)
4165                 b[i] = x[i] > tv[i];
4166
4167         while (*p)
4168         {
4169                 if (*p == 'A')
4170                 {
4171                         depth++;
4172                         if (skip == 0 && b[depth])
4173                         {
4174                                 res++;
4175                         }
4176                         else
4177                                 skip++;
4178                 }
4179                 if (*p == 'B')
4180                 {
4181                         if (skip)
4182                                 skip--;
4183                         if (b[depth] || skip)
4184                                 skip++;
4185                 }
4186                 if (*p == 'C')
4187                 {
4188                         depth--;
4189                         if (skip)
4190                                 skip--;
4191                 }
4192                 p++;
4193         }
4194         return res;
4195 }
4196
// Generates the SPIR-V assembly for the if/else hierarchy described by code
// string `s`. The letters of the code string stand for:
//
// A:
//     if (certain bit is set)
//     {
//       result++;
//
// B:
//     } else {
//
// C:
//     }
//
// examples:
// AABCBC leads to if(){r++;if(){r++;}else{}}else{}
// ABABCC leads to if(){r++;}else{if(){r++;}else{}}
// ABCABC leads to if(){r++;}else{}if(){r++;}else{}
//
// Labels and result ids are derived from the position of the opening 'A' in
// the code string (%t<n>, %f<n>, %m<n>, %rt<n>, %rm<n>), while the branch
// condition %b<d> is selected by nesting depth. The OpPhi emitted for the very
// last character of the string is named %res instead of %rm<n>.
//
// An else-branch does not produce a new value, so at every merge point the
// generator has to know which value was live before the if-statement and from
// which block the else path arrives. The stacks below keep track of that.
string generateOpPhiCase5 (const string& s)
{
	std::stack<int>				openIds;		// ids of the if-statements currently open, -1 at the bottom
	std::stack<std::string>		liveValue;		// id of the current result value, per nesting level
	std::stack<std::string>		liveValueLabel;	// "<value> <block>" OpPhi operand pair, per nesting level
	std::stack<std::string>		thenOperands;	// OpPhi operand pair recorded at the end of each then-branch
	std::ostringstream			code;
	int							nesting			= -1;
	int							activeId		= 0;
	int							position		= 0;

	openIds.push(-1);
	liveValue.push("%f32_0");
	liveValueLabel.push("%f32_0 %entry");

	for (const char* cursor = s.c_str(); *cursor != '\0'; ++cursor, ++position)
	{
		switch (*cursor)
		{
			case 'A':
			{
				++nesting;
				activeId = position;
				openIds.push(activeId);

				code << "\tOpSelectionMerge %m" << activeId << " None\n";
				code << "\tOpBranchConditional %b" << nesting << " %t" << activeId << " %f" << activeId << "\n";
				code << "%t" << activeId << " = OpLabel\n";
				code << "%rt" << activeId << " = OpFAdd %f32 " << liveValue.top() << " %f32_1\n";

				// The incremented value is the live one inside the then-branch.
				std::ostringstream operand;
				operand << "%rt" << activeId;
				liveValue.push(operand.str());
				operand << " %t" << activeId;
				liveValueLabel.push(operand.str());
				break;
			}

			case 'B':
			{
				// Remember what the then-branch delivers to the merge block and
				// fall back to the value that was live before the if-statement.
				thenOperands.push(liveValueLabel.top());
				liveValue.pop();
				liveValueLabel.pop();

				code << "\tOpBranch %m" << activeId << "\n";
				code << "%f" << activeId << " = OpLabel\n";

				// The same value now arrives by way of the else block.
				std::ostringstream operand;
				operand << liveValue.top() << " %f" << activeId;
				liveValueLabel.top() = operand.str();
				break;
			}

			case 'C':
			{
				const std::string	elseOperand	= liveValueLabel.top();
				const bool			isLast		= (cursor[1] == '\0');

				code << "\tOpBranch %m" << activeId << "\n";
				code << "%m" << activeId << " = OpLabel\n";

				if (isLast)
					code << "%res"; // last result goes to %res
				else
					code << "%rm" << activeId;

				code << " = OpPhi %f32  " << thenOperands.top() << "  " << elseOperand << "\n";

				// The merged value replaces whatever was live at this level.
				std::ostringstream operand;
				operand << "%rm" << activeId;
				liveValue.top() = operand.str();
				operand << " %m" << activeId;
				liveValueLabel.top() = operand.str();

				thenOperands.pop();
				--nesting;
				openIds.pop();
				activeId = openIds.top();
				break;
			}

			default:
				break;
		}
	}

	return code.str();
}
4295
4296 tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
4297 {
4298         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
4299         ComputeShaderSpec                               spec1;
4300         ComputeShaderSpec                               spec2;
4301         ComputeShaderSpec                               spec3;
4302         ComputeShaderSpec                               spec4;
4303         ComputeShaderSpec                               spec5;
4304         de::Random                                              rnd                             (deStringHash(group->getName()));
4305         const int                                               numElements             = 100;
4306         vector<float>                                   inputFloats             (numElements, 0);
4307         vector<float>                                   outputFloats1   (numElements, 0);
4308         vector<float>                                   outputFloats2   (numElements, 0);
4309         vector<float>                                   outputFloats3   (numElements, 0);
4310         vector<float>                                   outputFloats4   (numElements, 0);
4311         vector<float>                                   outputFloats5   (numElements, 0);
4312         std::string                                             codestring              = "ABC";
4313         const int                                               test4Width              = 1024;
4314
4315         // Build case 5 code string. Each iteration makes the hierarchy more complicated.
4316         // 9 iterations with (7, 24) parameters makes the hierarchy 8 deep with about 1500 lines of
4317         // shader code.
4318         for (int i = 0, acc = 0; i < 9; i++)
4319                 codestring = expandOpPhiCase5(codestring, acc, 7, 24);
4320
4321         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4322
4323         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4324         floorAll(inputFloats);
4325
4326         for (size_t ndx = 0; ndx < numElements; ++ndx)
4327         {
4328                 switch (ndx % 3)
4329                 {
4330                         case 0:         outputFloats1[ndx] = inputFloats[ndx] + 5.5f;   break;
4331                         case 1:         outputFloats1[ndx] = inputFloats[ndx] + 20.5f;  break;
4332                         case 2:         outputFloats1[ndx] = inputFloats[ndx] + 1.75f;  break;
4333                         default:        break;
4334                 }
4335                 outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
4336                 outputFloats3[ndx] = 8.5f - inputFloats[ndx];
4337
4338                 int index4 = (int)deFloor(deAbs((float)ndx * inputFloats[ndx]));
4339                 outputFloats4[ndx] = (float)(index4 % test4Width) * 10.0f + 5.0f;
4340
4341                 outputFloats5[ndx] = calcOpPhiCase5(inputFloats[ndx], codestring);
4342         }
4343
4344         spec1.assembly =
4345                 string(getComputeAsmShaderPreamble()) +
4346
4347                 "OpSource GLSL 430\n"
4348                 "OpName %main \"main\"\n"
4349                 "OpName %id \"gl_GlobalInvocationID\"\n"
4350
4351                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4352
4353                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4354
4355                 "%id = OpVariable %uvec3ptr Input\n"
4356                 "%zero       = OpConstant %i32 0\n"
4357                 "%three      = OpConstant %u32 3\n"
4358                 "%constf5p5  = OpConstant %f32 5.5\n"
4359                 "%constf20p5 = OpConstant %f32 20.5\n"
4360                 "%constf1p75 = OpConstant %f32 1.75\n"
4361                 "%constf8p5  = OpConstant %f32 8.5\n"
4362                 "%constf6p5  = OpConstant %f32 6.5\n"
4363
4364                 "%main     = OpFunction %void None %voidf\n"
4365                 "%entry    = OpLabel\n"
4366                 "%idval    = OpLoad %uvec3 %id\n"
4367                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4368                 "%selector = OpUMod %u32 %x %three\n"
4369                 "            OpSelectionMerge %phi None\n"
4370                 "            OpSwitch %selector %default 0 %case0 1 %case1 2 %case2\n"
4371
4372                 // Case 1 before OpPhi.
4373                 "%case1    = OpLabel\n"
4374                 "            OpBranch %phi\n"
4375
4376                 "%default  = OpLabel\n"
4377                 "            OpUnreachable\n"
4378
4379                 "%phi      = OpLabel\n"
4380                 "%operand  = OpPhi %f32   %constf1p75 %case2   %constf20p5 %case1   %constf5p5 %case0\n" // not in the order of blocks
4381                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4382                 "%inval    = OpLoad %f32 %inloc\n"
4383                 "%add      = OpFAdd %f32 %inval %operand\n"
4384                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4385                 "            OpStore %outloc %add\n"
4386                 "            OpReturn\n"
4387
4388                 // Case 0 after OpPhi.
4389                 "%case0    = OpLabel\n"
4390                 "            OpBranch %phi\n"
4391
4392
4393                 // Case 2 after OpPhi.
4394                 "%case2    = OpLabel\n"
4395                 "            OpBranch %phi\n"
4396
4397                 "            OpFunctionEnd\n";
4398         spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4399         spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
4400         spec1.numWorkGroups = IVec3(numElements, 1, 1);
4401
4402         group->addChild(new SpvAsmComputeShaderCase(testCtx, "block", "out-of-order and unreachable blocks for OpPhi", spec1));
4403
4404         spec2.assembly =
4405                 string(getComputeAsmShaderPreamble()) +
4406
4407                 "OpName %main \"main\"\n"
4408                 "OpName %id \"gl_GlobalInvocationID\"\n"
4409
4410                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4411
4412                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4413
4414                 "%id         = OpVariable %uvec3ptr Input\n"
4415                 "%zero       = OpConstant %i32 0\n"
4416                 "%one        = OpConstant %i32 1\n"
4417                 "%three      = OpConstant %i32 3\n"
4418                 "%constf6p5  = OpConstant %f32 6.5\n"
4419
4420                 "%main       = OpFunction %void None %voidf\n"
4421                 "%entry      = OpLabel\n"
4422                 "%idval      = OpLoad %uvec3 %id\n"
4423                 "%x          = OpCompositeExtract %u32 %idval 0\n"
4424                 "%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
4425                 "%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
4426                 "%inval      = OpLoad %f32 %inloc\n"
4427                 "              OpBranch %phi\n"
4428
4429                 "%phi        = OpLabel\n"
4430                 "%step       = OpPhi %i32 %zero  %entry %step_next  %phi\n"
4431                 "%accum      = OpPhi %f32 %inval %entry %accum_next %phi\n"
4432                 "%step_next  = OpIAdd %i32 %step %one\n"
4433                 "%accum_next = OpFAdd %f32 %accum %constf6p5\n"
4434                 "%still_loop = OpSLessThan %bool %step %three\n"
4435                 "              OpLoopMerge %exit %phi None\n"
4436                 "              OpBranchConditional %still_loop %phi %exit\n"
4437
4438                 "%exit       = OpLabel\n"
4439                 "              OpStore %outloc %accum\n"
4440                 "              OpReturn\n"
4441                 "              OpFunctionEnd\n";
4442         spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4443         spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
4444         spec2.numWorkGroups = IVec3(numElements, 1, 1);
4445
4446         group->addChild(new SpvAsmComputeShaderCase(testCtx, "induction", "The usual way induction variables are handled in LLVM IR", spec2));
4447
4448         spec3.assembly =
4449                 string(getComputeAsmShaderPreamble()) +
4450
4451                 "OpName %main \"main\"\n"
4452                 "OpName %id \"gl_GlobalInvocationID\"\n"
4453
4454                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4455
4456                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4457
4458                 "%f32ptr_f   = OpTypePointer Function %f32\n"
4459                 "%id         = OpVariable %uvec3ptr Input\n"
4460                 "%true       = OpConstantTrue %bool\n"
4461                 "%false      = OpConstantFalse %bool\n"
4462                 "%zero       = OpConstant %i32 0\n"
4463                 "%constf8p5  = OpConstant %f32 8.5\n"
4464
4465                 "%main       = OpFunction %void None %voidf\n"
4466                 "%entry      = OpLabel\n"
4467                 "%b          = OpVariable %f32ptr_f Function %constf8p5\n"
4468                 "%idval      = OpLoad %uvec3 %id\n"
4469                 "%x          = OpCompositeExtract %u32 %idval 0\n"
4470                 "%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
4471                 "%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
4472                 "%a_init     = OpLoad %f32 %inloc\n"
4473                 "%b_init     = OpLoad %f32 %b\n"
4474                 "              OpBranch %phi\n"
4475
4476                 "%phi        = OpLabel\n"
4477                 "%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
4478                 "%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
4479                 "%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
4480                 "              OpLoopMerge %exit %phi None\n"
4481                 "              OpBranchConditional %still_loop %phi %exit\n"
4482
4483                 "%exit       = OpLabel\n"
4484                 "%sub        = OpFSub %f32 %a_next %b_next\n"
4485                 "              OpStore %outloc %sub\n"
4486                 "              OpReturn\n"
4487                 "              OpFunctionEnd\n";
4488         spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4489         spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
4490         spec3.numWorkGroups = IVec3(numElements, 1, 1);
4491
4492         group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", "Swap the values of two variables using OpPhi", spec3));
4493
4494         spec4.assembly =
4495                 "OpCapability Shader\n"
4496                 "%ext = OpExtInstImport \"GLSL.std.450\"\n"
4497                 "OpMemoryModel Logical GLSL450\n"
4498                 "OpEntryPoint GLCompute %main \"main\" %id\n"
4499                 "OpExecutionMode %main LocalSize 1 1 1\n"
4500
4501                 "OpSource GLSL 430\n"
4502                 "OpName %main \"main\"\n"
4503                 "OpName %id \"gl_GlobalInvocationID\"\n"
4504
4505                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4506
4507                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4508
4509                 "%id       = OpVariable %uvec3ptr Input\n"
4510                 "%zero     = OpConstant %i32 0\n"
4511                 "%cimod    = OpConstant %u32 " + generateIntWidth(test4Width) + "\n"
4512
4513                 + generateConstantDefinitions(test4Width) +
4514
4515                 "%main     = OpFunction %void None %voidf\n"
4516                 "%entry    = OpLabel\n"
4517                 "%idval    = OpLoad %uvec3 %id\n"
4518                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4519                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4520                 "%inval    = OpLoad %f32 %inloc\n"
4521                 "%xf       = OpConvertUToF %f32 %x\n"
4522                 "%xm       = OpFMul %f32 %xf %inval\n"
4523                 "%xa       = OpExtInst %f32 %ext FAbs %xm\n"
4524                 "%xi       = OpConvertFToU %u32 %xa\n"
4525                 "%selector = OpUMod %u32 %xi %cimod\n"
4526                 "            OpSelectionMerge %phi None\n"
4527                 "            OpSwitch %selector %default "
4528
4529                 + generateSwitchCases(test4Width) +
4530
4531                 "%default  = OpLabel\n"
4532                 "            OpUnreachable\n"
4533
4534                 + generateSwitchTargets(test4Width) +
4535
4536                 "%phi      = OpLabel\n"
4537                 "%result   = OpPhi %f32"
4538
4539                 + generateOpPhiParams(test4Width) +
4540
4541                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4542                 "            OpStore %outloc %result\n"
4543                 "            OpReturn\n"
4544
4545                 "            OpFunctionEnd\n";
4546         spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4547         spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
4548         spec4.numWorkGroups = IVec3(numElements, 1, 1);
4549
4550         group->addChild(new SpvAsmComputeShaderCase(testCtx, "wide", "OpPhi with a lot of parameters", spec4));
4551
4552         spec5.assembly =
4553                 "OpCapability Shader\n"
4554                 "%ext      = OpExtInstImport \"GLSL.std.450\"\n"
4555                 "OpMemoryModel Logical GLSL450\n"
4556                 "OpEntryPoint GLCompute %main \"main\" %id\n"
4557                 "OpExecutionMode %main LocalSize 1 1 1\n"
4558                 "%code     = OpString \"" + codestring + "\"\n"
4559
4560                 "OpSource GLSL 430\n"
4561                 "OpName %main \"main\"\n"
4562                 "OpName %id \"gl_GlobalInvocationID\"\n"
4563
4564                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4565
4566                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4567
4568                 "%id       = OpVariable %uvec3ptr Input\n"
4569                 "%zero     = OpConstant %i32 0\n"
4570                 "%f32_0    = OpConstant %f32 0.0\n"
4571                 "%f32_0_5  = OpConstant %f32 0.5\n"
4572                 "%f32_1    = OpConstant %f32 1.0\n"
4573                 "%f32_1_5  = OpConstant %f32 1.5\n"
4574                 "%f32_2    = OpConstant %f32 2.0\n"
4575                 "%f32_3_5  = OpConstant %f32 3.5\n"
4576                 "%f32_4    = OpConstant %f32 4.0\n"
4577                 "%f32_7_5  = OpConstant %f32 7.5\n"
4578                 "%f32_8    = OpConstant %f32 8.0\n"
4579                 "%f32_15_5 = OpConstant %f32 15.5\n"
4580                 "%f32_16   = OpConstant %f32 16.0\n"
4581                 "%f32_31_5 = OpConstant %f32 31.5\n"
4582                 "%f32_32   = OpConstant %f32 32.0\n"
4583                 "%f32_63_5 = OpConstant %f32 63.5\n"
4584                 "%f32_64   = OpConstant %f32 64.0\n"
4585                 "%f32_127_5 = OpConstant %f32 127.5\n"
4586                 "%f32_128  = OpConstant %f32 128.0\n"
4587                 "%f32_256  = OpConstant %f32 256.0\n"
4588
4589                 "%main     = OpFunction %void None %voidf\n"
4590                 "%entry    = OpLabel\n"
4591                 "%idval    = OpLoad %uvec3 %id\n"
4592                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4593                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4594                 "%inval    = OpLoad %f32 %inloc\n"
4595
4596                 "%xabs     = OpExtInst %f32 %ext FAbs %inval\n"
4597                 "%x8       = OpFMod %f32 %xabs %f32_256\n"
4598                 "%x7       = OpFMod %f32 %xabs %f32_128\n"
4599                 "%x6       = OpFMod %f32 %xabs %f32_64\n"
4600                 "%x5       = OpFMod %f32 %xabs %f32_32\n"
4601                 "%x4       = OpFMod %f32 %xabs %f32_16\n"
4602                 "%x3       = OpFMod %f32 %xabs %f32_8\n"
4603                 "%x2       = OpFMod %f32 %xabs %f32_4\n"
4604                 "%x1       = OpFMod %f32 %xabs %f32_2\n"
4605
4606                 "%b7       = OpFOrdGreaterThanEqual %bool %x8 %f32_127_5\n"
4607                 "%b6       = OpFOrdGreaterThanEqual %bool %x7 %f32_63_5\n"
4608                 "%b5       = OpFOrdGreaterThanEqual %bool %x6 %f32_31_5\n"
4609                 "%b4       = OpFOrdGreaterThanEqual %bool %x5 %f32_15_5\n"
4610                 "%b3       = OpFOrdGreaterThanEqual %bool %x4 %f32_7_5\n"
4611                 "%b2       = OpFOrdGreaterThanEqual %bool %x3 %f32_3_5\n"
4612                 "%b1       = OpFOrdGreaterThanEqual %bool %x2 %f32_1_5\n"
4613                 "%b0       = OpFOrdGreaterThanEqual %bool %x1 %f32_0_5\n"
4614
4615                 + generateOpPhiCase5(codestring) +
4616
4617                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4618                 "            OpStore %outloc %res\n"
4619                 "            OpReturn\n"
4620
4621                 "            OpFunctionEnd\n";
4622         spec5.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4623         spec5.outputs.push_back(BufferSp(new Float32Buffer(outputFloats5)));
4624         spec5.numWorkGroups = IVec3(numElements, 1, 1);
4625
4626         group->addChild(new SpvAsmComputeShaderCase(testCtx, "nested", "Stress OpPhi with a lot of nesting", spec5));
4627
4628         createOpPhiVartypeTests(group, testCtx);
4629
4630         return group.release();
4631 }
4632
4633 // Assembly code used for testing block order is based on GLSL source code:
4634 //
4635 // #version 430
4636 //
4637 // layout(std140, set = 0, binding = 0) readonly buffer Input {
4638 //   float elements[];
4639 // } input_data;
4640 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
4641 //   float elements[];
4642 // } output_data;
4643 //
4644 // void main() {
4645 //   uint x = gl_GlobalInvocationID.x;
4646 //   output_data.elements[x] = input_data.elements[x];
4647 //   if (x > uint(50)) {
4648 //     switch (x % uint(3)) {
4649 //       case 0: output_data.elements[x] += 1.5f; break;
4650 //       case 1: output_data.elements[x] += 42.f; break;
4651 //       case 2: output_data.elements[x] -= 27.f; break;
4652 //       default: break;
4653 //     }
4654 //   } else {
4655 //     output_data.elements[x] = -input_data.elements[x];
4656 //   }
4657 // }
4658 tcu::TestCaseGroup* createBlockOrderGroup (tcu::TestContext& testCtx)
4659 {
4660         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "block_order", "Test block orders"));
4661         ComputeShaderSpec                               spec;
4662         de::Random                                              rnd                             (deStringHash(group->getName()));
4663         const int                                               numElements             = 100;
4664         vector<float>                                   inputFloats             (numElements, 0);
4665         vector<float>                                   outputFloats    (numElements, 0);
4666
4667         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
4668
4669         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4670         floorAll(inputFloats);
4671
4672         for (size_t ndx = 0; ndx <= 50; ++ndx)
4673                 outputFloats[ndx] = -inputFloats[ndx];
4674
4675         for (size_t ndx = 51; ndx < numElements; ++ndx)
4676         {
4677                 switch (ndx % 3)
4678                 {
4679                         case 0:         outputFloats[ndx] = inputFloats[ndx] + 1.5f; break;
4680                         case 1:         outputFloats[ndx] = inputFloats[ndx] + 42.f; break;
4681                         case 2:         outputFloats[ndx] = inputFloats[ndx] - 27.f; break;
4682                         default:        break;
4683                 }
4684         }
4685
4686         spec.assembly =
4687                 string(getComputeAsmShaderPreamble()) +
4688
4689                 "OpSource GLSL 430\n"
4690                 "OpName %main \"main\"\n"
4691                 "OpName %id \"gl_GlobalInvocationID\"\n"
4692
4693                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4694
4695                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4696
4697                 "%u32ptr       = OpTypePointer Function %u32\n"
4698                 "%u32ptr_input = OpTypePointer Input %u32\n"
4699
4700                 + string(getComputeAsmInputOutputBuffer()) +
4701
4702                 "%id        = OpVariable %uvec3ptr Input\n"
4703                 "%zero      = OpConstant %i32 0\n"
4704                 "%const3    = OpConstant %u32 3\n"
4705                 "%const50   = OpConstant %u32 50\n"
4706                 "%constf1p5 = OpConstant %f32 1.5\n"
4707                 "%constf27  = OpConstant %f32 27.0\n"
4708                 "%constf42  = OpConstant %f32 42.0\n"
4709
4710                 "%main = OpFunction %void None %voidf\n"
4711
4712                 // entry block.
4713                 "%entry    = OpLabel\n"
4714
4715                 // Create a temporary variable to hold the value of gl_GlobalInvocationID.x.
4716                 "%xvar     = OpVariable %u32ptr Function\n"
4717                 "%xptr     = OpAccessChain %u32ptr_input %id %zero\n"
4718                 "%x        = OpLoad %u32 %xptr\n"
4719                 "            OpStore %xvar %x\n"
4720
4721                 "%cmp      = OpUGreaterThan %bool %x %const50\n"
4722                 "            OpSelectionMerge %if_merge None\n"
4723                 "            OpBranchConditional %cmp %if_true %if_false\n"
4724
4725                 // False branch for if-statement: placed in the middle of switch cases and before true branch.
4726                 "%if_false = OpLabel\n"
4727                 "%x_f      = OpLoad %u32 %xvar\n"
4728                 "%inloc_f  = OpAccessChain %f32ptr %indata %zero %x_f\n"
4729                 "%inval_f  = OpLoad %f32 %inloc_f\n"
4730                 "%negate   = OpFNegate %f32 %inval_f\n"
4731                 "%outloc_f = OpAccessChain %f32ptr %outdata %zero %x_f\n"
4732                 "            OpStore %outloc_f %negate\n"
4733                 "            OpBranch %if_merge\n"
4734
4735                 // Merge block for if-statement: placed in the middle of true and false branch.
4736                 "%if_merge = OpLabel\n"
4737                 "            OpReturn\n"
4738
4739                 // True branch for if-statement: placed in the middle of swtich cases and after the false branch.
4740                 "%if_true  = OpLabel\n"
4741                 "%xval_t   = OpLoad %u32 %xvar\n"
4742                 "%mod      = OpUMod %u32 %xval_t %const3\n"
4743                 "            OpSelectionMerge %switch_merge None\n"
4744                 "            OpSwitch %mod %default 0 %case0 1 %case1 2 %case2\n"
4745
4746                 // Merge block for switch-statement: placed before the case
4747                 // bodies.  But it must follow OpSwitch which dominates it.
4748                 "%switch_merge = OpLabel\n"
4749                 "                OpBranch %if_merge\n"
4750
4751                 // Case 1 for switch-statement: placed before case 0.
4752                 // It must follow the OpSwitch that dominates it.
4753                 "%case1    = OpLabel\n"
4754                 "%x_1      = OpLoad %u32 %xvar\n"
4755                 "%inloc_1  = OpAccessChain %f32ptr %indata %zero %x_1\n"
4756                 "%inval_1  = OpLoad %f32 %inloc_1\n"
4757                 "%addf42   = OpFAdd %f32 %inval_1 %constf42\n"
4758                 "%outloc_1 = OpAccessChain %f32ptr %outdata %zero %x_1\n"
4759                 "            OpStore %outloc_1 %addf42\n"
4760                 "            OpBranch %switch_merge\n"
4761
4762                 // Case 2 for switch-statement.
4763                 "%case2    = OpLabel\n"
4764                 "%x_2      = OpLoad %u32 %xvar\n"
4765                 "%inloc_2  = OpAccessChain %f32ptr %indata %zero %x_2\n"
4766                 "%inval_2  = OpLoad %f32 %inloc_2\n"
4767                 "%subf27   = OpFSub %f32 %inval_2 %constf27\n"
4768                 "%outloc_2 = OpAccessChain %f32ptr %outdata %zero %x_2\n"
4769                 "            OpStore %outloc_2 %subf27\n"
4770                 "            OpBranch %switch_merge\n"
4771
4772                 // Default case for switch-statement: placed in the middle of normal cases.
4773                 "%default = OpLabel\n"
4774                 "           OpBranch %switch_merge\n"
4775
4776                 // Case 0 for switch-statement: out of order.
4777                 "%case0    = OpLabel\n"
4778                 "%x_0      = OpLoad %u32 %xvar\n"
4779                 "%inloc_0  = OpAccessChain %f32ptr %indata %zero %x_0\n"
4780                 "%inval_0  = OpLoad %f32 %inloc_0\n"
4781                 "%addf1p5  = OpFAdd %f32 %inval_0 %constf1p5\n"
4782                 "%outloc_0 = OpAccessChain %f32ptr %outdata %zero %x_0\n"
4783                 "            OpStore %outloc_0 %addf1p5\n"
4784                 "            OpBranch %switch_merge\n"
4785
4786                 "            OpFunctionEnd\n";
4787         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4788         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4789         spec.numWorkGroups = IVec3(numElements, 1, 1);
4790
4791         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "various out-of-order blocks", spec));
4792
4793         return group.release();
4794 }
4795
4796 tcu::TestCaseGroup* createMultipleShaderGroup (tcu::TestContext& testCtx)
4797 {
4798         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "multiple_shaders", "Test multiple shaders in the same module"));
4799         ComputeShaderSpec                               spec1;
4800         ComputeShaderSpec                               spec2;
4801         de::Random                                              rnd                             (deStringHash(group->getName()));
4802         const int                                               numElements             = 100;
4803         vector<float>                                   inputFloats             (numElements, 0);
4804         vector<float>                                   outputFloats1   (numElements, 0);
4805         vector<float>                                   outputFloats2   (numElements, 0);
4806         fillRandomScalars(rnd, -500.f, 500.f, &inputFloats[0], numElements);
4807
4808         for (size_t ndx = 0; ndx < numElements; ++ndx)
4809         {
4810                 outputFloats1[ndx] = inputFloats[ndx] + inputFloats[ndx];
4811                 outputFloats2[ndx] = -inputFloats[ndx];
4812         }
4813
4814         const string assembly(
4815                 "OpCapability Shader\n"
4816                 "OpMemoryModel Logical GLSL450\n"
4817                 "OpEntryPoint GLCompute %comp_main1 \"entrypoint1\" %id\n"
4818                 "OpEntryPoint GLCompute %comp_main2 \"entrypoint2\" %id\n"
4819                 // A module cannot have two OpEntryPoint instructions with the same Execution Model and the same Name string.
4820                 "OpEntryPoint Vertex    %vert_main  \"entrypoint2\" %vert_builtins %vertexIndex %instanceIndex\n"
4821                 "OpExecutionMode %comp_main1 LocalSize 1 1 1\n"
4822                 "OpExecutionMode %comp_main2 LocalSize 1 1 1\n"
4823
4824                 "OpName %comp_main1              \"entrypoint1\"\n"
4825                 "OpName %comp_main2              \"entrypoint2\"\n"
4826                 "OpName %vert_main               \"entrypoint2\"\n"
4827                 "OpName %id                      \"gl_GlobalInvocationID\"\n"
4828                 "OpName %vert_builtin_st         \"gl_PerVertex\"\n"
4829                 "OpName %vertexIndex             \"gl_VertexIndex\"\n"
4830                 "OpName %instanceIndex           \"gl_InstanceIndex\"\n"
4831                 "OpMemberName %vert_builtin_st 0 \"gl_Position\"\n"
4832                 "OpMemberName %vert_builtin_st 1 \"gl_PointSize\"\n"
4833                 "OpMemberName %vert_builtin_st 2 \"gl_ClipDistance\"\n"
4834
4835                 "OpDecorate %id                      BuiltIn GlobalInvocationId\n"
4836                 "OpDecorate %vertexIndex             BuiltIn VertexIndex\n"
4837                 "OpDecorate %instanceIndex           BuiltIn InstanceIndex\n"
4838                 "OpDecorate %vert_builtin_st         Block\n"
4839                 "OpMemberDecorate %vert_builtin_st 0 BuiltIn Position\n"
4840                 "OpMemberDecorate %vert_builtin_st 1 BuiltIn PointSize\n"
4841                 "OpMemberDecorate %vert_builtin_st 2 BuiltIn ClipDistance\n"
4842
4843                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4844
4845                 "%zero       = OpConstant %i32 0\n"
4846                 "%one        = OpConstant %u32 1\n"
4847                 "%c_f32_1    = OpConstant %f32 1\n"
4848
4849                 "%i32inputptr         = OpTypePointer Input %i32\n"
4850                 "%vec4                = OpTypeVector %f32 4\n"
4851                 "%vec4ptr             = OpTypePointer Output %vec4\n"
4852                 "%f32arr1             = OpTypeArray %f32 %one\n"
4853                 "%vert_builtin_st     = OpTypeStruct %vec4 %f32 %f32arr1\n"
4854                 "%vert_builtin_st_ptr = OpTypePointer Output %vert_builtin_st\n"
4855                 "%vert_builtins       = OpVariable %vert_builtin_st_ptr Output\n"
4856
4857                 "%id         = OpVariable %uvec3ptr Input\n"
4858                 "%vertexIndex = OpVariable %i32inputptr Input\n"
4859                 "%instanceIndex = OpVariable %i32inputptr Input\n"
4860                 "%c_vec4_1   = OpConstantComposite %vec4 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
4861
4862                 // gl_Position = vec4(1.);
4863                 "%vert_main  = OpFunction %void None %voidf\n"
4864                 "%vert_entry = OpLabel\n"
4865                 "%position   = OpAccessChain %vec4ptr %vert_builtins %zero\n"
4866                 "              OpStore %position %c_vec4_1\n"
4867                 "              OpReturn\n"
4868                 "              OpFunctionEnd\n"
4869
4870                 // Double inputs.
4871                 "%comp_main1  = OpFunction %void None %voidf\n"
4872                 "%comp1_entry = OpLabel\n"
4873                 "%idval1      = OpLoad %uvec3 %id\n"
4874                 "%x1          = OpCompositeExtract %u32 %idval1 0\n"
4875                 "%inloc1      = OpAccessChain %f32ptr %indata %zero %x1\n"
4876                 "%inval1      = OpLoad %f32 %inloc1\n"
4877                 "%add         = OpFAdd %f32 %inval1 %inval1\n"
4878                 "%outloc1     = OpAccessChain %f32ptr %outdata %zero %x1\n"
4879                 "               OpStore %outloc1 %add\n"
4880                 "               OpReturn\n"
4881                 "               OpFunctionEnd\n"
4882
4883                 // Negate inputs.
4884                 "%comp_main2  = OpFunction %void None %voidf\n"
4885                 "%comp2_entry = OpLabel\n"
4886                 "%idval2      = OpLoad %uvec3 %id\n"
4887                 "%x2          = OpCompositeExtract %u32 %idval2 0\n"
4888                 "%inloc2      = OpAccessChain %f32ptr %indata %zero %x2\n"
4889                 "%inval2      = OpLoad %f32 %inloc2\n"
4890                 "%neg         = OpFNegate %f32 %inval2\n"
4891                 "%outloc2     = OpAccessChain %f32ptr %outdata %zero %x2\n"
4892                 "               OpStore %outloc2 %neg\n"
4893                 "               OpReturn\n"
4894                 "               OpFunctionEnd\n");
4895
4896         spec1.assembly = assembly;
4897         spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4898         spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
4899         spec1.numWorkGroups = IVec3(numElements, 1, 1);
4900         spec1.entryPoint = "entrypoint1";
4901
4902         spec2.assembly = assembly;
4903         spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4904         spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
4905         spec2.numWorkGroups = IVec3(numElements, 1, 1);
4906         spec2.entryPoint = "entrypoint2";
4907
4908         group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader1", "multiple shaders in the same module", spec1));
4909         group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader2", "multiple shaders in the same module", spec2));
4910
4911         return group.release();
4912 }
4913
// Returns a string made of num4ByteChars copies of one 4-byte UTF-8
// character, i.e. num4ByteChars * 4 bytes in total (empty when the count
// is zero).
inline std::string makeLongUTF8String (size_t num4ByteChars)
{
	// The bytes F0 9F 8C 8D encode U+1F30D, an example of a longest valid
	// UTF-8 character. The literal is stored in a plain char array so that
	// the element type is explicit: Microsoft compilers can otherwise treat
	// such a literal as wide (16-bit) characters. A C++11 UTF-8 string
	// literal would be the ideal spelling, but older Microsoft compilers
	// still have to be supported.
	const char		earthAfrica[]	= "\xF0\x9F\x8C\x8D";
	const size_t	bytesPerChar	= sizeof(earthAfrica) - 1; // drop the trailing NUL

	std::string result;
	result.reserve(num4ByteChars * bytesPerChar);

	for (size_t remaining = num4ByteChars; remaining != 0; --remaining)
		result.append(earthAfrica, bytesPerChar);

	return result;
}
4930
4931 tcu::TestCaseGroup* createOpSourceGroup (tcu::TestContext& testCtx)
4932 {
4933         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsource", "Tests the OpSource & OpSourceContinued instruction"));
4934         vector<CaseParameter>                   cases;
4935         de::Random                                              rnd                             (deStringHash(group->getName()));
4936         const int                                               numElements             = 100;
4937         vector<float>                                   positiveFloats  (numElements, 0);
4938         vector<float>                                   negativeFloats  (numElements, 0);
4939         const StringTemplate                    shaderTemplate  (
4940                 "OpCapability Shader\n"
4941                 "OpMemoryModel Logical GLSL450\n"
4942
4943                 "OpEntryPoint GLCompute %main \"main\" %id\n"
4944                 "OpExecutionMode %main LocalSize 1 1 1\n"
4945
4946                 "${SOURCE}\n"
4947
4948                 "OpName %main           \"main\"\n"
4949                 "OpName %id             \"gl_GlobalInvocationID\"\n"
4950
4951                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4952
4953                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4954
4955                 "%id        = OpVariable %uvec3ptr Input\n"
4956                 "%zero      = OpConstant %i32 0\n"
4957
4958                 "%main      = OpFunction %void None %voidf\n"
4959                 "%label     = OpLabel\n"
4960                 "%idval     = OpLoad %uvec3 %id\n"
4961                 "%x         = OpCompositeExtract %u32 %idval 0\n"
4962                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
4963                 "%inval     = OpLoad %f32 %inloc\n"
4964                 "%neg       = OpFNegate %f32 %inval\n"
4965                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
4966                 "             OpStore %outloc %neg\n"
4967                 "             OpReturn\n"
4968                 "             OpFunctionEnd\n");
4969
4970         cases.push_back(CaseParameter("unknown_source",                                                 "OpSource Unknown 0"));
4971         cases.push_back(CaseParameter("wrong_source",                                                   "OpSource OpenCL_C 210"));
4972         cases.push_back(CaseParameter("normal_filename",                                                "%fname = OpString \"filename\"\n"
4973                                                                                                                                                         "OpSource GLSL 430 %fname"));
4974         cases.push_back(CaseParameter("empty_filename",                                                 "%fname = OpString \"\"\n"
4975                                                                                                                                                         "OpSource GLSL 430 %fname"));
4976         cases.push_back(CaseParameter("normal_source_code",                                             "%fname = OpString \"filename\"\n"
4977                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\""));
4978         cases.push_back(CaseParameter("empty_source_code",                                              "%fname = OpString \"filename\"\n"
4979                                                                                                                                                         "OpSource GLSL 430 %fname \"\""));
4980         cases.push_back(CaseParameter("long_source_code",                                               "%fname = OpString \"filename\"\n"
4981                                                                                                                                                         "OpSource GLSL 430 %fname \"" + makeLongUTF8String(65530) + "ccc\"")); // word count: 65535
4982         cases.push_back(CaseParameter("utf8_source_code",                                               "%fname = OpString \"filename\"\n"
4983                                                                                                                                                         "OpSource GLSL 430 %fname \"\xE2\x98\x82\xE2\x98\x85\"")); // umbrella & black star symbol
4984         cases.push_back(CaseParameter("normal_sourcecontinued",                                 "%fname = OpString \"filename\"\n"
4985                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvo\"\n"
4986                                                                                                                                                         "OpSourceContinued \"id main() {}\""));
4987         cases.push_back(CaseParameter("empty_sourcecontinued",                                  "%fname = OpString \"filename\"\n"
4988                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
4989                                                                                                                                                         "OpSourceContinued \"\""));
4990         cases.push_back(CaseParameter("long_sourcecontinued",                                   "%fname = OpString \"filename\"\n"
4991                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
4992                                                                                                                                                         "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\"")); // word count: 65535
4993         cases.push_back(CaseParameter("utf8_sourcecontinued",                                   "%fname = OpString \"filename\"\n"
4994                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
4995                                                                                                                                                         "OpSourceContinued \"\xE2\x98\x8E\xE2\x9A\x91\"")); // white telephone & black flag symbol
4996         cases.push_back(CaseParameter("multi_sourcecontinued",                                  "%fname = OpString \"filename\"\n"
4997                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\n\"\n"
4998                                                                                                                                                         "OpSourceContinued \"void\"\n"
4999                                                                                                                                                         "OpSourceContinued \"main()\"\n"
5000                                                                                                                                                         "OpSourceContinued \"{}\""));
5001         cases.push_back(CaseParameter("empty_source_before_sourcecontinued",    "%fname = OpString \"filename\"\n"
5002                                                                                                                                                         "OpSource GLSL 430 %fname \"\"\n"
5003                                                                                                                                                         "OpSourceContinued \"#version 430\nvoid main() {}\""));
5004
5005         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5006
5007         for (size_t ndx = 0; ndx < numElements; ++ndx)
5008                 negativeFloats[ndx] = -positiveFloats[ndx];
5009
5010         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5011         {
5012                 map<string, string>             specializations;
5013                 ComputeShaderSpec               spec;
5014
5015                 specializations["SOURCE"] = cases[caseNdx].param;
5016                 spec.assembly = shaderTemplate.specialize(specializations);
5017                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5018                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5019                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5020
5021                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5022         }
5023
5024         return group.release();
5025 }
5026
5027 tcu::TestCaseGroup* createOpSourceExtensionGroup (tcu::TestContext& testCtx)
5028 {
5029         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsourceextension", "Tests the OpSource instruction"));
5030         vector<CaseParameter>                   cases;
5031         de::Random                                              rnd                             (deStringHash(group->getName()));
5032         const int                                               numElements             = 100;
5033         vector<float>                                   inputFloats             (numElements, 0);
5034         vector<float>                                   outputFloats    (numElements, 0);
5035         const StringTemplate                    shaderTemplate  (
5036                 string(getComputeAsmShaderPreamble()) +
5037
5038                 "OpSourceExtension \"${EXTENSION}\"\n"
5039
5040                 "OpName %main           \"main\"\n"
5041                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5042
5043                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5044
5045                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5046
5047                 "%id        = OpVariable %uvec3ptr Input\n"
5048                 "%zero      = OpConstant %i32 0\n"
5049
5050                 "%main      = OpFunction %void None %voidf\n"
5051                 "%label     = OpLabel\n"
5052                 "%idval     = OpLoad %uvec3 %id\n"
5053                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5054                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5055                 "%inval     = OpLoad %f32 %inloc\n"
5056                 "%neg       = OpFNegate %f32 %inval\n"
5057                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5058                 "             OpStore %outloc %neg\n"
5059                 "             OpReturn\n"
5060                 "             OpFunctionEnd\n");
5061
5062         cases.push_back(CaseParameter("empty_extension",        ""));
5063         cases.push_back(CaseParameter("real_extension",         "GL_ARB_texture_rectangle"));
5064         cases.push_back(CaseParameter("fake_extension",         "GL_ARB_im_the_ultimate_extension"));
5065         cases.push_back(CaseParameter("utf8_extension",         "GL_ARB_\xE2\x98\x82\xE2\x98\x85"));
5066         cases.push_back(CaseParameter("long_extension",         makeLongUTF8String(65533) + "ccc")); // word count: 65535
5067
5068         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
5069
5070         for (size_t ndx = 0; ndx < numElements; ++ndx)
5071                 outputFloats[ndx] = -inputFloats[ndx];
5072
5073         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5074         {
5075                 map<string, string>             specializations;
5076                 ComputeShaderSpec               spec;
5077
5078                 specializations["EXTENSION"] = cases[caseNdx].param;
5079                 spec.assembly = shaderTemplate.specialize(specializations);
5080                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5081                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5082                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5083
5084                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5085         }
5086
5087         return group.release();
5088 }
5089
5090 // Checks that a compute shader can generate a constant null value of various types, without exercising a computation on it.
5091 tcu::TestCaseGroup* createOpConstantNullGroup (tcu::TestContext& testCtx)
5092 {
5093         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opconstantnull", "Tests the OpConstantNull instruction"));
5094         vector<CaseParameter>                   cases;
5095         de::Random                                              rnd                             (deStringHash(group->getName()));
5096         const int                                               numElements             = 100;
5097         vector<float>                                   positiveFloats  (numElements, 0);
5098         vector<float>                                   negativeFloats  (numElements, 0);
5099         const StringTemplate                    shaderTemplate  (
5100                 string(getComputeAsmShaderPreamble()) +
5101
5102                 "OpSource GLSL 430\n"
5103                 "OpName %main           \"main\"\n"
5104                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5105
5106                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5107
5108                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5109                 "%uvec2     = OpTypeVector %u32 2\n"
5110                 "%bvec3     = OpTypeVector %bool 3\n"
5111                 "%fvec4     = OpTypeVector %f32 4\n"
5112                 "%fmat33    = OpTypeMatrix %fvec3 3\n"
5113                 "%const100  = OpConstant %u32 100\n"
5114                 "%uarr100   = OpTypeArray %i32 %const100\n"
5115                 "%struct    = OpTypeStruct %f32 %i32 %u32\n"
5116                 "%pointer   = OpTypePointer Function %i32\n"
5117                 + string(getComputeAsmInputOutputBuffer()) +
5118
5119                 "%null      = OpConstantNull ${TYPE}\n"
5120
5121                 "%id        = OpVariable %uvec3ptr Input\n"
5122                 "%zero      = OpConstant %i32 0\n"
5123
5124                 "%main      = OpFunction %void None %voidf\n"
5125                 "%label     = OpLabel\n"
5126                 "%idval     = OpLoad %uvec3 %id\n"
5127                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5128                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5129                 "%inval     = OpLoad %f32 %inloc\n"
5130                 "%neg       = OpFNegate %f32 %inval\n"
5131                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5132                 "             OpStore %outloc %neg\n"
5133                 "             OpReturn\n"
5134                 "             OpFunctionEnd\n");
5135
5136         cases.push_back(CaseParameter("bool",                   "%bool"));
5137         cases.push_back(CaseParameter("sint32",                 "%i32"));
5138         cases.push_back(CaseParameter("uint32",                 "%u32"));
5139         cases.push_back(CaseParameter("float32",                "%f32"));
5140         cases.push_back(CaseParameter("vec4float32",    "%fvec4"));
5141         cases.push_back(CaseParameter("vec3bool",               "%bvec3"));
5142         cases.push_back(CaseParameter("vec2uint32",             "%uvec2"));
5143         cases.push_back(CaseParameter("matrix",                 "%fmat33"));
5144         cases.push_back(CaseParameter("array",                  "%uarr100"));
5145         cases.push_back(CaseParameter("struct",                 "%struct"));
5146         cases.push_back(CaseParameter("pointer",                "%pointer"));
5147
5148         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5149
5150         for (size_t ndx = 0; ndx < numElements; ++ndx)
5151                 negativeFloats[ndx] = -positiveFloats[ndx];
5152
5153         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5154         {
5155                 map<string, string>             specializations;
5156                 ComputeShaderSpec               spec;
5157
5158                 specializations["TYPE"] = cases[caseNdx].param;
5159                 spec.assembly = shaderTemplate.specialize(specializations);
5160                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5161                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5162                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5163
5164                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5165         }
5166
5167         return group.release();
5168 }
5169
5170 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
5171 tcu::TestCaseGroup* createOpConstantCompositeGroup (tcu::TestContext& testCtx)
5172 {
5173         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
5174         vector<CaseParameter>                   cases;
5175         de::Random                                              rnd                             (deStringHash(group->getName()));
5176         const int                                               numElements             = 100;
5177         vector<float>                                   positiveFloats  (numElements, 0);
5178         vector<float>                                   negativeFloats  (numElements, 0);
5179         const StringTemplate                    shaderTemplate  (
5180                 string(getComputeAsmShaderPreamble()) +
5181
5182                 "OpSource GLSL 430\n"
5183                 "OpName %main           \"main\"\n"
5184                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5185
5186                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5187
5188                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5189
5190                 "%id        = OpVariable %uvec3ptr Input\n"
5191                 "%zero      = OpConstant %i32 0\n"
5192
5193                 "${CONSTANT}\n"
5194
5195                 "%main      = OpFunction %void None %voidf\n"
5196                 "%label     = OpLabel\n"
5197                 "%idval     = OpLoad %uvec3 %id\n"
5198                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5199                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5200                 "%inval     = OpLoad %f32 %inloc\n"
5201                 "%neg       = OpFNegate %f32 %inval\n"
5202                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5203                 "             OpStore %outloc %neg\n"
5204                 "             OpReturn\n"
5205                 "             OpFunctionEnd\n");
5206
5207         cases.push_back(CaseParameter("vector",                 "%five = OpConstant %u32 5\n"
5208                                                                                                         "%const = OpConstantComposite %uvec3 %five %zero %five"));
5209         cases.push_back(CaseParameter("matrix",                 "%m3fvec3 = OpTypeMatrix %fvec3 3\n"
5210                                                                                                         "%ten = OpConstant %f32 10.\n"
5211                                                                                                         "%fzero = OpConstant %f32 0.\n"
5212                                                                                                         "%vec = OpConstantComposite %fvec3 %ten %fzero %ten\n"
5213                                                                                                         "%mat = OpConstantComposite %m3fvec3 %vec %vec %vec"));
5214         cases.push_back(CaseParameter("struct",                 "%m2vec3 = OpTypeMatrix %fvec3 2\n"
5215                                                                                                         "%struct = OpTypeStruct %i32 %f32 %fvec3 %m2vec3\n"
5216                                                                                                         "%fzero = OpConstant %f32 0.\n"
5217                                                                                                         "%one = OpConstant %f32 1.\n"
5218                                                                                                         "%point5 = OpConstant %f32 0.5\n"
5219                                                                                                         "%vec = OpConstantComposite %fvec3 %one %one %fzero\n"
5220                                                                                                         "%mat = OpConstantComposite %m2vec3 %vec %vec\n"
5221                                                                                                         "%const = OpConstantComposite %struct %zero %point5 %vec %mat"));
5222         cases.push_back(CaseParameter("nested_struct",  "%st1 = OpTypeStruct %u32 %f32\n"
5223                                                                                                         "%st2 = OpTypeStruct %i32 %i32\n"
5224                                                                                                         "%struct = OpTypeStruct %st1 %st2\n"
5225                                                                                                         "%point5 = OpConstant %f32 0.5\n"
5226                                                                                                         "%one = OpConstant %u32 1\n"
5227                                                                                                         "%ten = OpConstant %i32 10\n"
5228                                                                                                         "%st1val = OpConstantComposite %st1 %one %point5\n"
5229                                                                                                         "%st2val = OpConstantComposite %st2 %ten %ten\n"
5230                                                                                                         "%const = OpConstantComposite %struct %st1val %st2val"));
5231
5232         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5233
5234         for (size_t ndx = 0; ndx < numElements; ++ndx)
5235                 negativeFloats[ndx] = -positiveFloats[ndx];
5236
5237         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5238         {
5239                 map<string, string>             specializations;
5240                 ComputeShaderSpec               spec;
5241
5242                 specializations["CONSTANT"] = cases[caseNdx].param;
5243                 spec.assembly = shaderTemplate.specialize(specializations);
5244                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5245                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5246                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5247
5248                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5249         }
5250
5251         return group.release();
5252 }
5253
5254 // Creates a floating point number with the given exponent, and significand
5255 // bits set. It can only create normalized numbers. Only the least significant
5256 // 24 bits of the significand will be examined. The final bit of the
5257 // significand will also be ignored. This allows alignment to be written
5258 // similarly to C99 hex-floats.
5259 // For example if you wanted to write 0x1.7f34p-12 you would call
5260 // constructNormalizedFloat(-12, 0x7f3400)
5261 float constructNormalizedFloat (deInt32 exponent, deUint32 significand)
5262 {
5263         float f = 1.0f;
5264
5265         for (deInt32 idx = 0; idx < 23; ++idx)
5266         {
5267                 f += ((significand & 0x800000) == 0) ? 0.f : std::ldexp(1.0f, -(idx + 1));
5268                 significand <<= 1;
5269         }
5270
5271         return std::ldexp(f, exponent);
5272 }
5273
5274 // Compare instruction for the OpQuantizeF16 compute exact case.
5275 // Returns true if the output is what is expected from the test case.
5276 bool compareOpQuantizeF16ComputeExactCase (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5277 {
5278         if (outputAllocs.size() != 1)
5279                 return false;
5280
5281         // Only size is needed because we cannot compare Nans.
5282         size_t byteSize = expectedOutputs[0].getByteSize();
5283
5284         const float*    outputAsFloat   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
5285
5286         if (byteSize != 4*sizeof(float)) {
5287                 return false;
5288         }
5289
5290         if (*outputAsFloat != constructNormalizedFloat(8, 0x304000) &&
5291                 *outputAsFloat != constructNormalizedFloat(8, 0x300000)) {
5292                 return false;
5293         }
5294         outputAsFloat++;
5295
5296         if (*outputAsFloat != -constructNormalizedFloat(-7, 0x600000) &&
5297                 *outputAsFloat != -constructNormalizedFloat(-7, 0x604000)) {
5298                 return false;
5299         }
5300         outputAsFloat++;
5301
5302         if (*outputAsFloat != constructNormalizedFloat(2, 0x01C000) &&
5303                 *outputAsFloat != constructNormalizedFloat(2, 0x020000)) {
5304                 return false;
5305         }
5306         outputAsFloat++;
5307
5308         if (*outputAsFloat != constructNormalizedFloat(1, 0xFFC000) &&
5309                 *outputAsFloat != constructNormalizedFloat(2, 0x000000)) {
5310                 return false;
5311         }
5312
5313         return true;
5314 }
5315
5316 // Checks that every output from a test-case is a float NaN.
5317 bool compareNan (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5318 {
5319         if (outputAllocs.size() != 1)
5320                 return false;
5321
5322         // Only size is needed because we cannot compare Nans.
5323         size_t byteSize = expectedOutputs[0].getByteSize();
5324
5325         const float* const      output_as_float = static_cast<const float*>(outputAllocs[0]->getHostPtr());
5326
5327         for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
5328         {
5329                 if (!deFloatIsNaN(output_as_float[idx]))
5330                 {
5331                         return false;
5332                 }
5333         }
5334
5335         return true;
5336 }
5337
// Checks that a compute shader quantizes 32-bit floats correctly with OpQuantizeToF16 for several classes of input, e.g. infinities, NaNs and values that flush to zero.
5339 tcu::TestCaseGroup* createOpQuantizeToF16Group (tcu::TestContext& testCtx)
5340 {
5341         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opquantize", "Tests the OpQuantizeToF16 instruction"));
5342
5343         const std::string shader (
5344                 string(getComputeAsmShaderPreamble()) +
5345
5346                 "OpSource GLSL 430\n"
5347                 "OpName %main           \"main\"\n"
5348                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5349
5350                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5351
5352                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5353
5354                 "%id        = OpVariable %uvec3ptr Input\n"
5355                 "%zero      = OpConstant %i32 0\n"
5356
5357                 "%main      = OpFunction %void None %voidf\n"
5358                 "%label     = OpLabel\n"
5359                 "%idval     = OpLoad %uvec3 %id\n"
5360                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5361                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5362                 "%inval     = OpLoad %f32 %inloc\n"
5363                 "%quant     = OpQuantizeToF16 %f32 %inval\n"
5364                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5365                 "             OpStore %outloc %quant\n"
5366                 "             OpReturn\n"
5367                 "             OpFunctionEnd\n");
5368
5369         {
5370                 ComputeShaderSpec       spec;
5371                 const deUint32          numElements             = 100;
5372                 vector<float>           infinities;
5373                 vector<float>           results;
5374
5375                 infinities.reserve(numElements);
5376                 results.reserve(numElements);
5377
5378                 for (size_t idx = 0; idx < numElements; ++idx)
5379                 {
5380                         switch(idx % 4)
5381                         {
5382                                 case 0:
5383                                         infinities.push_back(std::numeric_limits<float>::infinity());
5384                                         results.push_back(std::numeric_limits<float>::infinity());
5385                                         break;
5386                                 case 1:
5387                                         infinities.push_back(-std::numeric_limits<float>::infinity());
5388                                         results.push_back(-std::numeric_limits<float>::infinity());
5389                                         break;
5390                                 case 2:
5391                                         infinities.push_back(std::ldexp(1.0f, 16));
5392                                         results.push_back(std::numeric_limits<float>::infinity());
5393                                         break;
5394                                 case 3:
5395                                         infinities.push_back(std::ldexp(-1.0f, 32));
5396                                         results.push_back(-std::numeric_limits<float>::infinity());
5397                                         break;
5398                         }
5399                 }
5400
5401                 spec.assembly = shader;
5402                 spec.inputs.push_back(BufferSp(new Float32Buffer(infinities)));
5403                 spec.outputs.push_back(BufferSp(new Float32Buffer(results)));
5404                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5405
5406                 group->addChild(new SpvAsmComputeShaderCase(
5407                         testCtx, "infinities", "Check that infinities propagated and created", spec));
5408         }
5409
5410         {
5411                 ComputeShaderSpec       spec;
5412                 vector<float>           nans;
5413                 const deUint32          numElements             = 100;
5414
5415                 nans.reserve(numElements);
5416
5417                 for (size_t idx = 0; idx < numElements; ++idx)
5418                 {
5419                         if (idx % 2 == 0)
5420                         {
5421                                 nans.push_back(std::numeric_limits<float>::quiet_NaN());
5422                         }
5423                         else
5424                         {
5425                                 nans.push_back(-std::numeric_limits<float>::quiet_NaN());
5426                         }
5427                 }
5428
5429                 spec.assembly = shader;
5430                 spec.inputs.push_back(BufferSp(new Float32Buffer(nans)));
5431                 spec.outputs.push_back(BufferSp(new Float32Buffer(nans)));
5432                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5433                 spec.verifyIO = &compareNan;
5434
5435                 group->addChild(new SpvAsmComputeShaderCase(
5436                         testCtx, "propagated_nans", "Check that nans are propagated", spec));
5437         }
5438
5439         {
5440                 ComputeShaderSpec       spec;
5441                 vector<float>           small;
5442                 vector<float>           zeros;
5443                 const deUint32          numElements             = 100;
5444
5445                 small.reserve(numElements);
5446                 zeros.reserve(numElements);
5447
5448                 for (size_t idx = 0; idx < numElements; ++idx)
5449                 {
5450                         switch(idx % 6)
5451                         {
5452                                 case 0:
5453                                         small.push_back(0.f);
5454                                         zeros.push_back(0.f);
5455                                         break;
5456                                 case 1:
5457                                         small.push_back(-0.f);
5458                                         zeros.push_back(-0.f);
5459                                         break;
5460                                 case 2:
5461                                         small.push_back(std::ldexp(1.0f, -16));
5462                                         zeros.push_back(0.f);
5463                                         break;
5464                                 case 3:
5465                                         small.push_back(std::ldexp(-1.0f, -32));
5466                                         zeros.push_back(-0.f);
5467                                         break;
5468                                 case 4:
5469                                         small.push_back(std::ldexp(1.0f, -127));
5470                                         zeros.push_back(0.f);
5471                                         break;
5472                                 case 5:
5473                                         small.push_back(-std::ldexp(1.0f, -128));
5474                                         zeros.push_back(-0.f);
5475                                         break;
5476                         }
5477                 }
5478
5479                 spec.assembly = shader;
5480                 spec.inputs.push_back(BufferSp(new Float32Buffer(small)));
5481                 spec.outputs.push_back(BufferSp(new Float32Buffer(zeros)));
5482                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5483
5484                 group->addChild(new SpvAsmComputeShaderCase(
5485                         testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
5486         }
5487
5488         {
5489                 ComputeShaderSpec       spec;
5490                 vector<float>           exact;
5491                 const deUint32          numElements             = 200;
5492
5493                 exact.reserve(numElements);
5494
5495                 for (size_t idx = 0; idx < numElements; ++idx)
5496                         exact.push_back(static_cast<float>(static_cast<int>(idx) - 100));
5497
5498                 spec.assembly = shader;
5499                 spec.inputs.push_back(BufferSp(new Float32Buffer(exact)));
5500                 spec.outputs.push_back(BufferSp(new Float32Buffer(exact)));
5501                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5502
5503                 group->addChild(new SpvAsmComputeShaderCase(
5504                         testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
5505         }
5506
5507         {
5508                 ComputeShaderSpec       spec;
5509                 vector<float>           inputs;
5510                 const deUint32          numElements             = 4;
5511
5512                 inputs.push_back(constructNormalizedFloat(8,    0x300300));
5513                 inputs.push_back(-constructNormalizedFloat(-7,  0x600800));
5514                 inputs.push_back(constructNormalizedFloat(2,    0x01E000));
5515                 inputs.push_back(constructNormalizedFloat(1,    0xFFE000));
5516
5517                 spec.assembly = shader;
5518                 spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
5519                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5520                 spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
5521                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5522
5523                 group->addChild(new SpvAsmComputeShaderCase(
5524                         testCtx, "rounded", "Check that are rounded when needed", spec));
5525         }
5526
5527         return group.release();
5528 }
5529
5530 tcu::TestCaseGroup* createSpecConstantOpQuantizeToF16Group (tcu::TestContext& testCtx)
5531 {
5532         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opspecconstantop_opquantize", "Tests the OpQuantizeToF16 opcode for the OpSpecConstantOp instruction"));
5533
5534         const std::string shader (
5535                 string(getComputeAsmShaderPreamble()) +
5536
5537                 "OpName %main           \"main\"\n"
5538                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5539
5540                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5541
5542                 "OpDecorate %sc_0  SpecId 0\n"
5543                 "OpDecorate %sc_1  SpecId 1\n"
5544                 "OpDecorate %sc_2  SpecId 2\n"
5545                 "OpDecorate %sc_3  SpecId 3\n"
5546                 "OpDecorate %sc_4  SpecId 4\n"
5547                 "OpDecorate %sc_5  SpecId 5\n"
5548
5549                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5550
5551                 "%id        = OpVariable %uvec3ptr Input\n"
5552                 "%zero      = OpConstant %i32 0\n"
5553                 "%c_u32_6   = OpConstant %u32 6\n"
5554
5555                 "%sc_0      = OpSpecConstant %f32 0.\n"
5556                 "%sc_1      = OpSpecConstant %f32 0.\n"
5557                 "%sc_2      = OpSpecConstant %f32 0.\n"
5558                 "%sc_3      = OpSpecConstant %f32 0.\n"
5559                 "%sc_4      = OpSpecConstant %f32 0.\n"
5560                 "%sc_5      = OpSpecConstant %f32 0.\n"
5561
5562                 "%sc_0_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_0\n"
5563                 "%sc_1_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_1\n"
5564                 "%sc_2_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_2\n"
5565                 "%sc_3_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_3\n"
5566                 "%sc_4_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_4\n"
5567                 "%sc_5_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_5\n"
5568
5569                 "%main      = OpFunction %void None %voidf\n"
5570                 "%label     = OpLabel\n"
5571                 "%idval     = OpLoad %uvec3 %id\n"
5572                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5573                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5574                 "%selector  = OpUMod %u32 %x %c_u32_6\n"
5575                 "            OpSelectionMerge %exit None\n"
5576                 "            OpSwitch %selector %exit 0 %case0 1 %case1 2 %case2 3 %case3 4 %case4 5 %case5\n"
5577
5578                 "%case0     = OpLabel\n"
5579                 "             OpStore %outloc %sc_0_quant\n"
5580                 "             OpBranch %exit\n"
5581
5582                 "%case1     = OpLabel\n"
5583                 "             OpStore %outloc %sc_1_quant\n"
5584                 "             OpBranch %exit\n"
5585
5586                 "%case2     = OpLabel\n"
5587                 "             OpStore %outloc %sc_2_quant\n"
5588                 "             OpBranch %exit\n"
5589
5590                 "%case3     = OpLabel\n"
5591                 "             OpStore %outloc %sc_3_quant\n"
5592                 "             OpBranch %exit\n"
5593
5594                 "%case4     = OpLabel\n"
5595                 "             OpStore %outloc %sc_4_quant\n"
5596                 "             OpBranch %exit\n"
5597
5598                 "%case5     = OpLabel\n"
5599                 "             OpStore %outloc %sc_5_quant\n"
5600                 "             OpBranch %exit\n"
5601
5602                 "%exit      = OpLabel\n"
5603                 "             OpReturn\n"
5604
5605                 "             OpFunctionEnd\n");
5606
5607         {
5608                 ComputeShaderSpec       spec;
5609                 const deUint8           numCases        = 4;
5610                 vector<float>           inputs          (numCases, 0.f);
5611                 vector<float>           outputs;
5612
5613                 spec.assembly           = shader;
5614                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
5615
5616                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::numeric_limits<float>::infinity()));
5617                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::numeric_limits<float>::infinity()));
5618                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, 16)));
5619                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, 32)));
5620
5621                 outputs.push_back(std::numeric_limits<float>::infinity());
5622                 outputs.push_back(-std::numeric_limits<float>::infinity());
5623                 outputs.push_back(std::numeric_limits<float>::infinity());
5624                 outputs.push_back(-std::numeric_limits<float>::infinity());
5625
5626                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5627                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
5628
5629                 group->addChild(new SpvAsmComputeShaderCase(
5630                         testCtx, "infinities", "Check that infinities propagated and created", spec));
5631         }
5632
5633         {
5634                 ComputeShaderSpec       spec;
5635                 const deUint8           numCases        = 2;
5636                 vector<float>           inputs          (numCases, 0.f);
5637                 vector<float>           outputs;
5638
5639                 spec.assembly           = shader;
5640                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
5641                 spec.verifyIO           = &compareNan;
5642
5643                 outputs.push_back(std::numeric_limits<float>::quiet_NaN());
5644                 outputs.push_back(-std::numeric_limits<float>::quiet_NaN());
5645
5646                 for (deUint8 idx = 0; idx < numCases; ++idx)
5647                         spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
5648
5649                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5650                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
5651
5652                 group->addChild(new SpvAsmComputeShaderCase(
5653                         testCtx, "propagated_nans", "Check that nans are propagated", spec));
5654         }
5655
5656         {
5657                 ComputeShaderSpec       spec;
5658                 const deUint8           numCases        = 6;
5659                 vector<float>           inputs          (numCases, 0.f);
5660                 vector<float>           outputs;
5661
5662                 spec.assembly           = shader;
5663                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
5664
5665                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(0.f));
5666                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-0.f));
5667                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -16)));
5668                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, -32)));
5669                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -127)));
5670                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::ldexp(1.0f, -128)));
5671
5672                 outputs.push_back(0.f);
5673                 outputs.push_back(-0.f);
5674                 outputs.push_back(0.f);
5675                 outputs.push_back(-0.f);
5676                 outputs.push_back(0.f);
5677                 outputs.push_back(-0.f);
5678
5679                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5680                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
5681
5682                 group->addChild(new SpvAsmComputeShaderCase(
5683                         testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
5684         }
5685
5686         {
5687                 ComputeShaderSpec       spec;
5688                 const deUint8           numCases        = 6;
5689                 vector<float>           inputs          (numCases, 0.f);
5690                 vector<float>           outputs;
5691
5692                 spec.assembly           = shader;
5693                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
5694
5695                 for (deUint8 idx = 0; idx < 6; ++idx)
5696                 {
5697                         const float f = static_cast<float>(idx * 10 - 30) / 4.f;
5698                         spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(f));
5699                         outputs.push_back(f);
5700                 }
5701
5702                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5703                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
5704
5705                 group->addChild(new SpvAsmComputeShaderCase(
5706                         testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
5707         }
5708
5709         {
5710                 ComputeShaderSpec       spec;
5711                 const deUint8           numCases        = 4;
5712                 vector<float>           inputs          (numCases, 0.f);
5713                 vector<float>           outputs;
5714
5715                 spec.assembly           = shader;
5716                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
5717                 spec.verifyIO           = &compareOpQuantizeF16ComputeExactCase;
5718
5719                 outputs.push_back(constructNormalizedFloat(8, 0x300300));
5720                 outputs.push_back(-constructNormalizedFloat(-7, 0x600800));
5721                 outputs.push_back(constructNormalizedFloat(2, 0x01E000));
5722                 outputs.push_back(constructNormalizedFloat(1, 0xFFE000));
5723
5724                 for (deUint8 idx = 0; idx < numCases; ++idx)
5725                         spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
5726
5727                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5728                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
5729
5730                 group->addChild(new SpvAsmComputeShaderCase(
5731                         testCtx, "rounded", "Check that are rounded when needed", spec));
5732         }
5733
5734         return group.release();
5735 }
5736
5737 // Checks that constant null/composite values can be used in computation.
5738 tcu::TestCaseGroup* createOpConstantUsageGroup (tcu::TestContext& testCtx)
5739 {
5740         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opconstantnullcomposite", "Spotcheck the OpConstantNull & OpConstantComposite instruction"));
5741         ComputeShaderSpec                               spec;
5742         de::Random                                              rnd                             (deStringHash(group->getName()));
5743         const int                                               numElements             = 100;
5744         vector<float>                                   positiveFloats  (numElements, 0);
5745         vector<float>                                   negativeFloats  (numElements, 0);
5746
5747         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5748
5749         for (size_t ndx = 0; ndx < numElements; ++ndx)
5750                 negativeFloats[ndx] = -positiveFloats[ndx];
5751
5752         spec.assembly =
5753                 "OpCapability Shader\n"
5754                 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
5755                 "OpMemoryModel Logical GLSL450\n"
5756                 "OpEntryPoint GLCompute %main \"main\" %id\n"
5757                 "OpExecutionMode %main LocalSize 1 1 1\n"
5758
5759                 "OpSource GLSL 430\n"
5760                 "OpName %main           \"main\"\n"
5761                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5762
5763                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5764
5765                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5766
5767                 "%fmat      = OpTypeMatrix %fvec3 3\n"
5768                 "%ten       = OpConstant %u32 10\n"
5769                 "%f32arr10  = OpTypeArray %f32 %ten\n"
5770                 "%fst       = OpTypeStruct %f32 %f32\n"
5771
5772                 + string(getComputeAsmInputOutputBuffer()) +
5773
5774                 "%id        = OpVariable %uvec3ptr Input\n"
5775                 "%zero      = OpConstant %i32 0\n"
5776
5777                 // Create a bunch of null values
5778                 "%unull     = OpConstantNull %u32\n"
5779                 "%fnull     = OpConstantNull %f32\n"
5780                 "%vnull     = OpConstantNull %fvec3\n"
5781                 "%mnull     = OpConstantNull %fmat\n"
5782                 "%anull     = OpConstantNull %f32arr10\n"
5783                 "%snull     = OpConstantComposite %fst %fnull %fnull\n"
5784
5785                 "%main      = OpFunction %void None %voidf\n"
5786                 "%label     = OpLabel\n"
5787                 "%idval     = OpLoad %uvec3 %id\n"
5788                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5789                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5790                 "%inval     = OpLoad %f32 %inloc\n"
5791                 "%neg       = OpFNegate %f32 %inval\n"
5792
5793                 // Get the abs() of (a certain element of) those null values
5794                 "%unull_cov = OpConvertUToF %f32 %unull\n"
5795                 "%unull_abs = OpExtInst %f32 %std450 FAbs %unull_cov\n"
5796                 "%fnull_abs = OpExtInst %f32 %std450 FAbs %fnull\n"
5797                 "%vnull_0   = OpCompositeExtract %f32 %vnull 0\n"
5798                 "%vnull_abs = OpExtInst %f32 %std450 FAbs %vnull_0\n"
5799                 "%mnull_12  = OpCompositeExtract %f32 %mnull 1 2\n"
5800                 "%mnull_abs = OpExtInst %f32 %std450 FAbs %mnull_12\n"
5801                 "%anull_3   = OpCompositeExtract %f32 %anull 3\n"
5802                 "%anull_abs = OpExtInst %f32 %std450 FAbs %anull_3\n"
5803                 "%snull_1   = OpCompositeExtract %f32 %snull 1\n"
5804                 "%snull_abs = OpExtInst %f32 %std450 FAbs %snull_1\n"
5805
5806                 // Add them all
5807                 "%add1      = OpFAdd %f32 %neg  %unull_abs\n"
5808                 "%add2      = OpFAdd %f32 %add1 %fnull_abs\n"
5809                 "%add3      = OpFAdd %f32 %add2 %vnull_abs\n"
5810                 "%add4      = OpFAdd %f32 %add3 %mnull_abs\n"
5811                 "%add5      = OpFAdd %f32 %add4 %anull_abs\n"
5812                 "%final     = OpFAdd %f32 %add5 %snull_abs\n"
5813
5814                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5815                 "             OpStore %outloc %final\n" // write to output
5816                 "             OpReturn\n"
5817                 "             OpFunctionEnd\n";
5818         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5819         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5820         spec.numWorkGroups = IVec3(numElements, 1, 1);
5821
5822         group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "Check that values constructed via OpConstantNull & OpConstantComposite can be used", spec));
5823
5824         return group.release();
5825 }
5826
5827 // Assembly code used for testing loop control is based on GLSL source code:
5828 // #version 430
5829 //
5830 // layout(std140, set = 0, binding = 0) readonly buffer Input {
5831 //   float elements[];
5832 // } input_data;
5833 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
5834 //   float elements[];
5835 // } output_data;
5836 //
5837 // void main() {
5838 //   uint x = gl_GlobalInvocationID.x;
5839 //   output_data.elements[x] = input_data.elements[x];
5840 //   for (uint i = 0; i < 4; ++i)
5841 //     output_data.elements[x] += 1.f;
5842 // }
5843 tcu::TestCaseGroup* createLoopControlGroup (tcu::TestContext& testCtx)
5844 {
5845         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "loop_control", "Tests loop control cases"));
5846         vector<CaseParameter>                   cases;
5847         de::Random                                              rnd                             (deStringHash(group->getName()));
5848         const int                                               numElements             = 100;
5849         vector<float>                                   inputFloats             (numElements, 0);
5850         vector<float>                                   outputFloats    (numElements, 0);
5851         const StringTemplate                    shaderTemplate  (
5852                 string(getComputeAsmShaderPreamble()) +
5853
5854                 "OpSource GLSL 430\n"
5855                 "OpName %main \"main\"\n"
5856                 "OpName %id \"gl_GlobalInvocationID\"\n"
5857
5858                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5859
5860                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5861
5862                 "%u32ptr      = OpTypePointer Function %u32\n"
5863
5864                 "%id          = OpVariable %uvec3ptr Input\n"
5865                 "%zero        = OpConstant %i32 0\n"
5866                 "%uzero       = OpConstant %u32 0\n"
5867                 "%one         = OpConstant %i32 1\n"
5868                 "%constf1     = OpConstant %f32 1.0\n"
5869                 "%four        = OpConstant %u32 4\n"
5870
5871                 "%main        = OpFunction %void None %voidf\n"
5872                 "%entry       = OpLabel\n"
5873                 "%i           = OpVariable %u32ptr Function\n"
5874                 "               OpStore %i %uzero\n"
5875
5876                 "%idval       = OpLoad %uvec3 %id\n"
5877                 "%x           = OpCompositeExtract %u32 %idval 0\n"
5878                 "%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
5879                 "%inval       = OpLoad %f32 %inloc\n"
5880                 "%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
5881                 "               OpStore %outloc %inval\n"
5882                 "               OpBranch %loop_entry\n"
5883
5884                 "%loop_entry  = OpLabel\n"
5885                 "%i_val       = OpLoad %u32 %i\n"
5886                 "%cmp_lt      = OpULessThan %bool %i_val %four\n"
5887                 "               OpLoopMerge %loop_merge %loop_body ${CONTROL}\n"
5888                 "               OpBranchConditional %cmp_lt %loop_body %loop_merge\n"
5889                 "%loop_body   = OpLabel\n"
5890                 "%outval      = OpLoad %f32 %outloc\n"
5891                 "%addf1       = OpFAdd %f32 %outval %constf1\n"
5892                 "               OpStore %outloc %addf1\n"
5893                 "%new_i       = OpIAdd %u32 %i_val %one\n"
5894                 "               OpStore %i %new_i\n"
5895                 "               OpBranch %loop_entry\n"
5896                 "%loop_merge  = OpLabel\n"
5897                 "               OpReturn\n"
5898                 "               OpFunctionEnd\n");
5899
5900         cases.push_back(CaseParameter("none",                           "None"));
5901         cases.push_back(CaseParameter("unroll",                         "Unroll"));
5902         cases.push_back(CaseParameter("dont_unroll",            "DontUnroll"));
5903
5904         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
5905
5906         for (size_t ndx = 0; ndx < numElements; ++ndx)
5907                 outputFloats[ndx] = inputFloats[ndx] + 4.f;
5908
5909         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5910         {
5911                 map<string, string>             specializations;
5912                 ComputeShaderSpec               spec;
5913
5914                 specializations["CONTROL"] = cases[caseNdx].param;
5915                 spec.assembly = shaderTemplate.specialize(specializations);
5916                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5917                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5918                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5919
5920                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5921         }
5922
5923         group->addChild(new SpvAsmLoopControlDependencyLengthCase(testCtx, "dependency_length", "dependency_length"));
5924         group->addChild(new SpvAsmLoopControlDependencyInfiniteCase(testCtx, "dependency_infinite", "dependency_infinite"));
5925
5926         return group.release();
5927 }
5928
5929 // Assembly code used for testing selection control is based on GLSL source code:
5930 // #version 430
5931 //
5932 // layout(std140, set = 0, binding = 0) readonly buffer Input {
5933 //   float elements[];
5934 // } input_data;
5935 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
5936 //   float elements[];
5937 // } output_data;
5938 //
5939 // void main() {
5940 //   uint x = gl_GlobalInvocationID.x;
5941 //   float val = input_data.elements[x];
5942 //   if (val > 10.f)
5943 //     output_data.elements[x] = val + 1.f;
5944 //   else
5945 //     output_data.elements[x] = val - 1.f;
5946 // }
5947 tcu::TestCaseGroup* createSelectionControlGroup (tcu::TestContext& testCtx)
5948 {
5949         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "selection_control", "Tests selection control cases"));
5950         vector<CaseParameter>                   cases;
5951         de::Random                                              rnd                             (deStringHash(group->getName()));
5952         const int                                               numElements             = 100;
5953         vector<float>                                   inputFloats             (numElements, 0);
5954         vector<float>                                   outputFloats    (numElements, 0);
5955         const StringTemplate                    shaderTemplate  (
5956                 string(getComputeAsmShaderPreamble()) +
5957
5958                 "OpSource GLSL 430\n"
5959                 "OpName %main \"main\"\n"
5960                 "OpName %id \"gl_GlobalInvocationID\"\n"
5961
5962                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5963
5964                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5965
5966                 "%id       = OpVariable %uvec3ptr Input\n"
5967                 "%zero     = OpConstant %i32 0\n"
5968                 "%constf1  = OpConstant %f32 1.0\n"
5969                 "%constf10 = OpConstant %f32 10.0\n"
5970
5971                 "%main     = OpFunction %void None %voidf\n"
5972                 "%entry    = OpLabel\n"
5973                 "%idval    = OpLoad %uvec3 %id\n"
5974                 "%x        = OpCompositeExtract %u32 %idval 0\n"
5975                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
5976                 "%inval    = OpLoad %f32 %inloc\n"
5977                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
5978                 "%cmp_gt   = OpFOrdGreaterThan %bool %inval %constf10\n"
5979
5980                 "            OpSelectionMerge %if_end ${CONTROL}\n"
5981                 "            OpBranchConditional %cmp_gt %if_true %if_false\n"
5982                 "%if_true  = OpLabel\n"
5983                 "%addf1    = OpFAdd %f32 %inval %constf1\n"
5984                 "            OpStore %outloc %addf1\n"
5985                 "            OpBranch %if_end\n"
5986                 "%if_false = OpLabel\n"
5987                 "%subf1    = OpFSub %f32 %inval %constf1\n"
5988                 "            OpStore %outloc %subf1\n"
5989                 "            OpBranch %if_end\n"
5990                 "%if_end   = OpLabel\n"
5991                 "            OpReturn\n"
5992                 "            OpFunctionEnd\n");
5993
5994         cases.push_back(CaseParameter("none",                                   "None"));
5995         cases.push_back(CaseParameter("flatten",                                "Flatten"));
5996         cases.push_back(CaseParameter("dont_flatten",                   "DontFlatten"));
5997         cases.push_back(CaseParameter("flatten_dont_flatten",   "DontFlatten|Flatten"));
5998
5999         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6000
6001         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6002         floorAll(inputFloats);
6003
6004         for (size_t ndx = 0; ndx < numElements; ++ndx)
6005                 outputFloats[ndx] = inputFloats[ndx] + (inputFloats[ndx] > 10.f ? 1.f : -1.f);
6006
6007         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6008         {
6009                 map<string, string>             specializations;
6010                 ComputeShaderSpec               spec;
6011
6012                 specializations["CONTROL"] = cases[caseNdx].param;
6013                 spec.assembly = shaderTemplate.specialize(specializations);
6014                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6015                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6016                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6017
6018                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6019         }
6020
6021         return group.release();
6022 }
6023
6024 void getOpNameAbuseCases (vector<CaseParameter> &abuseCases)
6025 {
6026         // Generate a long name.
6027         std::string longname;
6028         longname.resize(65535, 'k'); // max string literal, spir-v 2.17
6029
6030         // Some bad names, abusing utf-8 encoding. This may also cause problems
6031         // with the logs.
6032         // 1. Various illegal code points in utf-8
6033         std::string utf8illegal =
6034                 "Illegal bytes in UTF-8: "
6035                 "\xc0 \xc1 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff"
6036                 "illegal surrogates: \xed\xad\xbf \xed\xbe\x80";
6037
6038         // 2. Zero encoded as overlong, not exactly legal but often supported to differentiate from terminating zero
6039         std::string utf8nul = "UTF-8 encoded nul \xC0\x80 (should not end name)";
6040
6041         // 3. Some overlong encodings
6042         std::string utf8overlong =
6043                 "UTF-8 overlong \xF0\x82\x82\xAC \xfc\x83\xbf\xbf\xbf\xbf \xf8\x87\xbf\xbf\xbf "
6044                 "\xf0\x8f\xbf\xbf";
6045
6046         // 4. Internet "zalgo" meme "bleeding text"
6047         std::string utf8zalgo =
6048                 "\x56\xcc\xb5\xcc\x85\xcc\x94\xcc\x88\xcd\x8a\xcc\x91\xcc\x88\xcd\x91\xcc\x83\xcd\x82"
6049                 "\xcc\x83\xcd\x90\xcc\x8a\xcc\x92\xcc\x92\xcd\x8b\xcc\x94\xcd\x9d\xcc\x98\xcc\xab\xcc"
6050                 "\xae\xcc\xa9\xcc\xad\xcc\x97\xcc\xb0\x75\xcc\xb6\xcc\xbe\xcc\x80\xcc\x82\xcc\x84\xcd"
6051                 "\x84\xcc\x90\xcd\x86\xcc\x9a\xcd\x84\xcc\x9b\xcd\x86\xcd\x92\xcc\x9a\xcd\x99\xcd\x99"
6052                 "\xcc\xbb\xcc\x98\xcd\x8e\xcd\x88\xcd\x9a\xcc\xa6\xcc\x9c\xcc\xab\xcc\x99\xcd\x94\xcd"
6053                 "\x99\xcd\x95\xcc\xa5\xcc\xab\xcd\x89\x6c\xcc\xb8\xcc\x8e\xcc\x8b\xcc\x8b\xcc\x9a\xcc"
6054                 "\x8e\xcd\x9d\xcc\x80\xcc\xa1\xcc\xad\xcd\x9c\xcc\xba\xcc\x96\xcc\xb3\xcc\xa2\xcd\x8e"
6055                 "\xcc\xa2\xcd\x96\x6b\xcc\xb8\xcc\x84\xcd\x81\xcc\xbf\xcc\x8d\xcc\x89\xcc\x85\xcc\x92"
6056                 "\xcc\x84\xcc\x90\xcd\x81\xcc\x93\xcd\x90\xcd\x92\xcd\x9d\xcc\x84\xcd\x98\xcd\x9d\xcd"
6057                 "\xa0\xcd\x91\xcc\x94\xcc\xb9\xcd\x93\xcc\xa5\xcd\x87\xcc\xad\xcc\xa7\xcd\x96\xcd\x99"
6058                 "\xcc\x9d\xcc\xbc\xcd\x96\xcd\x93\xcc\x9d\xcc\x99\xcc\xa8\xcc\xb1\xcd\x85\xcc\xba\xcc"
6059                 "\xa7\x61\xcc\xb8\xcc\x8e\xcc\x81\xcd\x90\xcd\x84\xcd\x8c\xcc\x8c\xcc\x85\xcd\x86\xcc"
6060                 "\x84\xcd\x84\xcc\x90\xcc\x84\xcc\x8d\xcd\x99\xcd\x8d\xcc\xb0\xcc\xa3\xcc\xa6\xcd\x89"
6061                 "\xcd\x8d\xcd\x87\xcc\x98\xcd\x8d\xcc\xa4\xcd\x9a\xcd\x8e\xcc\xab\xcc\xb9\xcc\xac\xcc"
6062                 "\xa2\xcd\x87\xcc\xa0\xcc\xb3\xcd\x89\xcc\xb9\xcc\xa7\xcc\xa6\xcd\x89\xcd\x95\x6e\xcc"
6063                 "\xb8\xcd\x8a\xcc\x8a\xcd\x82\xcc\x9b\xcd\x81\xcd\x90\xcc\x85\xcc\x9b\xcd\x80\xcd\x91"
6064                 "\xcd\x9b\xcc\x81\xcd\x81\xcc\x9a\xcc\xb3\xcd\x9c\xcc\x9e\xcc\x9d\xcd\x99\xcc\xa2\xcd"
6065                 "\x93\xcd\x96\xcc\x97\xff";
6066
6067         // General name abuses
6068         abuseCases.push_back(CaseParameter("_has_very_long_name", longname));
6069         abuseCases.push_back(CaseParameter("_utf8_illegal", utf8illegal));
6070         abuseCases.push_back(CaseParameter("_utf8_nul", utf8nul));
6071         abuseCases.push_back(CaseParameter("_utf8_overlong", utf8overlong));
6072         abuseCases.push_back(CaseParameter("_utf8_zalgo", utf8zalgo));
6073
6074         // GL keywords
6075         abuseCases.push_back(CaseParameter("_is_gl_Position", "gl_Position"));
6076         abuseCases.push_back(CaseParameter("_is_gl_InstanceID", "gl_InstanceID"));
6077         abuseCases.push_back(CaseParameter("_is_gl_PrimitiveID", "gl_PrimitiveID"));
6078         abuseCases.push_back(CaseParameter("_is_gl_TessCoord", "gl_TessCoord"));
6079         abuseCases.push_back(CaseParameter("_is_gl_PerVertex", "gl_PerVertex"));
6080         abuseCases.push_back(CaseParameter("_is_gl_InvocationID", "gl_InvocationID"));
6081         abuseCases.push_back(CaseParameter("_is_gl_PointSize", "gl_PointSize"));
6082         abuseCases.push_back(CaseParameter("_is_gl_PointCoord", "gl_PointCoord"));
6083         abuseCases.push_back(CaseParameter("_is_gl_Layer", "gl_Layer"));
6084         abuseCases.push_back(CaseParameter("_is_gl_FragDepth", "gl_FragDepth"));
6085         abuseCases.push_back(CaseParameter("_is_gl_NumWorkGroups", "gl_NumWorkGroups"));
6086         abuseCases.push_back(CaseParameter("_is_gl_WorkGroupID", "gl_WorkGroupID"));
6087         abuseCases.push_back(CaseParameter("_is_gl_LocalInvocationID", "gl_LocalInvocationID"));
6088         abuseCases.push_back(CaseParameter("_is_gl_GlobalInvocationID", "gl_GlobalInvocationID"));
6089         abuseCases.push_back(CaseParameter("_is_gl_MaxVertexAttribs", "gl_MaxVertexAttribs"));
6090         abuseCases.push_back(CaseParameter("_is_gl_MaxViewports", "gl_MaxViewports"));
6091         abuseCases.push_back(CaseParameter("_is_gl_MaxComputeWorkGroupCount", "gl_MaxComputeWorkGroupCount"));
6092         abuseCases.push_back(CaseParameter("_is_mat3", "mat3"));
6093         abuseCases.push_back(CaseParameter("_is_volatile", "volatile"));
6094         abuseCases.push_back(CaseParameter("_is_inout", "inout"));
6095         abuseCases.push_back(CaseParameter("_is_isampler3d", "isampler3d"));
6096 }
6097
6098 tcu::TestCaseGroup* createOpNameGroup (tcu::TestContext& testCtx)
6099 {
6100         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opname", "Tests OpName cases"));
6101         de::MovePtr<tcu::TestCaseGroup> entryMainGroup  (new tcu::TestCaseGroup(testCtx, "entry_main", "OpName tests with entry main"));
6102         de::MovePtr<tcu::TestCaseGroup> entryNotGroup   (new tcu::TestCaseGroup(testCtx, "entry_rdc", "OpName tests with entry rdc"));
6103         de::MovePtr<tcu::TestCaseGroup> abuseGroup              (new tcu::TestCaseGroup(testCtx, "abuse", "OpName abuse tests"));
6104         vector<CaseParameter>                   cases;
6105         vector<CaseParameter>                   abuseCases;
6106         vector<string>                                  testFunc;
6107         de::Random                                              rnd                             (deStringHash(group->getName()));
6108         const int                                               numElements             = 128;
6109         vector<float>                                   inputFloats             (numElements, 0);
6110         vector<float>                                   outputFloats    (numElements, 0);
6111
6112         getOpNameAbuseCases(abuseCases);
6113
6114         fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6115
6116         for(size_t ndx = 0; ndx < numElements; ++ndx)
6117                 outputFloats[ndx] = -inputFloats[ndx];
6118
6119         const string commonShaderHeader =
6120                 "OpCapability Shader\n"
6121                 "OpMemoryModel Logical GLSL450\n"
6122                 "OpEntryPoint GLCompute %main \"main\" %id\n"
6123                 "OpExecutionMode %main LocalSize 1 1 1\n";
6124
6125         const string commonShaderFooter =
6126                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6127
6128                 + string(getComputeAsmInputOutputBufferTraits())
6129                 + string(getComputeAsmCommonTypes())
6130                 + string(getComputeAsmInputOutputBuffer()) +
6131
6132                 "%id        = OpVariable %uvec3ptr Input\n"
6133                 "%zero      = OpConstant %i32 0\n"
6134
6135                 "%func      = OpFunction %void None %voidf\n"
6136                 "%5         = OpLabel\n"
6137                 "             OpReturn\n"
6138                 "             OpFunctionEnd\n"
6139
6140                 "%main      = OpFunction %void None %voidf\n"
6141                 "%entry     = OpLabel\n"
6142                 "%7         = OpFunctionCall %void %func\n"
6143
6144                 "%idval     = OpLoad %uvec3 %id\n"
6145                 "%x         = OpCompositeExtract %u32 %idval 0\n"
6146
6147                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6148                 "%inval     = OpLoad %f32 %inloc\n"
6149                 "%neg       = OpFNegate %f32 %inval\n"
6150                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6151                 "             OpStore %outloc %neg\n"
6152
6153                 "             OpReturn\n"
6154                 "             OpFunctionEnd\n";
6155
6156         const StringTemplate shaderTemplate (
6157                 "OpCapability Shader\n"
6158                 "OpMemoryModel Logical GLSL450\n"
6159                 "OpEntryPoint GLCompute %main \"${ENTRY}\" %id\n"
6160                 "OpExecutionMode %main LocalSize 1 1 1\n"
6161                 "OpName %${ID} \"${NAME}\"\n" +
6162                 commonShaderFooter);
6163
6164         const std::string multipleNames =
6165                 commonShaderHeader +
6166                 "OpName %main \"to_be\"\n"
6167                 "OpName %id   \"or_not\"\n"
6168                 "OpName %main \"to_be\"\n"
6169                 "OpName %main \"makes_no\"\n"
6170                 "OpName %func \"difference\"\n"
6171                 "OpName %5    \"to_me\"\n" +
6172                 commonShaderFooter;
6173
6174         {
6175                 ComputeShaderSpec       spec;
6176
6177                 spec.assembly           = multipleNames;
6178                 spec.numWorkGroups      = IVec3(numElements, 1, 1);
6179                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6180                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6181
6182                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "main_has_multiple_names", "multiple_names", spec));
6183         }
6184
6185         const std::string everythingNamed =
6186                 commonShaderHeader +
6187                 "OpName %main   \"name1\"\n"
6188                 "OpName %id     \"name2\"\n"
6189                 "OpName %zero   \"name3\"\n"
6190                 "OpName %entry  \"name4\"\n"
6191                 "OpName %func   \"name5\"\n"
6192                 "OpName %5      \"name6\"\n"
6193                 "OpName %7      \"name7\"\n"
6194                 "OpName %idval  \"name8\"\n"
6195                 "OpName %inloc  \"name9\"\n"
6196                 "OpName %inval  \"name10\"\n"
6197                 "OpName %neg    \"name11\"\n"
6198                 "OpName %outloc \"name12\"\n"+
6199                 commonShaderFooter;
6200         {
6201                 ComputeShaderSpec       spec;
6202
6203                 spec.assembly           = everythingNamed;
6204                 spec.numWorkGroups      = IVec3(numElements, 1, 1);
6205                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6206                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6207
6208                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named", "everything_named", spec));
6209         }
6210
6211         const std::string everythingNamedTheSame =
6212                 commonShaderHeader +
6213                 "OpName %main   \"the_same\"\n"
6214                 "OpName %id     \"the_same\"\n"
6215                 "OpName %zero   \"the_same\"\n"
6216                 "OpName %entry  \"the_same\"\n"
6217                 "OpName %func   \"the_same\"\n"
6218                 "OpName %5      \"the_same\"\n"
6219                 "OpName %7      \"the_same\"\n"
6220                 "OpName %idval  \"the_same\"\n"
6221                 "OpName %inloc  \"the_same\"\n"
6222                 "OpName %inval  \"the_same\"\n"
6223                 "OpName %neg    \"the_same\"\n"
6224                 "OpName %outloc \"the_same\"\n"+
6225                 commonShaderFooter;
6226         {
6227                 ComputeShaderSpec       spec;
6228
6229                 spec.assembly           = everythingNamedTheSame;
6230                 spec.numWorkGroups      = IVec3(numElements, 1, 1);
6231                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6232                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6233
6234                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", "everything_named_the_same", spec));
6235         }
6236
6237         // main_is_...
6238         for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6239         {
6240                 map<string, string>     specializations;
6241                 ComputeShaderSpec       spec;
6242
6243                 specializations["ENTRY"]        = "main";
6244                 specializations["ID"]           = "main";
6245                 specializations["NAME"]         = abuseCases[ndx].param;
6246                 spec.assembly                           = shaderTemplate.specialize(specializations);
6247                 spec.numWorkGroups                      = IVec3(numElements, 1, 1);
6248                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6249                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6250
6251                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("main") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6252         }
6253
6254         // x_is_....
6255         for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6256         {
6257                 map<string, string>     specializations;
6258                 ComputeShaderSpec       spec;
6259
6260                 specializations["ENTRY"]        = "main";
6261                 specializations["ID"]           = "x";
6262                 specializations["NAME"]         = abuseCases[ndx].param;
6263                 spec.assembly                           = shaderTemplate.specialize(specializations);
6264                 spec.numWorkGroups                      = IVec3(numElements, 1, 1);
6265                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6266                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6267
6268                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("x") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6269         }
6270
6271         cases.push_back(CaseParameter("_is_main", "main"));
6272         cases.push_back(CaseParameter("_is_not_main", "not_main"));
6273         testFunc.push_back("main");
6274         testFunc.push_back("func");
6275
6276         for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6277         {
6278                 for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6279                 {
6280                         map<string, string>     specializations;
6281                         ComputeShaderSpec       spec;
6282
6283                         specializations["ENTRY"]        = "main";
6284                         specializations["ID"]           = testFunc[fNdx];
6285                         specializations["NAME"]         = cases[ndx].param;
6286                         spec.assembly                           = shaderTemplate.specialize(specializations);
6287                         spec.numWorkGroups                      = IVec3(numElements, 1, 1);
6288                         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6289                         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6290
6291                         entryMainGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), cases[ndx].name, spec));
6292                 }
6293         }
6294
6295         cases.push_back(CaseParameter("_is_entry", "rdc"));
6296
6297         for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6298         {
6299                 for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6300                 {
6301                         map<string, string>     specializations;
6302                         ComputeShaderSpec       spec;
6303
6304                         specializations["ENTRY"]        = "rdc";
6305                         specializations["ID"]           = testFunc[fNdx];
6306                         specializations["NAME"]         = cases[ndx].param;
6307                         spec.assembly                           = shaderTemplate.specialize(specializations);
6308                         spec.numWorkGroups                      = IVec3(numElements, 1, 1);
6309                         spec.entryPoint                         = "rdc";
6310                         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6311                         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6312
6313                         entryNotGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), cases[ndx].name, spec));
6314                 }
6315         }
6316
6317         group->addChild(entryMainGroup.release());
6318         group->addChild(entryNotGroup.release());
6319         group->addChild(abuseGroup.release());
6320
6321         return group.release();
6322 }
6323
6324 tcu::TestCaseGroup* createOpMemberNameGroup (tcu::TestContext& testCtx)
6325 {
6326         de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opmembername", "Tests OpMemberName cases"));
6327         de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "abuse", "OpMemberName abuse tests"));
6328         vector<CaseParameter>                   abuseCases;
6329         vector<string>                                  testFunc;
6330         de::Random                                              rnd(deStringHash(group->getName()));
6331         const int                                               numElements = 128;
6332         vector<float>                                   inputFloats(numElements, 0);
6333         vector<float>                                   outputFloats(numElements, 0);
6334
6335         getOpNameAbuseCases(abuseCases);
6336
6337         fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6338
6339         for (size_t ndx = 0; ndx < numElements; ++ndx)
6340                 outputFloats[ndx] = -inputFloats[ndx];
6341
6342         const string commonShaderHeader =
6343                 "OpCapability Shader\n"
6344                 "OpMemoryModel Logical GLSL450\n"
6345                 "OpEntryPoint GLCompute %main \"main\" %id\n"
6346                 "OpExecutionMode %main LocalSize 1 1 1\n";
6347
6348         const string commonShaderFooter =
6349                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6350
6351                 + string(getComputeAsmInputOutputBufferTraits())
6352                 + string(getComputeAsmCommonTypes())
6353                 + string(getComputeAsmInputOutputBuffer()) +
6354
6355                 "%u3str     = OpTypeStruct %u32 %u32 %u32\n"
6356
6357                 "%id        = OpVariable %uvec3ptr Input\n"
6358                 "%zero      = OpConstant %i32 0\n"
6359
6360                 "%main      = OpFunction %void None %voidf\n"
6361                 "%entry     = OpLabel\n"
6362
6363                 "%idval     = OpLoad %uvec3 %id\n"
6364                 "%x0        = OpCompositeExtract %u32 %idval 0\n"
6365
6366                 "%idstr     = OpCompositeConstruct %u3str %x0 %x0 %x0\n"
6367                 "%x         = OpCompositeExtract %u32 %idstr 0\n"
6368
6369                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6370                 "%inval     = OpLoad %f32 %inloc\n"
6371                 "%neg       = OpFNegate %f32 %inval\n"
6372                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6373                 "             OpStore %outloc %neg\n"
6374
6375                 "             OpReturn\n"
6376                 "             OpFunctionEnd\n";
6377
6378         const StringTemplate shaderTemplate(
6379                 commonShaderHeader +
6380                 "OpMemberName %u3str 0 \"${NAME}\"\n" +
6381                 commonShaderFooter);
6382
6383         const std::string multipleNames =
6384                 commonShaderHeader +
6385                 "OpMemberName %u3str 0 \"to_be\"\n"
6386                 "OpMemberName %u3str 1 \"or_not\"\n"
6387                 "OpMemberName %u3str 0 \"to_be\"\n"
6388                 "OpMemberName %u3str 2 \"makes_no\"\n"
6389                 "OpMemberName %u3str 0 \"difference\"\n"
6390                 "OpMemberName %u3str 0 \"to_me\"\n" +
6391                 commonShaderFooter;
6392         {
6393                 ComputeShaderSpec       spec;
6394
6395                 spec.assembly = multipleNames;
6396                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6397                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6398                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6399
6400                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "u3str_x_has_multiple_names", "multiple_names", spec));
6401         }
6402
6403         const std::string everythingNamedTheSame =
6404                 commonShaderHeader +
6405                 "OpMemberName %u3str 0 \"the_same\"\n"
6406                 "OpMemberName %u3str 1 \"the_same\"\n"
6407                 "OpMemberName %u3str 2 \"the_same\"\n" +
6408                 commonShaderFooter;
6409
6410         {
6411                 ComputeShaderSpec       spec;
6412
6413                 spec.assembly = everythingNamedTheSame;
6414                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6415                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6416                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6417
6418                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", "everything_named_the_same", spec));
6419         }
6420
6421         // u3str_x_is_....
6422         for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6423         {
6424                 map<string, string>     specializations;
6425                 ComputeShaderSpec       spec;
6426
6427                 specializations["NAME"] = abuseCases[ndx].param;
6428                 spec.assembly = shaderTemplate.specialize(specializations);
6429                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6430                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6431                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6432
6433                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("u3str_x") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6434         }
6435
6436         group->addChild(abuseGroup.release());
6437
6438         return group.release();
6439 }
6440
6441 // Assembly code used for testing function control is based on GLSL source code:
6442 //
6443 // #version 430
6444 //
6445 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6446 //   float elements[];
6447 // } input_data;
6448 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6449 //   float elements[];
6450 // } output_data;
6451 //
6452 // float const10() { return 10.f; }
6453 //
6454 // void main() {
6455 //   uint x = gl_GlobalInvocationID.x;
6456 //   output_data.elements[x] = input_data.elements[x] + const10();
6457 // }
6458 tcu::TestCaseGroup* createFunctionControlGroup (tcu::TestContext& testCtx)
6459 {
6460         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "function_control", "Tests function control cases"));
6461         vector<CaseParameter>                   cases;
6462         de::Random                                              rnd                             (deStringHash(group->getName()));
6463         const int                                               numElements             = 100;
6464         vector<float>                                   inputFloats             (numElements, 0);
6465         vector<float>                                   outputFloats    (numElements, 0);
6466         const StringTemplate                    shaderTemplate  (
6467                 string(getComputeAsmShaderPreamble()) +
6468
6469                 "OpSource GLSL 430\n"
6470                 "OpName %main \"main\"\n"
6471                 "OpName %func_const10 \"const10(\"\n"
6472                 "OpName %id \"gl_GlobalInvocationID\"\n"
6473
6474                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6475
6476                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6477
6478                 "%f32f = OpTypeFunction %f32\n"
6479                 "%id = OpVariable %uvec3ptr Input\n"
6480                 "%zero = OpConstant %i32 0\n"
6481                 "%constf10 = OpConstant %f32 10.0\n"
6482
6483                 "%main         = OpFunction %void None %voidf\n"
6484                 "%entry        = OpLabel\n"
6485                 "%idval        = OpLoad %uvec3 %id\n"
6486                 "%x            = OpCompositeExtract %u32 %idval 0\n"
6487                 "%inloc        = OpAccessChain %f32ptr %indata %zero %x\n"
6488                 "%inval        = OpLoad %f32 %inloc\n"
6489                 "%ret_10       = OpFunctionCall %f32 %func_const10\n"
6490                 "%fadd         = OpFAdd %f32 %inval %ret_10\n"
6491                 "%outloc       = OpAccessChain %f32ptr %outdata %zero %x\n"
6492                 "                OpStore %outloc %fadd\n"
6493                 "                OpReturn\n"
6494                 "                OpFunctionEnd\n"
6495
6496                 "%func_const10 = OpFunction %f32 ${CONTROL} %f32f\n"
6497                 "%label        = OpLabel\n"
6498                 "                OpReturnValue %constf10\n"
6499                 "                OpFunctionEnd\n");
6500
6501         cases.push_back(CaseParameter("none",                                           "None"));
6502         cases.push_back(CaseParameter("inline",                                         "Inline"));
6503         cases.push_back(CaseParameter("dont_inline",                            "DontInline"));
6504         cases.push_back(CaseParameter("pure",                                           "Pure"));
6505         cases.push_back(CaseParameter("const",                                          "Const"));
6506         cases.push_back(CaseParameter("inline_pure",                            "Inline|Pure"));
6507         cases.push_back(CaseParameter("const_dont_inline",                      "Const|DontInline"));
6508         cases.push_back(CaseParameter("inline_dont_inline",                     "Inline|DontInline"));
6509         cases.push_back(CaseParameter("pure_inline_dont_inline",        "Pure|Inline|DontInline"));
6510
6511         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6512
6513         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6514         floorAll(inputFloats);
6515
6516         for (size_t ndx = 0; ndx < numElements; ++ndx)
6517                 outputFloats[ndx] = inputFloats[ndx] + 10.f;
6518
6519         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6520         {
6521                 map<string, string>             specializations;
6522                 ComputeShaderSpec               spec;
6523
6524                 specializations["CONTROL"] = cases[caseNdx].param;
6525                 spec.assembly = shaderTemplate.specialize(specializations);
6526                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6527                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6528                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6529
6530                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6531         }
6532
6533         return group.release();
6534 }
6535
6536 tcu::TestCaseGroup* createMemoryAccessGroup (tcu::TestContext& testCtx)
6537 {
6538         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "memory_access", "Tests memory access cases"));
6539         vector<CaseParameter>                   cases;
6540         de::Random                                              rnd                             (deStringHash(group->getName()));
6541         const int                                               numElements             = 100;
6542         vector<float>                                   inputFloats             (numElements, 0);
6543         vector<float>                                   outputFloats    (numElements, 0);
6544         const StringTemplate                    shaderTemplate  (
6545                 string(getComputeAsmShaderPreamble()) +
6546
6547                 "OpSource GLSL 430\n"
6548                 "OpName %main           \"main\"\n"
6549                 "OpName %id             \"gl_GlobalInvocationID\"\n"
6550
6551                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6552
6553                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6554
6555                 "%f32ptr_f  = OpTypePointer Function %f32\n"
6556
6557                 "%id        = OpVariable %uvec3ptr Input\n"
6558                 "%zero      = OpConstant %i32 0\n"
6559                 "%four      = OpConstant %i32 4\n"
6560
6561                 "%main      = OpFunction %void None %voidf\n"
6562                 "%label     = OpLabel\n"
6563                 "%copy      = OpVariable %f32ptr_f Function\n"
6564                 "%idval     = OpLoad %uvec3 %id ${ACCESS}\n"
6565                 "%x         = OpCompositeExtract %u32 %idval 0\n"
6566                 "%inloc     = OpAccessChain %f32ptr %indata  %zero %x\n"
6567                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6568                 "             OpCopyMemory %copy %inloc ${ACCESS}\n"
6569                 "%val1      = OpLoad %f32 %copy\n"
6570                 "%val2      = OpLoad %f32 %inloc\n"
6571                 "%add       = OpFAdd %f32 %val1 %val2\n"
6572                 "             OpStore %outloc %add ${ACCESS}\n"
6573                 "             OpReturn\n"
6574                 "             OpFunctionEnd\n");
6575
6576         cases.push_back(CaseParameter("null",                                   ""));
6577         cases.push_back(CaseParameter("none",                                   "None"));
6578         cases.push_back(CaseParameter("volatile",                               "Volatile"));
6579         cases.push_back(CaseParameter("aligned",                                "Aligned 4"));
6580         cases.push_back(CaseParameter("nontemporal",                    "Nontemporal"));
6581         cases.push_back(CaseParameter("aligned_nontemporal",    "Aligned|Nontemporal 4"));
6582         cases.push_back(CaseParameter("aligned_volatile",               "Volatile|Aligned 4"));
6583
6584         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6585
6586         for (size_t ndx = 0; ndx < numElements; ++ndx)
6587                 outputFloats[ndx] = inputFloats[ndx] + inputFloats[ndx];
6588
6589         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6590         {
6591                 map<string, string>             specializations;
6592                 ComputeShaderSpec               spec;
6593
6594                 specializations["ACCESS"] = cases[caseNdx].param;
6595                 spec.assembly = shaderTemplate.specialize(specializations);
6596                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6597                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6598                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6599
6600                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6601         }
6602
6603         return group.release();
6604 }
6605
6606 // Checks that we can get undefined values for various types, without exercising a computation with it.
6607 tcu::TestCaseGroup* createOpUndefGroup (tcu::TestContext& testCtx)
6608 {
6609         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opundef", "Tests the OpUndef instruction"));
6610         vector<CaseParameter>                   cases;
6611         de::Random                                              rnd                             (deStringHash(group->getName()));
6612         const int                                               numElements             = 100;
6613         vector<float>                                   positiveFloats  (numElements, 0);
6614         vector<float>                                   negativeFloats  (numElements, 0);
6615         const StringTemplate                    shaderTemplate  (
6616                 string(getComputeAsmShaderPreamble()) +
6617
6618                 "OpSource GLSL 430\n"
6619                 "OpName %main           \"main\"\n"
6620                 "OpName %id             \"gl_GlobalInvocationID\"\n"
6621
6622                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6623
6624                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6625                 "%uvec2     = OpTypeVector %u32 2\n"
6626                 "%fvec4     = OpTypeVector %f32 4\n"
6627                 "%fmat33    = OpTypeMatrix %fvec3 3\n"
6628                 "%image     = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
6629                 "%sampler   = OpTypeSampler\n"
6630                 "%simage    = OpTypeSampledImage %image\n"
6631                 "%const100  = OpConstant %u32 100\n"
6632                 "%uarr100   = OpTypeArray %i32 %const100\n"
6633                 "%struct    = OpTypeStruct %f32 %i32 %u32\n"
6634                 "%pointer   = OpTypePointer Function %i32\n"
6635                 + string(getComputeAsmInputOutputBuffer()) +
6636
6637                 "%id        = OpVariable %uvec3ptr Input\n"
6638                 "%zero      = OpConstant %i32 0\n"
6639
6640                 "%main      = OpFunction %void None %voidf\n"
6641                 "%label     = OpLabel\n"
6642
6643                 "%undef     = OpUndef ${TYPE}\n"
6644
6645                 "%idval     = OpLoad %uvec3 %id\n"
6646                 "%x         = OpCompositeExtract %u32 %idval 0\n"
6647
6648                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6649                 "%inval     = OpLoad %f32 %inloc\n"
6650                 "%neg       = OpFNegate %f32 %inval\n"
6651                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6652                 "             OpStore %outloc %neg\n"
6653                 "             OpReturn\n"
6654                 "             OpFunctionEnd\n");
6655
6656         cases.push_back(CaseParameter("bool",                   "%bool"));
6657         cases.push_back(CaseParameter("sint32",                 "%i32"));
6658         cases.push_back(CaseParameter("uint32",                 "%u32"));
6659         cases.push_back(CaseParameter("float32",                "%f32"));
6660         cases.push_back(CaseParameter("vec4float32",    "%fvec4"));
6661         cases.push_back(CaseParameter("vec2uint32",             "%uvec2"));
6662         cases.push_back(CaseParameter("matrix",                 "%fmat33"));
6663         cases.push_back(CaseParameter("image",                  "%image"));
6664         cases.push_back(CaseParameter("sampler",                "%sampler"));
6665         cases.push_back(CaseParameter("sampledimage",   "%simage"));
6666         cases.push_back(CaseParameter("array",                  "%uarr100"));
6667         cases.push_back(CaseParameter("runtimearray",   "%f32arr"));
6668         cases.push_back(CaseParameter("struct",                 "%struct"));
6669         cases.push_back(CaseParameter("pointer",                "%pointer"));
6670
6671         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
6672
6673         for (size_t ndx = 0; ndx < numElements; ++ndx)
6674                 negativeFloats[ndx] = -positiveFloats[ndx];
6675
6676         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6677         {
6678                 map<string, string>             specializations;
6679                 ComputeShaderSpec               spec;
6680
6681                 specializations["TYPE"] = cases[caseNdx].param;
6682                 spec.assembly = shaderTemplate.specialize(specializations);
6683                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
6684                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
6685                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6686
6687                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6688         }
6689
6690                 return group.release();
6691 }
6692
6693 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
6694 tcu::TestCaseGroup* createFloat16OpConstantCompositeGroup (tcu::TestContext& testCtx)
6695 {
6696         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
6697         vector<CaseParameter>                   cases;
6698         de::Random                                              rnd                             (deStringHash(group->getName()));
6699         const int                                               numElements             = 100;
6700         vector<float>                                   positiveFloats  (numElements, 0);
6701         vector<float>                                   negativeFloats  (numElements, 0);
6702         const StringTemplate                    shaderTemplate  (
6703                 "OpCapability Shader\n"
6704                 "OpCapability Float16\n"
6705                 "OpMemoryModel Logical GLSL450\n"
6706                 "OpEntryPoint GLCompute %main \"main\" %id\n"
6707                 "OpExecutionMode %main LocalSize 1 1 1\n"
6708                 "OpSource GLSL 430\n"
6709                 "OpName %main           \"main\"\n"
6710                 "OpName %id             \"gl_GlobalInvocationID\"\n"
6711
6712                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6713
6714                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6715
6716                 "%id        = OpVariable %uvec3ptr Input\n"
6717                 "%zero      = OpConstant %i32 0\n"
6718                 "%f16       = OpTypeFloat 16\n"
6719                 "%c_f16_0   = OpConstant %f16 0.0\n"
6720                 "%c_f16_0_5 = OpConstant %f16 0.5\n"
6721                 "%c_f16_1   = OpConstant %f16 1.0\n"
6722                 "%v2f16     = OpTypeVector %f16 2\n"
6723                 "%v3f16     = OpTypeVector %f16 3\n"
6724                 "%v4f16     = OpTypeVector %f16 4\n"
6725
6726                 "${CONSTANT}\n"
6727
6728                 "%main      = OpFunction %void None %voidf\n"
6729                 "%label     = OpLabel\n"
6730                 "%idval     = OpLoad %uvec3 %id\n"
6731                 "%x         = OpCompositeExtract %u32 %idval 0\n"
6732                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6733                 "%inval     = OpLoad %f32 %inloc\n"
6734                 "%neg       = OpFNegate %f32 %inval\n"
6735                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6736                 "             OpStore %outloc %neg\n"
6737                 "             OpReturn\n"
6738                 "             OpFunctionEnd\n");
6739
6740
6741         cases.push_back(CaseParameter("vector",                 "%const = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"));
6742         cases.push_back(CaseParameter("matrix",                 "%m3v3f16 = OpTypeMatrix %v3f16 3\n"
6743                                                                                                         "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
6744                                                                                                         "%mat = OpConstantComposite %m3v3f16 %vec %vec %vec"));
6745         cases.push_back(CaseParameter("struct",                 "%m2v3f16 = OpTypeMatrix %v3f16 2\n"
6746                                                                                                         "%struct = OpTypeStruct %i32 %f16 %v3f16 %m2v3f16\n"
6747                                                                                                         "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
6748                                                                                                         "%mat = OpConstantComposite %m2v3f16 %vec %vec\n"
6749                                                                                                         "%const = OpConstantComposite %struct %zero %c_f16_0_5 %vec %mat\n"));
6750         cases.push_back(CaseParameter("nested_struct",  "%st1 = OpTypeStruct %i32 %f16\n"
6751                                                                                                         "%st2 = OpTypeStruct %i32 %i32\n"
6752                                                                                                         "%struct = OpTypeStruct %st1 %st2\n"
6753                                                                                                         "%st1val = OpConstantComposite %st1 %zero %c_f16_0_5\n"
6754                                                                                                         "%st2val = OpConstantComposite %st2 %zero %zero\n"
6755                                                                                                         "%const = OpConstantComposite %struct %st1val %st2val"));
6756
6757         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
6758
6759         for (size_t ndx = 0; ndx < numElements; ++ndx)
6760                 negativeFloats[ndx] = -positiveFloats[ndx];
6761
6762         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6763         {
6764                 map<string, string>             specializations;
6765                 ComputeShaderSpec               spec;
6766
6767                 specializations["CONSTANT"] = cases[caseNdx].param;
6768                 spec.assembly = shaderTemplate.specialize(specializations);
6769                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
6770                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
6771                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6772
6773                 spec.extensions.push_back("VK_KHR_shader_float16_int8");
6774
6775                 spec.requestedVulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
6776
6777                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6778         }
6779
6780         return group.release();
6781 }
6782
// Expands inData (N values) into one operand column of the N x N cross product of inData with
// itself, so that pairing squarize(inData, 0)[i] with squarize(inData, 1)[i] visits every ordered
// pair of input values.
//   argNo == 0: the whole sequence repeated N times      (a b c a b c a b c)
//   argNo == 1: every element repeated N times in a row  (a a a b b b c c c)
// Any other argNo yields an empty vector.
const vector<deFloat16> squarize(const vector<deFloat16>& inData, const deUint32 argNo)
{
	const size_t		valueCount	= inData.size();
	vector<deFloat16>	expanded;

	expanded.reserve(valueCount * valueCount);

	switch (argNo)
	{
		case 0:
			for (size_t repeatNdx = 0; repeatNdx < valueCount; ++repeatNdx)
				expanded.insert(expanded.end(), inData.begin(), inData.end());
			break;

		case 1:
			for (vector<deFloat16>::const_iterator valueIter = inData.begin(); valueIter != inData.end(); ++valueIter)
				expanded.insert(expanded.end(), valueCount, *valueIter);
			break;

		default:
			// Unknown operand index: nothing is generated.
			break;
	}

	return expanded;
}
6808
// Two-component-vector counterpart of squarize(). First every ordered pair (x, y) of inData
// values is formed, giving V = N * N two-component vectors stored flat. The result is then one
// operand column of the V x V cross product of those vectors:
//   argNo == 0: the whole vector list repeated V times
//   argNo == 1: every vector repeated V times in a row
// Any other argNo yields an empty vector. inData is asserted to hold at most 64 values.
const vector<deFloat16> squarizeVector(const vector<deFloat16>& inData, const deUint32 argNo)
{
	const size_t		componentsPerVector	= 2;
	const size_t		scalarCount			= inData.size();
	vector<deFloat16>	pairs;
	vector<deFloat16>	expanded;

	DE_ASSERT(scalarCount <= 64);

	// Stage 1: flat list of all ordered pairs, first component varying slowest.
	pairs.reserve(componentsPerVector * scalarCount * scalarCount);

	for (size_t firstNdx = 0; firstNdx < scalarCount; ++firstNdx)
	for (size_t secondNdx = 0; secondNdx < scalarCount; ++secondNdx)
	{
		pairs.push_back(inData[firstNdx]);
		pairs.push_back(inData[secondNdx]);
	}

	// Stage 2: expand the pair list into the requested operand column.
	const size_t		pairCount			= pairs.size() / componentsPerVector;

	expanded.reserve(componentsPerVector * pairCount * pairCount);

	if (argNo == 0)
	{
		for (size_t repeatNdx = 0; repeatNdx < pairCount; ++repeatNdx)
			expanded.insert(expanded.end(), pairs.begin(), pairs.end());
	}
	else if (argNo == 1)
	{
		for (size_t pairNdx = 0; pairNdx < pairCount; ++pairNdx)
		{
			const vector<deFloat16>::const_iterator	pairBegin	= pairs.begin() + componentsPerVector * pairNdx;
			const vector<deFloat16>::const_iterator	pairEnd		= pairBegin + componentsPerVector;

			for (size_t repeatNdx = 0; repeatNdx < pairCount; ++repeatNdx)
				expanded.insert(expanded.end(), pairBegin, pairEnd);
		}
	}

	return expanded;
}
6860
6861 struct fp16isNan                        { bool operator()(const tcu::Float16 in1, const tcu::Float16)           { return in1.isNaN(); } };
6862 struct fp16isInf                        { bool operator()(const tcu::Float16 in1, const tcu::Float16)           { return in1.isInf(); } };
6863 struct fp16isEqual                      { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() == in2.asFloat(); } };
6864 struct fp16isUnequal            { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() != in2.asFloat(); } };
6865 struct fp16isLess                       { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() <  in2.asFloat(); } };
6866 struct fp16isGreater            { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() >  in2.asFloat(); } };
6867 struct fp16isLessOrEqual        { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() <= in2.asFloat(); } };
6868 struct fp16isGreaterOrEqual     { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() >= in2.asFloat(); } };
6869
6870 template <class TestedLogicalFunction, bool onlyTestFunc, bool unationModeAnd, bool nanSupported>
6871 bool compareFP16Logical (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
6872 {
6873         if (inputs.size() != 2 || outputAllocs.size() != 1)
6874                 return false;
6875
6876         vector<deUint8> input1Bytes;
6877         vector<deUint8> input2Bytes;
6878
6879         inputs[0].getBytes(input1Bytes);
6880         inputs[1].getBytes(input2Bytes);
6881
6882         const deUint32                  denormModesCount                        = 2;
6883         const deFloat16                 float16one                                      = tcu::Float16(1.0f).bits();
6884         const deFloat16                 float16zero                                     = tcu::Float16(0.0f).bits();
6885         const tcu::Float16              zero                                            = tcu::Float16::zero(1);
6886         const deFloat16* const  outputAsFP16                            = static_cast<deFloat16*>(outputAllocs[0]->getHostPtr());
6887         const deFloat16* const  input1AsFP16                            = reinterpret_cast<deFloat16* const>(&input1Bytes.front());
6888         const deFloat16* const  input2AsFP16                            = reinterpret_cast<deFloat16* const>(&input2Bytes.front());
6889         deUint32                                successfulRuns                          = denormModesCount;
6890         std::string                             results[denormModesCount];
6891         TestedLogicalFunction   testedLogicalFunction;
6892
6893         for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
6894         {
6895                 const bool flushToZero = (denormMode == 1);
6896
6897                 for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deFloat16); ++idx)
6898                 {
6899                         const tcu::Float16      f1pre                   = tcu::Float16(input1AsFP16[idx]);
6900                         const tcu::Float16      f2pre                   = tcu::Float16(input2AsFP16[idx]);
6901                         const tcu::Float16      f1                              = (flushToZero && f1pre.isDenorm()) ? zero : f1pre;
6902                         const tcu::Float16      f2                              = (flushToZero && f2pre.isDenorm()) ? zero : f2pre;
6903                         deFloat16                       expectedOutput  = float16zero;
6904
6905                         if (onlyTestFunc)
6906                         {
6907                                 if (testedLogicalFunction(f1, f2))
6908                                         expectedOutput = float16one;
6909                         }
6910                         else
6911                         {
6912                                 const bool      f1nan   = f1.isNaN();
6913                                 const bool      f2nan   = f2.isNaN();
6914
6915                                 // Skip NaN floats if not supported by implementation
6916                                 if (!nanSupported && (f1nan || f2nan))
6917                                         continue;
6918
6919                                 if (unationModeAnd)
6920                                 {
6921                                         const bool      ordered         = !f1nan && !f2nan;
6922
6923                                         if (ordered && testedLogicalFunction(f1, f2))
6924                                                 expectedOutput = float16one;
6925                                 }
6926                                 else
6927                                 {
6928                                         const bool      unordered       = f1nan || f2nan;
6929
6930                                         if (unordered || testedLogicalFunction(f1, f2))
6931                                                 expectedOutput = float16one;
6932                                 }
6933                         }
6934
6935                         if (outputAsFP16[idx] != expectedOutput)
6936                         {
6937                                 std::ostringstream str;
6938
6939                                 str << "ERROR: Sub-case #" << idx
6940                                         << " flushToZero:" << flushToZero
6941                                         << std::hex
6942                                         << " failed, inputs: 0x" << f1.bits()
6943                                         << ";0x" << f2.bits()
6944                                         << " output: 0x" << outputAsFP16[idx]
6945                                         << " expected output: 0x" << expectedOutput;
6946
6947                                 results[denormMode] = str.str();
6948
6949                                 successfulRuns--;
6950
6951                                 break;
6952                         }
6953                 }
6954         }
6955
6956         if (successfulRuns == 0)
6957                 for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
6958                         log << TestLog::Message << results[denormMode] << TestLog::EndMessage;
6959
6960         return successfulRuns > 0;
6961 }
6962
6963 } // anonymous
6964
6965 tcu::TestCaseGroup* createOpSourceTests (tcu::TestContext& testCtx)
6966 {
6967         struct NameCodePair { string name, code; };
6968         RGBA                                                    defaultColors[4];
6969         de::MovePtr<tcu::TestCaseGroup> opSourceTests                   (new tcu::TestCaseGroup(testCtx, "opsource", "OpSource instruction"));
6970         const std::string                               opsourceGLSLWithFile    = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile ";
6971         map<string, string>                             fragments                               = passthruFragments();
6972         const NameCodePair                              tests[]                                 =
6973         {
6974                 {"unknown", "OpSource Unknown 321"},
6975                 {"essl", "OpSource ESSL 310"},
6976                 {"glsl", "OpSource GLSL 450"},
6977                 {"opencl_cpp", "OpSource OpenCL_CPP 120"},
6978                 {"opencl_c", "OpSource OpenCL_C 120"},
6979                 {"multiple", "OpSource GLSL 450\nOpSource GLSL 450"},
6980                 {"file", opsourceGLSLWithFile},
6981                 {"source", opsourceGLSLWithFile + "\"void main(){}\""},
6982                 // Longest possible source string: SPIR-V limits instructions to 65535
6983                 // words, of which the first 4 are opsourceGLSLWithFile; the rest will
6984                 // contain 65530 UTF8 characters (one word each) plus one last word
6985                 // containing 3 ASCII characters and \0.
6986                 {"longsource", opsourceGLSLWithFile + '"' + makeLongUTF8String(65530) + "ccc" + '"'}
6987         };
6988
6989         getDefaultColors(defaultColors);
6990         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
6991         {
6992                 fragments["debug"] = tests[testNdx].code;
6993                 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
6994         }
6995
6996         return opSourceTests.release();
6997 }
6998
6999 tcu::TestCaseGroup* createOpSourceContinuedTests (tcu::TestContext& testCtx)
7000 {
7001         struct NameCodePair { string name, code; };
7002         RGBA                                                            defaultColors[4];
7003         de::MovePtr<tcu::TestCaseGroup>         opSourceTests           (new tcu::TestCaseGroup(testCtx, "opsourcecontinued", "OpSourceContinued instruction"));
7004         map<string, string>                                     fragments                       = passthruFragments();
7005         const std::string                                       opsource                        = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile \"void main(){}\"\n";
7006         const NameCodePair                                      tests[]                         =
7007         {
7008                 {"empty", opsource + "OpSourceContinued \"\""},
7009                 {"short", opsource + "OpSourceContinued \"abcde\""},
7010                 {"multiple", opsource + "OpSourceContinued \"abcde\"\nOpSourceContinued \"fghij\""},
7011                 // Longest possible source string: SPIR-V limits instructions to 65535
7012                 // words, of which the first one is OpSourceContinued/length; the rest
7013                 // will contain 65533 UTF8 characters (one word each) plus one last word
7014                 // containing 3 ASCII characters and \0.
7015                 {"long", opsource + "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\""}
7016         };
7017
7018         getDefaultColors(defaultColors);
7019         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7020         {
7021                 fragments["debug"] = tests[testNdx].code;
7022                 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7023         }
7024
7025         return opSourceTests.release();
7026 }
7027 tcu::TestCaseGroup* createOpNoLineTests(tcu::TestContext& testCtx)
7028 {
7029         RGBA                                                             defaultColors[4];
7030         de::MovePtr<tcu::TestCaseGroup>          opLineTests             (new tcu::TestCaseGroup(testCtx, "opnoline", "OpNoLine instruction"));
7031         map<string, string>                                      fragments;
7032         getDefaultColors(defaultColors);
7033         fragments["debug"]                      =
7034                 "%name = OpString \"name\"\n";
7035
7036         fragments["pre_main"]   =
7037                 "OpNoLine\n"
7038                 "OpNoLine\n"
7039                 "OpLine %name 1 1\n"
7040                 "OpNoLine\n"
7041                 "OpLine %name 1 1\n"
7042                 "OpLine %name 1 1\n"
7043                 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7044                 "OpNoLine\n"
7045                 "OpLine %name 1 1\n"
7046                 "OpNoLine\n"
7047                 "OpLine %name 1 1\n"
7048                 "OpLine %name 1 1\n"
7049                 "%second_param1 = OpFunctionParameter %v4f32\n"
7050                 "OpNoLine\n"
7051                 "OpNoLine\n"
7052                 "%label_secondfunction = OpLabel\n"
7053                 "OpNoLine\n"
7054                 "OpReturnValue %second_param1\n"
7055                 "OpFunctionEnd\n"
7056                 "OpNoLine\n"
7057                 "OpNoLine\n";
7058
7059         fragments["testfun"]            =
7060                 // A %test_code function that returns its argument unchanged.
7061                 "OpNoLine\n"
7062                 "OpNoLine\n"
7063                 "OpLine %name 1 1\n"
7064                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7065                 "OpNoLine\n"
7066                 "%param1 = OpFunctionParameter %v4f32\n"
7067                 "OpNoLine\n"
7068                 "OpNoLine\n"
7069                 "%label_testfun = OpLabel\n"
7070                 "OpNoLine\n"
7071                 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7072                 "OpReturnValue %val1\n"
7073                 "OpFunctionEnd\n"
7074                 "OpLine %name 1 1\n"
7075                 "OpNoLine\n";
7076
7077         createTestsForAllStages("opnoline", defaultColors, defaultColors, fragments, opLineTests.get());
7078
7079         return opLineTests.release();
7080 }
7081
7082 tcu::TestCaseGroup* createOpModuleProcessedTests(tcu::TestContext& testCtx)
7083 {
7084         RGBA                                                            defaultColors[4];
7085         de::MovePtr<tcu::TestCaseGroup>         opModuleProcessedTests                  (new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "OpModuleProcessed instruction"));
7086         map<string, string>                                     fragments;
7087         std::vector<std::string>                        noExtensions;
7088         GraphicsResources                                       resources;
7089
7090         getDefaultColors(defaultColors);
7091         resources.verifyBinary = veryfiBinaryShader;
7092         resources.spirvVersion = SPIRV_VERSION_1_3;
7093
7094         fragments["moduleprocessed"]                                                    =
7095                 "OpModuleProcessed \"VULKAN CTS\"\n"
7096                 "OpModuleProcessed \"Negative values\"\n"
7097                 "OpModuleProcessed \"Date: 2017/09/21\"\n";
7098
7099         fragments["pre_main"]   =
7100                 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7101                 "%second_param1 = OpFunctionParameter %v4f32\n"
7102                 "%label_secondfunction = OpLabel\n"
7103                 "OpReturnValue %second_param1\n"
7104                 "OpFunctionEnd\n";
7105
7106         fragments["testfun"]            =
7107                 // A %test_code function that returns its argument unchanged.
7108                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7109                 "%param1 = OpFunctionParameter %v4f32\n"
7110                 "%label_testfun = OpLabel\n"
7111                 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7112                 "OpReturnValue %val1\n"
7113                 "OpFunctionEnd\n";
7114
7115         createTestsForAllStages ("opmoduleprocessed", defaultColors, defaultColors, fragments, resources, noExtensions, opModuleProcessedTests.get());
7116
7117         return opModuleProcessedTests.release();
7118 }
7119
7120
7121 tcu::TestCaseGroup* createOpLineTests(tcu::TestContext& testCtx)
7122 {
7123         RGBA                                                                                                    defaultColors[4];
7124         de::MovePtr<tcu::TestCaseGroup>                                                 opLineTests                     (new tcu::TestCaseGroup(testCtx, "opline", "OpLine instruction"));
7125         map<string, string>                                                                             fragments;
7126         std::vector<std::pair<std::string, std::string> >               problemStrings;
7127
7128         problemStrings.push_back(std::make_pair<std::string, std::string>("empty_name", ""));
7129         problemStrings.push_back(std::make_pair<std::string, std::string>("short_name", "short_name"));
7130         problemStrings.push_back(std::make_pair<std::string, std::string>("long_name", makeLongUTF8String(65530) + "ccc"));
7131         getDefaultColors(defaultColors);
7132
7133         fragments["debug"]                      =
7134                 "%other_name = OpString \"other_name\"\n";
7135
7136         fragments["pre_main"]   =
7137                 "OpLine %file_name 32 0\n"
7138                 "OpLine %file_name 32 32\n"
7139                 "OpLine %file_name 32 40\n"
7140                 "OpLine %other_name 32 40\n"
7141                 "OpLine %other_name 0 100\n"
7142                 "OpLine %other_name 0 4294967295\n"
7143                 "OpLine %other_name 4294967295 0\n"
7144                 "OpLine %other_name 32 40\n"
7145                 "OpLine %file_name 0 0\n"
7146                 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7147                 "OpLine %file_name 1 0\n"
7148                 "%second_param1 = OpFunctionParameter %v4f32\n"
7149                 "OpLine %file_name 1 3\n"
7150                 "OpLine %file_name 1 2\n"
7151                 "%label_secondfunction = OpLabel\n"
7152                 "OpLine %file_name 0 2\n"
7153                 "OpReturnValue %second_param1\n"
7154                 "OpFunctionEnd\n"
7155                 "OpLine %file_name 0 2\n"
7156                 "OpLine %file_name 0 2\n";
7157
7158         fragments["testfun"]            =
7159                 // A %test_code function that returns its argument unchanged.
7160                 "OpLine %file_name 1 0\n"
7161                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7162                 "OpLine %file_name 16 330\n"
7163                 "%param1 = OpFunctionParameter %v4f32\n"
7164                 "OpLine %file_name 14 442\n"
7165                 "%label_testfun = OpLabel\n"
7166                 "OpLine %file_name 11 1024\n"
7167                 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7168                 "OpLine %file_name 2 97\n"
7169                 "OpReturnValue %val1\n"
7170                 "OpFunctionEnd\n"
7171                 "OpLine %file_name 5 32\n";
7172
7173         for (size_t i = 0; i < problemStrings.size(); ++i)
7174         {
7175                 map<string, string> testFragments = fragments;
7176                 testFragments["debug"] += "%file_name = OpString \"" + problemStrings[i].second + "\"\n";
7177                 createTestsForAllStages(string("opline") + "_" + problemStrings[i].first, defaultColors, defaultColors, testFragments, opLineTests.get());
7178         }
7179
7180         return opLineTests.release();
7181 }
7182
7183 tcu::TestCaseGroup* createOpConstantNullTests(tcu::TestContext& testCtx)
7184 {
7185         de::MovePtr<tcu::TestCaseGroup> opConstantNullTests             (new tcu::TestCaseGroup(testCtx, "opconstantnull", "OpConstantNull instruction"));
7186         RGBA                                                    colors[4];
7187
7188
7189         const char                                              functionStart[] =
7190                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7191                 "%param1 = OpFunctionParameter %v4f32\n"
7192                 "%lbl    = OpLabel\n";
7193
7194         const char                                              functionEnd[]   =
7195                 "OpReturnValue %transformed_param\n"
7196                 "OpFunctionEnd\n";
7197
7198         struct NameConstantsCode
7199         {
7200                 string name;
7201                 string constants;
7202                 string code;
7203         };
7204
7205         NameConstantsCode tests[] =
7206         {
7207                 {
7208                         "vec4",
7209                         "%cnull = OpConstantNull %v4f32\n",
7210                         "%transformed_param = OpFAdd %v4f32 %param1 %cnull\n"
7211                 },
7212                 {
7213                         "float",
7214                         "%cnull = OpConstantNull %f32\n",
7215                         "%vp = OpVariable %fp_v4f32 Function\n"
7216                         "%v  = OpLoad %v4f32 %vp\n"
7217                         "%v0 = OpVectorInsertDynamic %v4f32 %v %cnull %c_i32_0\n"
7218                         "%v1 = OpVectorInsertDynamic %v4f32 %v0 %cnull %c_i32_1\n"
7219                         "%v2 = OpVectorInsertDynamic %v4f32 %v1 %cnull %c_i32_2\n"
7220                         "%v3 = OpVectorInsertDynamic %v4f32 %v2 %cnull %c_i32_3\n"
7221                         "%transformed_param = OpFAdd %v4f32 %param1 %v3\n"
7222                 },
7223                 {
7224                         "bool",
7225                         "%cnull             = OpConstantNull %bool\n",
7226                         "%v                 = OpVariable %fp_v4f32 Function\n"
7227                         "                     OpStore %v %param1\n"
7228                         "                     OpSelectionMerge %false_label None\n"
7229                         "                     OpBranchConditional %cnull %true_label %false_label\n"
7230                         "%true_label        = OpLabel\n"
7231                         "                     OpStore %v %c_v4f32_0_5_0_5_0_5_0_5\n"
7232                         "                     OpBranch %false_label\n"
7233                         "%false_label       = OpLabel\n"
7234                         "%transformed_param = OpLoad %v4f32 %v\n"
7235                 },
7236                 {
7237                         "i32",
7238                         "%cnull             = OpConstantNull %i32\n",
7239                         "%v                 = OpVariable %fp_v4f32 Function %c_v4f32_0_5_0_5_0_5_0_5\n"
7240                         "%b                 = OpIEqual %bool %cnull %c_i32_0\n"
7241                         "                     OpSelectionMerge %false_label None\n"
7242                         "                     OpBranchConditional %b %true_label %false_label\n"
7243                         "%true_label        = OpLabel\n"
7244                         "                     OpStore %v %param1\n"
7245                         "                     OpBranch %false_label\n"
7246                         "%false_label       = OpLabel\n"
7247                         "%transformed_param = OpLoad %v4f32 %v\n"
7248                 },
7249                 {
7250                         "struct",
7251                         "%stype             = OpTypeStruct %f32 %v4f32\n"
7252                         "%fp_stype          = OpTypePointer Function %stype\n"
7253                         "%cnull             = OpConstantNull %stype\n",
7254                         "%v                 = OpVariable %fp_stype Function %cnull\n"
7255                         "%f                 = OpAccessChain %fp_v4f32 %v %c_i32_1\n"
7256                         "%f_val             = OpLoad %v4f32 %f\n"
7257                         "%transformed_param = OpFAdd %v4f32 %param1 %f_val\n"
7258                 },
7259                 {
7260                         "array",
7261                         "%a4_v4f32          = OpTypeArray %v4f32 %c_u32_4\n"
7262                         "%fp_a4_v4f32       = OpTypePointer Function %a4_v4f32\n"
7263                         "%cnull             = OpConstantNull %a4_v4f32\n",
7264                         "%v                 = OpVariable %fp_a4_v4f32 Function %cnull\n"
7265                         "%f                 = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7266                         "%f1                = OpAccessChain %fp_v4f32 %v %c_u32_1\n"
7267                         "%f2                = OpAccessChain %fp_v4f32 %v %c_u32_2\n"
7268                         "%f3                = OpAccessChain %fp_v4f32 %v %c_u32_3\n"
7269                         "%f_val             = OpLoad %v4f32 %f\n"
7270                         "%f1_val            = OpLoad %v4f32 %f1\n"
7271                         "%f2_val            = OpLoad %v4f32 %f2\n"
7272                         "%f3_val            = OpLoad %v4f32 %f3\n"
7273                         "%t0                = OpFAdd %v4f32 %param1 %f_val\n"
7274                         "%t1                = OpFAdd %v4f32 %t0 %f1_val\n"
7275                         "%t2                = OpFAdd %v4f32 %t1 %f2_val\n"
7276                         "%transformed_param = OpFAdd %v4f32 %t2 %f3_val\n"
7277                 },
7278                 {
7279                         "matrix",
7280                         "%mat4x4_f32        = OpTypeMatrix %v4f32 4\n"
7281                         "%cnull             = OpConstantNull %mat4x4_f32\n",
7282                         // Our null matrix * any vector should result in a zero vector.
7283                         "%v                 = OpVectorTimesMatrix %v4f32 %param1 %cnull\n"
7284                         "%transformed_param = OpFAdd %v4f32 %param1 %v\n"
7285                 }
7286         };
7287
7288         getHalfColorsFullAlpha(colors);
7289
7290         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7291         {
7292                 map<string, string> fragments;
7293                 fragments["pre_main"] = tests[testNdx].constants;
7294                 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7295                 createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, opConstantNullTests.get());
7296         }
7297         return opConstantNullTests.release();
7298 }
7299 tcu::TestCaseGroup* createOpConstantCompositeTests(tcu::TestContext& testCtx)
7300 {
7301         de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests                (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "OpConstantComposite instruction"));
7302         RGBA                                                    inputColors[4];
7303         RGBA                                                    outputColors[4];
7304
7305
7306         const char                                              functionStart[]  =
7307                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7308                 "%param1 = OpFunctionParameter %v4f32\n"
7309                 "%lbl    = OpLabel\n";
7310
7311         const char                                              functionEnd[]           =
7312                 "OpReturnValue %transformed_param\n"
7313                 "OpFunctionEnd\n";
7314
7315         struct NameConstantsCode
7316         {
7317                 string name;
7318                 string constants;
7319                 string code;
7320         };
7321
7322         NameConstantsCode tests[] =
7323         {
7324                 {
7325                         "vec4",
7326
7327                         "%cval              = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0\n",
7328                         "%transformed_param = OpFAdd %v4f32 %param1 %cval\n"
7329                 },
7330                 {
7331                         "struct",
7332
7333                         "%stype             = OpTypeStruct %v4f32 %f32\n"
7334                         "%fp_stype          = OpTypePointer Function %stype\n"
7335                         "%f32_n_1           = OpConstant %f32 -1.0\n"
7336                         "%f32_1_5           = OpConstant %f32 !0x3fc00000\n" // +1.5
7337                         "%cvec              = OpConstantComposite %v4f32 %f32_1_5 %f32_1_5 %f32_1_5 %c_f32_1\n"
7338                         "%cval              = OpConstantComposite %stype %cvec %f32_n_1\n",
7339
7340                         "%v                 = OpVariable %fp_stype Function %cval\n"
7341                         "%vec_ptr           = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7342                         "%f32_ptr           = OpAccessChain %fp_f32 %v %c_u32_1\n"
7343                         "%vec_val           = OpLoad %v4f32 %vec_ptr\n"
7344                         "%f32_val           = OpLoad %f32 %f32_ptr\n"
7345                         "%tmp1              = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_1 %f32_val\n" // vec4(-1)
7346                         "%tmp2              = OpFAdd %v4f32 %tmp1 %param1\n" // param1 + vec4(-1)
7347                         "%transformed_param = OpFAdd %v4f32 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
7348                 },
7349                 {
7350                         // [1|0|0|0.5] [x] = x + 0.5
7351                         // [0|1|0|0.5] [y] = y + 0.5
7352                         // [0|0|1|0.5] [z] = z + 0.5
7353                         // [0|0|0|1  ] [1] = 1
7354                         "matrix",
7355
7356                         "%mat4x4_f32          = OpTypeMatrix %v4f32 4\n"
7357                         "%v4f32_1_0_0_0       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_0\n"
7358                         "%v4f32_0_1_0_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_1 %c_f32_0 %c_f32_0\n"
7359                         "%v4f32_0_0_1_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_1 %c_f32_0\n"
7360                         "%v4f32_0_5_0_5_0_5_1 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_1\n"
7361                         "%cval                = OpConstantComposite %mat4x4_f32 %v4f32_1_0_0_0 %v4f32_0_1_0_0 %v4f32_0_0_1_0 %v4f32_0_5_0_5_0_5_1\n",
7362
7363                         "%transformed_param   = OpMatrixTimesVector %v4f32 %cval %param1\n"
7364                 },
7365                 {
7366                         "array",
7367
7368                         "%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7369                         "%fp_a4f32            = OpTypePointer Function %a4f32\n"
7370                         "%f32_n_1             = OpConstant %f32 -1.0\n"
7371                         "%f32_1_5             = OpConstant %f32 !0x3fc00000\n" // +1.5
7372                         "%carr                = OpConstantComposite %a4f32 %c_f32_0 %f32_n_1 %f32_1_5 %c_f32_0\n",
7373
7374                         "%v                   = OpVariable %fp_a4f32 Function %carr\n"
7375                         "%f                   = OpAccessChain %fp_f32 %v %c_u32_0\n"
7376                         "%f1                  = OpAccessChain %fp_f32 %v %c_u32_1\n"
7377                         "%f2                  = OpAccessChain %fp_f32 %v %c_u32_2\n"
7378                         "%f3                  = OpAccessChain %fp_f32 %v %c_u32_3\n"
7379                         "%f_val               = OpLoad %f32 %f\n"
7380                         "%f1_val              = OpLoad %f32 %f1\n"
7381                         "%f2_val              = OpLoad %f32 %f2\n"
7382                         "%f3_val              = OpLoad %f32 %f3\n"
7383                         "%ftot1               = OpFAdd %f32 %f_val %f1_val\n"
7384                         "%ftot2               = OpFAdd %f32 %ftot1 %f2_val\n"
7385                         "%ftot3               = OpFAdd %f32 %ftot2 %f3_val\n"  // 0 - 1 + 1.5 + 0
7386                         "%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %ftot3\n"
7387                         "%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
7388                 },
7389                 {
7390                         //
7391                         // [
7392                         //   {
7393                         //      0.0,
7394                         //      [ 1.0, 1.0, 1.0, 1.0]
7395                         //   },
7396                         //   {
7397                         //      1.0,
7398                         //      [ 0.0, 0.5, 0.0, 0.0]
7399                         //   }, //     ^^^
7400                         //   {
7401                         //      0.0,
7402                         //      [ 1.0, 1.0, 1.0, 1.0]
7403                         //   }
7404                         // ]
7405                         "array_of_struct_of_array",
7406
7407                         "%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7408                         "%fp_a4f32            = OpTypePointer Function %a4f32\n"
7409                         "%stype               = OpTypeStruct %f32 %a4f32\n"
7410                         "%a3stype             = OpTypeArray %stype %c_u32_3\n"
7411                         "%fp_a3stype          = OpTypePointer Function %a3stype\n"
7412                         "%ca4f32_0            = OpConstantComposite %a4f32 %c_f32_0 %c_f32_0_5 %c_f32_0 %c_f32_0\n"
7413                         "%ca4f32_1            = OpConstantComposite %a4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
7414                         "%cstype1             = OpConstantComposite %stype %c_f32_0 %ca4f32_1\n"
7415                         "%cstype2             = OpConstantComposite %stype %c_f32_1 %ca4f32_0\n"
7416                         "%carr                = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
7417
7418                         "%v                   = OpVariable %fp_a3stype Function %carr\n"
7419                         "%f                   = OpAccessChain %fp_f32 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
7420                         "%f_l                 = OpLoad %f32 %f\n"
7421                         "%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %f_l\n"
7422                         "%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
7423                 }
7424         };
7425
7426         getHalfColorsFullAlpha(inputColors);
7427         outputColors[0] = RGBA(255, 255, 255, 255);
7428         outputColors[1] = RGBA(255, 127, 127, 255);
7429         outputColors[2] = RGBA(127, 255, 127, 255);
7430         outputColors[3] = RGBA(127, 127, 255, 255);
7431
7432         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7433         {
7434                 map<string, string> fragments;
7435                 fragments["pre_main"] = tests[testNdx].constants;
7436                 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7437                 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, opConstantCompositeTests.get());
7438         }
7439         return opConstantCompositeTests.release();
7440 }
7441
7442 tcu::TestCaseGroup* createSelectionBlockOrderTests(tcu::TestContext& testCtx)
7443 {
7444         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, "selection_block_order", "Out-of-order blocks for selection"));
7445         RGBA                                                    inputColors[4];
7446         RGBA                                                    outputColors[4];
7447         map<string, string>                             fragments;
7448
7449         // vec4 test_code(vec4 param) {
7450         //   vec4 result = param;
7451         //   for (int i = 0; i < 4; ++i) {
7452         //     if (i == 0) result[i] = 0.;
7453         //     else        result[i] = 1. - result[i];
7454         //   }
7455         //   return result;
7456         // }
7457         const char                                              function[]                      =
7458                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7459                 "%param1    = OpFunctionParameter %v4f32\n"
7460                 "%lbl       = OpLabel\n"
7461                 "%iptr      = OpVariable %fp_i32 Function\n"
7462                 "%result    = OpVariable %fp_v4f32 Function\n"
7463                 "             OpStore %iptr %c_i32_0\n"
7464                 "             OpStore %result %param1\n"
7465                 "             OpBranch %loop\n"
7466
7467                 // Loop entry block.
7468                 "%loop      = OpLabel\n"
7469                 "%ival      = OpLoad %i32 %iptr\n"
7470                 "%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
7471                 "             OpLoopMerge %exit %if_entry None\n"
7472                 "             OpBranchConditional %lt_4 %if_entry %exit\n"
7473
7474                 // Merge block for loop.
7475                 "%exit      = OpLabel\n"
7476                 "%ret       = OpLoad %v4f32 %result\n"
7477                 "             OpReturnValue %ret\n"
7478
7479                 // If-statement entry block.
7480                 "%if_entry  = OpLabel\n"
7481                 "%loc       = OpAccessChain %fp_f32 %result %ival\n"
7482                 "%eq_0      = OpIEqual %bool %ival %c_i32_0\n"
7483                 "             OpSelectionMerge %if_exit None\n"
7484                 "             OpBranchConditional %eq_0 %if_true %if_false\n"
7485
7486                 // False branch for if-statement.
7487                 "%if_false  = OpLabel\n"
7488                 "%val       = OpLoad %f32 %loc\n"
7489                 "%sub       = OpFSub %f32 %c_f32_1 %val\n"
7490                 "             OpStore %loc %sub\n"
7491                 "             OpBranch %if_exit\n"
7492
7493                 // Merge block for if-statement.
7494                 "%if_exit   = OpLabel\n"
7495                 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
7496                 "             OpStore %iptr %ival_next\n"
7497                 "             OpBranch %loop\n"
7498
7499                 // True branch for if-statement.
7500                 "%if_true   = OpLabel\n"
7501                 "             OpStore %loc %c_f32_0\n"
7502                 "             OpBranch %if_exit\n"
7503
7504                 "             OpFunctionEnd\n";
7505
7506         fragments["testfun"]    = function;
7507
7508         inputColors[0]                  = RGBA(127, 127, 127, 0);
7509         inputColors[1]                  = RGBA(127, 0,   0,   0);
7510         inputColors[2]                  = RGBA(0,   127, 0,   0);
7511         inputColors[3]                  = RGBA(0,   0,   127, 0);
7512
7513         outputColors[0]                 = RGBA(0, 128, 128, 255);
7514         outputColors[1]                 = RGBA(0, 255, 255, 255);
7515         outputColors[2]                 = RGBA(0, 128, 255, 255);
7516         outputColors[3]                 = RGBA(0, 255, 128, 255);
7517
7518         createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
7519
7520         return group.release();
7521 }
7522
7523 tcu::TestCaseGroup* createSwitchBlockOrderTests(tcu::TestContext& testCtx)
7524 {
7525         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, "switch_block_order", "Out-of-order blocks for switch"));
7526         RGBA                                                    inputColors[4];
7527         RGBA                                                    outputColors[4];
7528         map<string, string>                             fragments;
7529
7530         const char                                              typesAndConstants[]     =
7531                 "%c_f32_p2  = OpConstant %f32 0.2\n"
7532                 "%c_f32_p4  = OpConstant %f32 0.4\n"
7533                 "%c_f32_p6  = OpConstant %f32 0.6\n"
7534                 "%c_f32_p8  = OpConstant %f32 0.8\n";
7535
7536         // vec4 test_code(vec4 param) {
7537         //   vec4 result = param;
7538         //   for (int i = 0; i < 4; ++i) {
7539         //     switch (i) {
7540         //       case 0: result[i] += .2; break;
7541         //       case 1: result[i] += .6; break;
7542         //       case 2: result[i] += .4; break;
7543         //       case 3: result[i] += .8; break;
7544         //       default: break; // unreachable
7545         //     }
7546         //   }
7547         //   return result;
7548         // }
7549         const char                                              function[]                      =
7550                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7551                 "%param1    = OpFunctionParameter %v4f32\n"
7552                 "%lbl       = OpLabel\n"
7553                 "%iptr      = OpVariable %fp_i32 Function\n"
7554                 "%result    = OpVariable %fp_v4f32 Function\n"
7555                 "             OpStore %iptr %c_i32_0\n"
7556                 "             OpStore %result %param1\n"
7557                 "             OpBranch %loop\n"
7558
7559                 // Loop entry block.
7560                 "%loop      = OpLabel\n"
7561                 "%ival      = OpLoad %i32 %iptr\n"
7562                 "%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
7563                 "             OpLoopMerge %exit %cont None\n"
7564                 "             OpBranchConditional %lt_4 %switch_entry %exit\n"
7565
7566                 // Merge block for loop.
7567                 "%exit      = OpLabel\n"
7568                 "%ret       = OpLoad %v4f32 %result\n"
7569                 "             OpReturnValue %ret\n"
7570
7571                 // Switch-statement entry block.
7572                 "%switch_entry   = OpLabel\n"
7573                 "%loc            = OpAccessChain %fp_f32 %result %ival\n"
7574                 "%val            = OpLoad %f32 %loc\n"
7575                 "                  OpSelectionMerge %switch_exit None\n"
7576                 "                  OpSwitch %ival %switch_default 0 %case0 1 %case1 2 %case2 3 %case3\n"
7577
7578                 "%case2          = OpLabel\n"
7579                 "%addp4          = OpFAdd %f32 %val %c_f32_p4\n"
7580                 "                  OpStore %loc %addp4\n"
7581                 "                  OpBranch %switch_exit\n"
7582
7583                 "%switch_default = OpLabel\n"
7584                 "                  OpUnreachable\n"
7585
7586                 "%case3          = OpLabel\n"
7587                 "%addp8          = OpFAdd %f32 %val %c_f32_p8\n"
7588                 "                  OpStore %loc %addp8\n"
7589                 "                  OpBranch %switch_exit\n"
7590
7591                 "%case0          = OpLabel\n"
7592                 "%addp2          = OpFAdd %f32 %val %c_f32_p2\n"
7593                 "                  OpStore %loc %addp2\n"
7594                 "                  OpBranch %switch_exit\n"
7595
7596                 // Merge block for switch-statement.
7597                 "%switch_exit    = OpLabel\n"
7598                 "%ival_next      = OpIAdd %i32 %ival %c_i32_1\n"
7599                 "                  OpStore %iptr %ival_next\n"
7600                 "                  OpBranch %cont\n"
7601                 "%cont           = OpLabel\n"
7602                 "                  OpBranch %loop\n"
7603
7604                 "%case1          = OpLabel\n"
7605                 "%addp6          = OpFAdd %f32 %val %c_f32_p6\n"
7606                 "                  OpStore %loc %addp6\n"
7607                 "                  OpBranch %switch_exit\n"
7608
7609                 "                  OpFunctionEnd\n";
7610
7611         fragments["pre_main"]   = typesAndConstants;
7612         fragments["testfun"]    = function;
7613
7614         inputColors[0]                  = RGBA(127, 27,  127, 51);
7615         inputColors[1]                  = RGBA(127, 0,   0,   51);
7616         inputColors[2]                  = RGBA(0,   27,  0,   51);
7617         inputColors[3]                  = RGBA(0,   0,   127, 51);
7618
7619         outputColors[0]                 = RGBA(178, 180, 229, 255);
7620         outputColors[1]                 = RGBA(178, 153, 102, 255);
7621         outputColors[2]                 = RGBA(51,  180, 102, 255);
7622         outputColors[3]                 = RGBA(51,  153, 229, 255);
7623
7624         createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
7625
7626         return group.release();
7627 }
7628
7629 tcu::TestCaseGroup* createDecorationGroupTests(tcu::TestContext& testCtx)
7630 {
7631         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, "decoration_group", "Decoration group tests"));
7632         RGBA                                                    inputColors[4];
7633         RGBA                                                    outputColors[4];
7634         map<string, string>                             fragments;
7635
7636         const char                                              decorations[]           =
7637                 "OpDecorate %array_group         ArrayStride 4\n"
7638                 "OpDecorate %struct_member_group Offset 0\n"
7639                 "%array_group         = OpDecorationGroup\n"
7640                 "%struct_member_group = OpDecorationGroup\n"
7641
7642                 "OpDecorate %group1 RelaxedPrecision\n"
7643                 "OpDecorate %group3 RelaxedPrecision\n"
7644                 "OpDecorate %group3 Invariant\n"
7645                 "OpDecorate %group3 Restrict\n"
7646                 "%group0 = OpDecorationGroup\n"
7647                 "%group1 = OpDecorationGroup\n"
7648                 "%group3 = OpDecorationGroup\n";
7649
7650         const char                                              typesAndConstants[]     =
7651                 "%a3f32     = OpTypeArray %f32 %c_u32_3\n"
7652                 "%struct1   = OpTypeStruct %a3f32\n"
7653                 "%struct2   = OpTypeStruct %a3f32\n"
7654                 "%fp_struct1 = OpTypePointer Function %struct1\n"
7655                 "%fp_struct2 = OpTypePointer Function %struct2\n"
7656                 "%c_f32_2    = OpConstant %f32 2.\n"
7657                 "%c_f32_n2   = OpConstant %f32 -2.\n"
7658
7659                 "%c_a3f32_1 = OpConstantComposite %a3f32 %c_f32_1 %c_f32_2 %c_f32_1\n"
7660                 "%c_a3f32_2 = OpConstantComposite %a3f32 %c_f32_n1 %c_f32_n2 %c_f32_n1\n"
7661                 "%c_struct1 = OpConstantComposite %struct1 %c_a3f32_1\n"
7662                 "%c_struct2 = OpConstantComposite %struct2 %c_a3f32_2\n";
7663
7664         const char                                              function[]                      =
7665                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7666                 "%param     = OpFunctionParameter %v4f32\n"
7667                 "%entry     = OpLabel\n"
7668                 "%result    = OpVariable %fp_v4f32 Function\n"
7669                 "%v_struct1 = OpVariable %fp_struct1 Function\n"
7670                 "%v_struct2 = OpVariable %fp_struct2 Function\n"
7671                 "             OpStore %result %param\n"
7672                 "             OpStore %v_struct1 %c_struct1\n"
7673                 "             OpStore %v_struct2 %c_struct2\n"
7674                 "%ptr1      = OpAccessChain %fp_f32 %v_struct1 %c_i32_0 %c_i32_2\n"
7675                 "%val1      = OpLoad %f32 %ptr1\n"
7676                 "%ptr2      = OpAccessChain %fp_f32 %v_struct2 %c_i32_0 %c_i32_2\n"
7677                 "%val2      = OpLoad %f32 %ptr2\n"
7678                 "%addvalues = OpFAdd %f32 %val1 %val2\n"
7679                 "%ptr       = OpAccessChain %fp_f32 %result %c_i32_1\n"
7680                 "%val       = OpLoad %f32 %ptr\n"
7681                 "%addresult = OpFAdd %f32 %addvalues %val\n"
7682                 "             OpStore %ptr %addresult\n"
7683                 "%ret       = OpLoad %v4f32 %result\n"
7684                 "             OpReturnValue %ret\n"
7685                 "             OpFunctionEnd\n";
7686
7687         struct CaseNameDecoration
7688         {
7689                 string name;
7690                 string decoration;
7691         };
7692
7693         CaseNameDecoration tests[] =
7694         {
7695                 {
7696                         "same_decoration_group_on_multiple_types",
7697                         "OpGroupMemberDecorate %struct_member_group %struct1 0 %struct2 0\n"
7698                 },
7699                 {
7700                         "empty_decoration_group",
7701                         "OpGroupDecorate %group0      %a3f32\n"
7702                         "OpGroupDecorate %group0      %result\n"
7703                 },
7704                 {
7705                         "one_element_decoration_group",
7706                         "OpGroupDecorate %array_group %a3f32\n"
7707                 },
7708                 {
7709                         "multiple_elements_decoration_group",
7710                         "OpGroupDecorate %group3      %v_struct1\n"
7711                 },
7712                 {
7713                         "multiple_decoration_groups_on_same_variable",
7714                         "OpGroupDecorate %group0      %v_struct2\n"
7715                         "OpGroupDecorate %group1      %v_struct2\n"
7716                         "OpGroupDecorate %group3      %v_struct2\n"
7717                 },
7718                 {
7719                         "same_decoration_group_multiple_times",
7720                         "OpGroupDecorate %group1      %addvalues\n"
7721                         "OpGroupDecorate %group1      %addvalues\n"
7722                         "OpGroupDecorate %group1      %addvalues\n"
7723                 },
7724
7725         };
7726
7727         getHalfColorsFullAlpha(inputColors);
7728         getHalfColorsFullAlpha(outputColors);
7729
7730         for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
7731         {
7732                 fragments["decoration"] = decorations + tests[idx].decoration;
7733                 fragments["pre_main"]   = typesAndConstants;
7734                 fragments["testfun"]    = function;
7735
7736                 createTestsForAllStages(tests[idx].name, inputColors, outputColors, fragments, group.get());
7737         }
7738
7739         return group.release();
7740 }
7741
7742 struct SpecConstantTwoIntGraphicsCase
7743 {
7744         const char*             caseName;
7745         const char*             scDefinition0;
7746         const char*             scDefinition1;
7747         const char*             scResultType;
7748         const char*             scOperation;
7749         deInt32                 scActualValue0;
7750         deInt32                 scActualValue1;
7751         const char*             resultOperation;
7752         RGBA                    expectedColors[4];
7753         deInt32                 scActualValueLength;
7754
7755                                         SpecConstantTwoIntGraphicsCase (const char*             name,
7756                                                                                                         const char*             definition0,
7757                                                                                                         const char*             definition1,
7758                                                                                                         const char*             resultType,
7759                                                                                                         const char*             operation,
7760                                                                                                         const deInt32   value0,
7761                                                                                                         const deInt32   value1,
7762                                                                                                         const char*             resultOp,
7763                                                                                                         const RGBA              (&output)[4],
7764                                                                                                         const deInt32   valueLength = sizeof(deInt32))
7765                                                 : caseName                              (name)
7766                                                 , scDefinition0                 (definition0)
7767                                                 , scDefinition1                 (definition1)
7768                                                 , scResultType                  (resultType)
7769                                                 , scOperation                   (operation)
7770                                                 , scActualValue0                (value0)
7771                                                 , scActualValue1                (value1)
7772                                                 , resultOperation               (resultOp)
7773                                                 , scActualValueLength   (valueLength)
7774         {
7775                 expectedColors[0] = output[0];
7776                 expectedColors[1] = output[1];
7777                 expectedColors[2] = output[2];
7778                 expectedColors[3] = output[3];
7779         }
7780 };
7781
7782 tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
7783 {
7784         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
7785         vector<SpecConstantTwoIntGraphicsCase>  cases;
7786         RGBA                                                    inputColors[4];
7787         RGBA                                                    outputColors0[4];
7788         RGBA                                                    outputColors1[4];
7789         RGBA                                                    outputColors2[4];
7790
7791         const deInt32                                   m1AsFloat16                     = 0xbc00; // -1(fp16) == 1 01111 0000000000 == 1011 1100 0000 0000
7792
7793         const char      decorations1[]                  =
7794                 "OpDecorate %sc_0  SpecId 0\n"
7795                 "OpDecorate %sc_1  SpecId 1\n";
7796
7797         const char      typesAndConstants1[]    =
7798                 "${OPTYPE_DEFINITIONS:opt}"
7799                 "%sc_0      = OpSpecConstant${SC_DEF0}\n"
7800                 "%sc_1      = OpSpecConstant${SC_DEF1}\n"
7801                 "%sc_op     = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
7802
7803         const char      function1[]                             =
7804                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7805                 "%param     = OpFunctionParameter %v4f32\n"
7806                 "%label     = OpLabel\n"
7807                 "%result    = OpVariable %fp_v4f32 Function\n"
7808                 "${TYPE_CONVERT:opt}"
7809                 "             OpStore %result %param\n"
7810                 "%gen       = ${GEN_RESULT}\n"
7811                 "%index     = OpIAdd %i32 %gen %c_i32_1\n"
7812                 "%loc       = OpAccessChain %fp_f32 %result %index\n"
7813                 "%val       = OpLoad %f32 %loc\n"
7814                 "%add       = OpFAdd %f32 %val %c_f32_0_5\n"
7815                 "             OpStore %loc %add\n"
7816                 "%ret       = OpLoad %v4f32 %result\n"
7817                 "             OpReturnValue %ret\n"
7818                 "             OpFunctionEnd\n";
7819
7820         inputColors[0] = RGBA(127, 127, 127, 255);
7821         inputColors[1] = RGBA(127, 0,   0,   255);
7822         inputColors[2] = RGBA(0,   127, 0,   255);
7823         inputColors[3] = RGBA(0,   0,   127, 255);
7824
7825         // Derived from inputColors[x] by adding 128 to inputColors[x][0].
7826         outputColors0[0] = RGBA(255, 127, 127, 255);
7827         outputColors0[1] = RGBA(255, 0,   0,   255);
7828         outputColors0[2] = RGBA(128, 127, 0,   255);
7829         outputColors0[3] = RGBA(128, 0,   127, 255);
7830
7831         // Derived from inputColors[x] by adding 128 to inputColors[x][1].
7832         outputColors1[0] = RGBA(127, 255, 127, 255);
7833         outputColors1[1] = RGBA(127, 128, 0,   255);
7834         outputColors1[2] = RGBA(0,   255, 0,   255);
7835         outputColors1[3] = RGBA(0,   128, 127, 255);
7836
7837         // Derived from inputColors[x] by adding 128 to inputColors[x][2].
7838         outputColors2[0] = RGBA(127, 127, 255, 255);
7839         outputColors2[1] = RGBA(127, 0,   128, 255);
7840         outputColors2[2] = RGBA(0,   127, 128, 255);
7841         outputColors2[3] = RGBA(0,   0,   255, 255);
7842
7843         const char addZeroToSc[]                = "OpIAdd %i32 %c_i32_0 %sc_op";
7844         const char addZeroToSc32[]              = "OpIAdd %i32 %c_i32_0 %sc_op32";
7845         const char selectTrueUsingSc[]  = "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
7846         const char selectFalseUsingSc[] = "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
7847
7848         cases.push_back(SpecConstantTwoIntGraphicsCase("iadd",                                  " %i32 0",              " %i32 0",              "%i32",         "IAdd                 %sc_0 %sc_1",                             19,             -20,    addZeroToSc,            outputColors0));
7849         cases.push_back(SpecConstantTwoIntGraphicsCase("isub",                                  " %i32 0",              " %i32 0",              "%i32",         "ISub                 %sc_0 %sc_1",                             19,             20,             addZeroToSc,            outputColors0));
7850         cases.push_back(SpecConstantTwoIntGraphicsCase("imul",                                  " %i32 0",              " %i32 0",              "%i32",         "IMul                 %sc_0 %sc_1",                             -1,             -1,             addZeroToSc,            outputColors2));
7851         cases.push_back(SpecConstantTwoIntGraphicsCase("sdiv",                                  " %i32 0",              " %i32 0",              "%i32",         "SDiv                 %sc_0 %sc_1",                             -126,   126,    addZeroToSc,            outputColors0));
7852         cases.push_back(SpecConstantTwoIntGraphicsCase("udiv",                                  " %i32 0",              " %i32 0",              "%i32",         "UDiv                 %sc_0 %sc_1",                             126,    126,    addZeroToSc,            outputColors2));
7853         cases.push_back(SpecConstantTwoIntGraphicsCase("srem",                                  " %i32 0",              " %i32 0",              "%i32",         "SRem                 %sc_0 %sc_1",                             3,              2,              addZeroToSc,            outputColors2));
7854         cases.push_back(SpecConstantTwoIntGraphicsCase("smod",                                  " %i32 0",              " %i32 0",              "%i32",         "SMod                 %sc_0 %sc_1",                             3,              2,              addZeroToSc,            outputColors2));
7855         cases.push_back(SpecConstantTwoIntGraphicsCase("umod",                                  " %i32 0",              " %i32 0",              "%i32",         "UMod                 %sc_0 %sc_1",                             1001,   500,    addZeroToSc,            outputColors2));
7856         cases.push_back(SpecConstantTwoIntGraphicsCase("bitwiseand",                    " %i32 0",              " %i32 0",              "%i32",         "BitwiseAnd           %sc_0 %sc_1",                             0x33,   0x0d,   addZeroToSc,            outputColors2));
7857         cases.push_back(SpecConstantTwoIntGraphicsCase("bitwiseor",                             " %i32 0",              " %i32 0",              "%i32",         "BitwiseOr            %sc_0 %sc_1",                             0,              1,              addZeroToSc,            outputColors2));
7858         cases.push_back(SpecConstantTwoIntGraphicsCase("bitwisexor",                    " %i32 0",              " %i32 0",              "%i32",         "BitwiseXor           %sc_0 %sc_1",                             0x2e,   0x2f,   addZeroToSc,            outputColors2));
7859         cases.push_back(SpecConstantTwoIntGraphicsCase("shiftrightlogical",             " %i32 0",              " %i32 0",              "%i32",         "ShiftRightLogical    %sc_0 %sc_1",                             2,              1,              addZeroToSc,            outputColors2));
7860         cases.push_back(SpecConstantTwoIntGraphicsCase("shiftrightarithmetic",  " %i32 0",              " %i32 0",              "%i32",         "ShiftRightArithmetic %sc_0 %sc_1",                             -4,             2,              addZeroToSc,            outputColors0));
7861         cases.push_back(SpecConstantTwoIntGraphicsCase("shiftleftlogical",              " %i32 0",              " %i32 0",              "%i32",         "ShiftLeftLogical     %sc_0 %sc_1",                             1,              0,              addZeroToSc,            outputColors2));
7862         cases.push_back(SpecConstantTwoIntGraphicsCase("slessthan",                             " %i32 0",              " %i32 0",              "%bool",        "SLessThan            %sc_0 %sc_1",                             -20,    -10,    selectTrueUsingSc,      outputColors2));
7863         cases.push_back(SpecConstantTwoIntGraphicsCase("ulessthan",                             " %i32 0",              " %i32 0",              "%bool",        "ULessThan            %sc_0 %sc_1",                             10,             20,             selectTrueUsingSc,      outputColors2));
7864         cases.push_back(SpecConstantTwoIntGraphicsCase("sgreaterthan",                  " %i32 0",              " %i32 0",              "%bool",        "SGreaterThan         %sc_0 %sc_1",                             -1000,  50,             selectFalseUsingSc,     outputColors2));
7865         cases.push_back(SpecConstantTwoIntGraphicsCase("ugreaterthan",                  " %i32 0",              " %i32 0",              "%bool",        "UGreaterThan         %sc_0 %sc_1",                             10,             5,              selectTrueUsingSc,      outputColors2));
7866         cases.push_back(SpecConstantTwoIntGraphicsCase("slessthanequal",                " %i32 0",              " %i32 0",              "%bool",        "SLessThanEqual       %sc_0 %sc_1",                             -10,    -10,    selectTrueUsingSc,      outputColors2));
7867         cases.push_back(SpecConstantTwoIntGraphicsCase("ulessthanequal",                " %i32 0",              " %i32 0",              "%bool",        "ULessThanEqual       %sc_0 %sc_1",                             50,             100,    selectTrueUsingSc,      outputColors2));
7868         cases.push_back(SpecConstantTwoIntGraphicsCase("sgreaterthanequal",             " %i32 0",              " %i32 0",              "%bool",        "SGreaterThanEqual    %sc_0 %sc_1",                             -1000,  50,             selectFalseUsingSc,     outputColors2));
7869         cases.push_back(SpecConstantTwoIntGraphicsCase("ugreaterthanequal",             " %i32 0",              " %i32 0",              "%bool",        "UGreaterThanEqual    %sc_0 %sc_1",                             10,             10,             selectTrueUsingSc,      outputColors2));
7870         cases.push_back(SpecConstantTwoIntGraphicsCase("iequal",                                " %i32 0",              " %i32 0",              "%bool",        "IEqual               %sc_0 %sc_1",                             42,             24,             selectFalseUsingSc,     outputColors2));
7871         cases.push_back(SpecConstantTwoIntGraphicsCase("inotequal",                             " %i32 0",              " %i32 0",              "%bool",        "INotEqual            %sc_0 %sc_1",                             42,             24,             selectTrueUsingSc,      outputColors2));
7872         cases.push_back(SpecConstantTwoIntGraphicsCase("logicaland",                    "True %bool",   "True %bool",   "%bool",        "LogicalAnd           %sc_0 %sc_1",                             0,              1,              selectFalseUsingSc,     outputColors2));
7873         cases.push_back(SpecConstantTwoIntGraphicsCase("logicalor",                             "False %bool",  "False %bool",  "%bool",        "LogicalOr            %sc_0 %sc_1",                             1,              0,              selectTrueUsingSc,      outputColors2));
7874         cases.push_back(SpecConstantTwoIntGraphicsCase("logicalequal",                  "True %bool",   "True %bool",   "%bool",        "LogicalEqual         %sc_0 %sc_1",                             0,              1,              selectFalseUsingSc,     outputColors2));
7875         cases.push_back(SpecConstantTwoIntGraphicsCase("logicalnotequal",               "False %bool",  "False %bool",  "%bool",        "LogicalNotEqual      %sc_0 %sc_1",                             1,              0,              selectTrueUsingSc,      outputColors2));
7876         cases.push_back(SpecConstantTwoIntGraphicsCase("snegate",                               " %i32 0",              " %i32 0",              "%i32",         "SNegate              %sc_0",                                   -1,             0,              addZeroToSc,            outputColors2));
7877         cases.push_back(SpecConstantTwoIntGraphicsCase("not",                                   " %i32 0",              " %i32 0",              "%i32",         "Not                  %sc_0",                                   -2,             0,              addZeroToSc,            outputColors2));
7878         cases.push_back(SpecConstantTwoIntGraphicsCase("logicalnot",                    "False %bool",  "False %bool",  "%bool",        "LogicalNot           %sc_0",                                   1,              0,              selectFalseUsingSc,     outputColors2));
7879         cases.push_back(SpecConstantTwoIntGraphicsCase("select",                                "False %bool",  " %i32 0",              "%i32",         "Select               %sc_0 %sc_1 %c_i32_0",    1,              1,              addZeroToSc,            outputColors2));
7880         cases.push_back(SpecConstantTwoIntGraphicsCase("sconvert",                              " %i32 0",              " %i32 0",              "%i16",         "SConvert             %sc_0",                                   -1,             0,              addZeroToSc32,          outputColors0));
7881         // -1082130432 stored as 32-bit two's complement is the binary representation of -1 as IEEE-754 Float
7882         cases.push_back(SpecConstantTwoIntGraphicsCase("fconvert",                              " %f32 0",              " %f32 0",              "%f64",         "FConvert             %sc_0",                                   -1082130432, 0, addZeroToSc32,          outputColors0));
7883         cases.push_back(SpecConstantTwoIntGraphicsCase("fconvert16",                    " %f16 0",              " %f16 0",              "%f32",         "FConvert             %sc_0",                                   m1AsFloat16, 0, addZeroToSc32,          outputColors0, sizeof(deFloat16)));
7884         // \todo[2015-12-1 antiagainst] OpQuantizeToF16
7885
7886         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7887         {
7888                 map<string, string>                     specializations;
7889                 map<string, string>                     fragments;
7890                 SpecConstants                           specConstants;
7891                 PushConstants                           noPushConstants;
7892                 GraphicsResources                       noResources;
7893                 GraphicsInterfaces                      noInterfaces;
7894                 vector<string>                          extensions;
7895                 VulkanFeatures                          requiredFeatures;
7896
7897                 // Special SPIR-V code for SConvert-case
7898                 if (strcmp(cases[caseNdx].caseName, "sconvert") == 0)
7899                 {
7900                         requiredFeatures.coreFeatures.shaderInt16 = VK_TRUE;
7901                         fragments["capability"]                                 = "OpCapability Int16\n";                                       // Adds 16-bit integer capability
7902                         specializations["OPTYPE_DEFINITIONS"]   = "%i16 = OpTypeInt 16 1\n";                            // Adds 16-bit integer type
7903                         specializations["TYPE_CONVERT"]                 = "%sc_op32 = OpSConvert %i32 %sc_op\n";        // Converts 16-bit integer to 32-bit integer
7904                 }
7905
7906                 // Special SPIR-V code for FConvert-case
7907                 if (strcmp(cases[caseNdx].caseName, "fconvert") == 0)
7908                 {
7909                         requiredFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
7910                         fragments["capability"]                                 = "OpCapability Float64\n";                                     // Adds 64-bit float capability
7911                         specializations["OPTYPE_DEFINITIONS"]   = "%f64 = OpTypeFloat 64\n";                            // Adds 64-bit float type
7912                         specializations["TYPE_CONVERT"]                 = "%sc_op32 = OpConvertFToS %i32 %sc_op\n";     // Converts 64-bit float to 32-bit integer
7913                 }
7914
7915                 // Special SPIR-V code for FConvert-case for 16-bit floats
7916                 if (strcmp(cases[caseNdx].caseName, "fconvert16") == 0)
7917                 {
7918                         extensions.push_back("VK_KHR_shader_float16_int8");
7919                         requiredFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
7920                         fragments["capability"]                                 = "OpCapability Float16\n";                                     // Adds 16-bit float capability
7921                         specializations["OPTYPE_DEFINITIONS"]   = "%f16 = OpTypeFloat 16\n";                            // Adds 16-bit float type
7922                         specializations["TYPE_CONVERT"]                 = "%sc_op32 = OpConvertFToS %i32 %sc_op\n";     // Converts 16-bit float to 32-bit integer
7923                 }
7924
7925                 specializations["SC_DEF0"]                      = cases[caseNdx].scDefinition0;
7926                 specializations["SC_DEF1"]                      = cases[caseNdx].scDefinition1;
7927                 specializations["SC_RESULT_TYPE"]       = cases[caseNdx].scResultType;
7928                 specializations["SC_OP"]                        = cases[caseNdx].scOperation;
7929                 specializations["GEN_RESULT"]           = cases[caseNdx].resultOperation;
7930
7931                 fragments["decoration"]                         = tcu::StringTemplate(decorations1).specialize(specializations);
7932                 fragments["pre_main"]                           = tcu::StringTemplate(typesAndConstants1).specialize(specializations);
7933                 fragments["testfun"]                            = tcu::StringTemplate(function1).specialize(specializations);
7934
7935                 specConstants.append(&cases[caseNdx].scActualValue0, cases[caseNdx].scActualValueLength);
7936                 specConstants.append(&cases[caseNdx].scActualValue1, cases[caseNdx].scActualValueLength);
7937
7938                 createTestsForAllStages(
7939                         cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants,
7940                         noPushConstants, noResources, noInterfaces, extensions, requiredFeatures, group.get());
7941         }
7942
7943         const char      decorations2[]                  =
7944                 "OpDecorate %sc_0  SpecId 0\n"
7945                 "OpDecorate %sc_1  SpecId 1\n"
7946                 "OpDecorate %sc_2  SpecId 2\n";
7947
7948         const char      typesAndConstants2[]    =
7949                 "%vec3_0      = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
7950                 "%vec3_undef  = OpUndef %v3i32\n"
7951
7952                 "%sc_0        = OpSpecConstant %i32 0\n"
7953                 "%sc_1        = OpSpecConstant %i32 0\n"
7954                 "%sc_2        = OpSpecConstant %i32 0\n"
7955                 "%sc_vec3_0   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_0        %vec3_0      0\n"                                                 // (sc_0, 0,    0)
7956                 "%sc_vec3_1   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_1        %vec3_0      1\n"                                                 // (0,    sc_1, 0)
7957                 "%sc_vec3_2   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_2        %vec3_0      2\n"                                                 // (0,    0,    sc_2)
7958                 "%sc_vec3_0_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0   %vec3_undef  0          0xFFFFFFFF 2\n"   // (sc_0, ???,  0)
7959                 "%sc_vec3_1_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1          0\n"   // (???,  sc_1, 0)
7960                 "%sc_vec3_2_s = OpSpecConstantOp %v3i32 VectorShuffle    %vec3_undef  %sc_vec3_2   5          0xFFFFFFFF 5\n"   // (sc_2, ???,  sc_2)
7961                 "%sc_vec3_01  = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n"                                             // (0,    sc_0, sc_1)
7962                 "%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n"                                             // (sc_2, sc_0, sc_1)
7963                 "%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"                                                 // sc_2
7964                 "%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"                                                 // sc_0
7965                 "%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"                                                 // sc_1
7966                 "%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"                                                              // (sc_2 - sc_0)
7967                 "%sc_final    = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n";                                                             // (sc_2 - sc_0) * sc_1
7968
7969         const char      function2[]                             =
7970                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7971                 "%param     = OpFunctionParameter %v4f32\n"
7972                 "%label     = OpLabel\n"
7973                 "%result    = OpVariable %fp_v4f32 Function\n"
7974                 "             OpStore %result %param\n"
7975                 "%loc       = OpAccessChain %fp_f32 %result %sc_final\n"
7976                 "%val       = OpLoad %f32 %loc\n"
7977                 "%add       = OpFAdd %f32 %val %c_f32_0_5\n"
7978                 "             OpStore %loc %add\n"
7979                 "%ret       = OpLoad %v4f32 %result\n"
7980                 "             OpReturnValue %ret\n"
7981                 "             OpFunctionEnd\n";
7982
7983         map<string, string>     fragments;
7984         SpecConstants           specConstants;
7985
7986         fragments["decoration"] = decorations2;
7987         fragments["pre_main"]   = typesAndConstants2;
7988         fragments["testfun"]    = function2;
7989
7990         specConstants.append<deInt32>(56789);
7991         specConstants.append<deInt32>(-2);
7992         specConstants.append<deInt32>(56788);
7993
7994         createTestsForAllStages("vector_related", inputColors, outputColors2, fragments, specConstants, group.get());
7995
7996         return group.release();
7997 }
7998
7999 tcu::TestCaseGroup* createOpPhiTests(tcu::TestContext& testCtx)
8000 {
8001         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
8002         RGBA                                                    inputColors[4];
8003         RGBA                                                    outputColors1[4];
8004         RGBA                                                    outputColors2[4];
8005         RGBA                                                    outputColors3[4];
8006         RGBA                                                    outputColors4[4];
8007         map<string, string>                             fragments1;
8008         map<string, string>                             fragments2;
8009         map<string, string>                             fragments3;
8010         map<string, string>                             fragments4;
8011         std::vector<std::string>                extensions4;
8012         GraphicsResources                               resources4;
8013         VulkanFeatures                                  vulkanFeatures4;
8014
8015         const char      typesAndConstants1[]    =
8016                 "%c_f32_p2  = OpConstant %f32 0.2\n"
8017                 "%c_f32_p4  = OpConstant %f32 0.4\n"
8018                 "%c_f32_p5  = OpConstant %f32 0.5\n"
8019                 "%c_f32_p8  = OpConstant %f32 0.8\n";
8020
8021         // vec4 test_code(vec4 param) {
8022         //   vec4 result = param;
8023         //   for (int i = 0; i < 4; ++i) {
8024         //     float operand;
8025         //     switch (i) {
8026         //       case 0: operand = .2; break;
8027         //       case 1: operand = .5; break;
8028         //       case 2: operand = .4; break;
8029         //       case 3: operand = .0; break;
8030         //       default: break; // unreachable
8031         //     }
8032         //     result[i] += operand;
8033         //   }
8034         //   return result;
8035         // }
8036         const char      function1[]                             =
8037                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8038                 "%param1    = OpFunctionParameter %v4f32\n"
8039                 "%lbl       = OpLabel\n"
8040                 "%iptr      = OpVariable %fp_i32 Function\n"
8041                 "%result    = OpVariable %fp_v4f32 Function\n"
8042                 "             OpStore %iptr %c_i32_0\n"
8043                 "             OpStore %result %param1\n"
8044                 "             OpBranch %loop\n"
8045
8046                 "%loop      = OpLabel\n"
8047                 "%ival      = OpLoad %i32 %iptr\n"
8048                 "%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
8049                 "             OpLoopMerge %exit %cont None\n"
8050                 "             OpBranchConditional %lt_4 %entry %exit\n"
8051
8052                 "%entry     = OpLabel\n"
8053                 "%loc       = OpAccessChain %fp_f32 %result %ival\n"
8054                 "%val       = OpLoad %f32 %loc\n"
8055                 "             OpSelectionMerge %phi None\n"
8056                 "             OpSwitch %ival %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
8057
8058                 "%case0     = OpLabel\n"
8059                 "             OpBranch %phi\n"
8060                 "%case1     = OpLabel\n"
8061                 "             OpBranch %phi\n"
8062                 "%case2     = OpLabel\n"
8063                 "             OpBranch %phi\n"
8064                 "%case3     = OpLabel\n"
8065                 "             OpBranch %phi\n"
8066
8067                 "%default   = OpLabel\n"
8068                 "             OpUnreachable\n"
8069
8070                 "%phi       = OpLabel\n"
8071                 "%operand   = OpPhi %f32 %c_f32_p4 %case2 %c_f32_p5 %case1 %c_f32_p2 %case0 %c_f32_0 %case3\n" // not in the order of blocks
8072                 "             OpBranch %cont\n"
8073                 "%cont      = OpLabel\n"
8074                 "%add       = OpFAdd %f32 %val %operand\n"
8075                 "             OpStore %loc %add\n"
8076                 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
8077                 "             OpStore %iptr %ival_next\n"
8078                 "             OpBranch %loop\n"
8079
8080                 "%exit      = OpLabel\n"
8081                 "%ret       = OpLoad %v4f32 %result\n"
8082                 "             OpReturnValue %ret\n"
8083
8084                 "             OpFunctionEnd\n";
8085
8086         fragments1["pre_main"]  = typesAndConstants1;
8087         fragments1["testfun"]   = function1;
8088
8089         getHalfColorsFullAlpha(inputColors);
8090
8091         outputColors1[0]                = RGBA(178, 255, 229, 255);
8092         outputColors1[1]                = RGBA(178, 127, 102, 255);
8093         outputColors1[2]                = RGBA(51,  255, 102, 255);
8094         outputColors1[3]                = RGBA(51,  127, 229, 255);
8095
8096         createTestsForAllStages("out_of_order", inputColors, outputColors1, fragments1, group.get());
8097
8098         const char      typesAndConstants2[]    =
8099                 "%c_f32_p2  = OpConstant %f32 0.2\n";
8100
8101         // Add .4 to the second element of the given parameter.
8102         const char      function2[]                             =
8103                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8104                 "%param     = OpFunctionParameter %v4f32\n"
8105                 "%entry     = OpLabel\n"
8106                 "%result    = OpVariable %fp_v4f32 Function\n"
8107                 "             OpStore %result %param\n"
8108                 "%loc       = OpAccessChain %fp_f32 %result %c_i32_1\n"
8109                 "%val       = OpLoad %f32 %loc\n"
8110                 "             OpBranch %phi\n"
8111
8112                 "%phi        = OpLabel\n"
8113                 "%step       = OpPhi %i32 %c_i32_0  %entry %step_next  %phi\n"
8114                 "%accum      = OpPhi %f32 %val      %entry %accum_next %phi\n"
8115                 "%step_next  = OpIAdd %i32 %step  %c_i32_1\n"
8116                 "%accum_next = OpFAdd %f32 %accum %c_f32_p2\n"
8117                 "%still_loop = OpSLessThan %bool %step %c_i32_2\n"
8118                 "              OpLoopMerge %exit %phi None\n"
8119                 "              OpBranchConditional %still_loop %phi %exit\n"
8120
8121                 "%exit       = OpLabel\n"
8122                 "              OpStore %loc %accum\n"
8123                 "%ret        = OpLoad %v4f32 %result\n"
8124                 "              OpReturnValue %ret\n"
8125
8126                 "              OpFunctionEnd\n";
8127
8128         fragments2["pre_main"]  = typesAndConstants2;
8129         fragments2["testfun"]   = function2;
8130
8131         outputColors2[0]                        = RGBA(127, 229, 127, 255);
8132         outputColors2[1]                        = RGBA(127, 102, 0,   255);
8133         outputColors2[2]                        = RGBA(0,   229, 0,   255);
8134         outputColors2[3]                        = RGBA(0,   102, 127, 255);
8135
8136         createTestsForAllStages("induction", inputColors, outputColors2, fragments2, group.get());
8137
8138         const char      typesAndConstants3[]    =
8139                 "%true      = OpConstantTrue %bool\n"
8140                 "%false     = OpConstantFalse %bool\n"
8141                 "%c_f32_p2  = OpConstant %f32 0.2\n";
8142
8143         // Swap the second and the third element of the given parameter.
8144         const char      function3[]                             =
8145                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8146                 "%param     = OpFunctionParameter %v4f32\n"
8147                 "%entry     = OpLabel\n"
8148                 "%result    = OpVariable %fp_v4f32 Function\n"
8149                 "             OpStore %result %param\n"
8150                 "%a_loc     = OpAccessChain %fp_f32 %result %c_i32_1\n"
8151                 "%a_init    = OpLoad %f32 %a_loc\n"
8152                 "%b_loc     = OpAccessChain %fp_f32 %result %c_i32_2\n"
8153                 "%b_init    = OpLoad %f32 %b_loc\n"
8154                 "             OpBranch %phi\n"
8155
8156                 "%phi        = OpLabel\n"
8157                 "%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
8158                 "%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
8159                 "%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
8160                 "              OpLoopMerge %exit %phi None\n"
8161                 "              OpBranchConditional %still_loop %phi %exit\n"
8162
8163                 "%exit       = OpLabel\n"
8164                 "              OpStore %a_loc %a_next\n"
8165                 "              OpStore %b_loc %b_next\n"
8166                 "%ret        = OpLoad %v4f32 %result\n"
8167                 "              OpReturnValue %ret\n"
8168
8169                 "              OpFunctionEnd\n";
8170
8171         fragments3["pre_main"]  = typesAndConstants3;
8172         fragments3["testfun"]   = function3;
8173
8174         outputColors3[0]                        = RGBA(127, 127, 127, 255);
8175         outputColors3[1]                        = RGBA(127, 0,   0,   255);
8176         outputColors3[2]                        = RGBA(0,   0,   127, 255);
8177         outputColors3[3]                        = RGBA(0,   127, 0,   255);
8178
8179         createTestsForAllStages("swap", inputColors, outputColors3, fragments3, group.get());
8180
8181         const char      typesAndConstants4[]    =
8182                 "%f16        = OpTypeFloat 16\n"
8183                 "%v4f16      = OpTypeVector %f16 4\n"
8184                 "%fp_f16     = OpTypePointer Function %f16\n"
8185                 "%fp_v4f16   = OpTypePointer Function %v4f16\n"
8186                 "%true       = OpConstantTrue %bool\n"
8187                 "%false      = OpConstantFalse %bool\n"
8188                 "%c_f32_p2   = OpConstant %f32 0.2\n";
8189
8190         // Swap the second and the third element of the given parameter.
8191         const char      function4[]                             =
8192                 "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8193                 "%param      = OpFunctionParameter %v4f32\n"
8194                 "%entry      = OpLabel\n"
8195                 "%result     = OpVariable %fp_v4f16 Function\n"
8196                 "%param16    = OpFConvert %v4f16 %param\n"
8197                 "              OpStore %result %param16\n"
8198                 "%a_loc      = OpAccessChain %fp_f16 %result %c_i32_1\n"
8199                 "%a_init     = OpLoad %f16 %a_loc\n"
8200                 "%b_loc      = OpAccessChain %fp_f16 %result %c_i32_2\n"
8201                 "%b_init     = OpLoad %f16 %b_loc\n"
8202                 "              OpBranch %phi\n"
8203
8204                 "%phi        = OpLabel\n"
8205                 "%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
8206                 "%a_next     = OpPhi %f16  %a_init %entry %b_next %phi\n"
8207                 "%b_next     = OpPhi %f16  %b_init %entry %a_next %phi\n"
8208                 "              OpLoopMerge %exit %phi None\n"
8209                 "              OpBranchConditional %still_loop %phi %exit\n"
8210
8211                 "%exit       = OpLabel\n"
8212                 "              OpStore %a_loc %a_next\n"
8213                 "              OpStore %b_loc %b_next\n"
8214                 "%ret16      = OpLoad %v4f16 %result\n"
8215                 "%ret        = OpFConvert %v4f32 %ret16\n"
8216                 "              OpReturnValue %ret\n"
8217
8218                 "              OpFunctionEnd\n";
8219
8220         fragments4["pre_main"]          = typesAndConstants4;
8221         fragments4["testfun"]           = function4;
8222         fragments4["capability"]        = "OpCapability StorageUniformBufferBlock16\nOpCapability Float16\n";
8223         fragments4["extension"]         = "OpExtension \"SPV_KHR_16bit_storage\"";
8224
8225         extensions4.push_back("VK_KHR_16bit_storage");
8226         extensions4.push_back("VK_KHR_shader_float16_int8");
8227
8228         vulkanFeatures4.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
8229         vulkanFeatures4.extFloat16Int8  = EXTFLOAT16INT8FEATURES_FLOAT16;
8230
8231         outputColors4[0]                        = RGBA(127, 127, 127, 255);
8232         outputColors4[1]                        = RGBA(127, 0,   0,   255);
8233         outputColors4[2]                        = RGBA(0,   0,   127, 255);
8234         outputColors4[3]                        = RGBA(0,   127, 0,   255);
8235
8236         createTestsForAllStages("swap16", inputColors, outputColors4, fragments4, resources4, extensions4, group.get(), vulkanFeatures4);
8237
8238         return group.release();
8239 }
8240
8241 tcu::TestCaseGroup* createNoContractionTests(tcu::TestContext& testCtx)
8242 {
8243         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
8244         RGBA                                                    inputColors[4];
8245         RGBA                                                    outputColors[4];
8246
8247         // With NoContraction, (1 + 2^-23) * (1 - 2^-23) - 1 should be conducted as a multiplication and an addition separately.
8248         // For the multiplication, the result is 1 - 2^-46, which is out of the precision range for 32-bit float. (32-bit float
8249         // only have 23-bit fraction.) So it will be rounded to 1. Or 0x1.fffffc. Then the final result is 0 or -0x1p-24.
8250         // On the contrary, the result will be 2^-46, which is a normalized number perfectly representable as 32-bit float.
8251         const char                                              constantsAndTypes[]      =
8252                 "%c_vec4_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_1\n"
8253                 "%c_vec4_1       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
8254                 "%c_f32_1pl2_23  = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
8255                 "%c_f32_1mi2_23  = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
8256                 "%c_f32_n1pn24   = OpConstant %f32 -0x1p-24\n";
8257
8258         const char                                              function[]       =
8259                 "%test_code      = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8260                 "%param          = OpFunctionParameter %v4f32\n"
8261                 "%label          = OpLabel\n"
8262                 "%var1           = OpVariable %fp_f32 Function %c_f32_1pl2_23\n"
8263                 "%var2           = OpVariable %fp_f32 Function\n"
8264                 "%red            = OpCompositeExtract %f32 %param 0\n"
8265                 "%plus_red       = OpFAdd %f32 %c_f32_1mi2_23 %red\n"
8266                 "                  OpStore %var2 %plus_red\n"
8267                 "%val1           = OpLoad %f32 %var1\n"
8268                 "%val2           = OpLoad %f32 %var2\n"
8269                 "%mul            = OpFMul %f32 %val1 %val2\n"
8270                 "%add            = OpFAdd %f32 %mul %c_f32_n1\n"
8271                 "%is0            = OpFOrdEqual %bool %add %c_f32_0\n"
8272                 "%isn1n24         = OpFOrdEqual %bool %add %c_f32_n1pn24\n"
8273                 "%success        = OpLogicalOr %bool %is0 %isn1n24\n"
8274                 "%v4success      = OpCompositeConstruct %v4bool %success %success %success %success\n"
8275                 "%ret            = OpSelect %v4f32 %v4success %c_vec4_0 %c_vec4_1\n"
8276                 "                  OpReturnValue %ret\n"
8277                 "                  OpFunctionEnd\n";
8278
8279         struct CaseNameDecoration
8280         {
8281                 string name;
8282                 string decoration;
8283         };
8284
8285
8286         CaseNameDecoration tests[] = {
8287                 {"multiplication",      "OpDecorate %mul NoContraction"},
8288                 {"addition",            "OpDecorate %add NoContraction"},
8289                 {"both",                        "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"},
8290         };
8291
8292         getHalfColorsFullAlpha(inputColors);
8293
8294         for (deUint8 idx = 0; idx < 4; ++idx)
8295         {
8296                 inputColors[idx].setRed(0);
8297                 outputColors[idx] = RGBA(0, 0, 0, 255);
8298         }
8299
8300         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(CaseNameDecoration); ++testNdx)
8301         {
8302                 map<string, string> fragments;
8303
8304                 fragments["decoration"] = tests[testNdx].decoration;
8305                 fragments["pre_main"] = constantsAndTypes;
8306                 fragments["testfun"] = function;
8307
8308                 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, group.get());
8309         }
8310
8311         return group.release();
8312 }
8313
8314 tcu::TestCaseGroup* createMemoryAccessTests(tcu::TestContext& testCtx)
8315 {
8316         de::MovePtr<tcu::TestCaseGroup> memoryAccessTests (new tcu::TestCaseGroup(testCtx, "opmemoryaccess", "Memory Semantics"));
8317         RGBA                                                    colors[4];
8318
8319         const char                                              constantsAndTypes[]      =
8320                 "%c_a2f32_1         = OpConstantComposite %a2f32 %c_f32_1 %c_f32_1\n"
8321                 "%fp_a2f32          = OpTypePointer Function %a2f32\n"
8322                 "%stype             = OpTypeStruct  %v4f32 %a2f32 %f32\n"
8323                 "%fp_stype          = OpTypePointer Function %stype\n";
8324
8325         const char                                              function[]       =
8326                 "%test_code         = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8327                 "%param1            = OpFunctionParameter %v4f32\n"
8328                 "%lbl               = OpLabel\n"
8329                 "%v1                = OpVariable %fp_v4f32 Function\n"
8330                 "%v2                = OpVariable %fp_a2f32 Function\n"
8331                 "%v3                = OpVariable %fp_f32 Function\n"
8332                 "%v                 = OpVariable %fp_stype Function\n"
8333                 "%vv                = OpVariable %fp_stype Function\n"
8334                 "%vvv               = OpVariable %fp_f32 Function\n"
8335
8336                 "                     OpStore %v1 %c_v4f32_1_1_1_1\n"
8337                 "                     OpStore %v2 %c_a2f32_1\n"
8338                 "                     OpStore %v3 %c_f32_1\n"
8339
8340                 "%p_v4f32          = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
8341                 "%p_a2f32          = OpAccessChain %fp_a2f32 %v %c_u32_1\n"
8342                 "%p_f32            = OpAccessChain %fp_f32 %v %c_u32_2\n"
8343                 "%v1_v             = OpLoad %v4f32 %v1 ${access_type}\n"
8344                 "%v2_v             = OpLoad %a2f32 %v2 ${access_type}\n"
8345                 "%v3_v             = OpLoad %f32 %v3 ${access_type}\n"
8346
8347                 "                    OpStore %p_v4f32 %v1_v ${access_type}\n"
8348                 "                    OpStore %p_a2f32 %v2_v ${access_type}\n"
8349                 "                    OpStore %p_f32 %v3_v ${access_type}\n"
8350
8351                 "                    OpCopyMemory %vv %v ${access_type}\n"
8352                 "                    OpCopyMemory %vvv %p_f32 ${access_type}\n"
8353
8354                 "%p_f32_2          = OpAccessChain %fp_f32 %vv %c_u32_2\n"
8355                 "%v_f32_2          = OpLoad %f32 %p_f32_2\n"
8356                 "%v_f32_3          = OpLoad %f32 %vvv\n"
8357
8358                 "%ret1             = OpVectorTimesScalar %v4f32 %param1 %v_f32_2\n"
8359                 "%ret2             = OpVectorTimesScalar %v4f32 %ret1 %v_f32_3\n"
8360                 "                    OpReturnValue %ret2\n"
8361                 "                    OpFunctionEnd\n";
8362
8363         struct NameMemoryAccess
8364         {
8365                 string name;
8366                 string accessType;
8367         };
8368
8369
8370         NameMemoryAccess tests[] =
8371         {
8372                 { "none", "" },
8373                 { "volatile", "Volatile" },
8374                 { "aligned",  "Aligned 1" },
8375                 { "volatile_aligned",  "Volatile|Aligned 1" },
8376                 { "nontemporal_aligned",  "Nontemporal|Aligned 1" },
8377                 { "volatile_nontemporal",  "Volatile|Nontemporal" },
8378                 { "volatile_nontermporal_aligned",  "Volatile|Nontemporal|Aligned 1" },
8379         };
8380
8381         getHalfColorsFullAlpha(colors);
8382
8383         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameMemoryAccess); ++testNdx)
8384         {
8385                 map<string, string> fragments;
8386                 map<string, string> memoryAccess;
8387                 memoryAccess["access_type"] = tests[testNdx].accessType;
8388
8389                 fragments["pre_main"] = constantsAndTypes;
8390                 fragments["testfun"] = tcu::StringTemplate(function).specialize(memoryAccess);
8391                 createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, memoryAccessTests.get());
8392         }
8393         return memoryAccessTests.release();
8394 }
8395 tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
8396 {
8397         de::MovePtr<tcu::TestCaseGroup>         opUndefTests             (new tcu::TestCaseGroup(testCtx, "opundef", "Test OpUndef"));
8398         RGBA                                                            defaultColors[4];
8399         map<string, string>                                     fragments;
8400         getDefaultColors(defaultColors);
8401
8402         // First, simple cases that don't do anything with the OpUndef result.
8403         struct NameCodePair { string name, decl, type; };
8404         const NameCodePair tests[] =
8405         {
8406                 {"bool", "", "%bool"},
8407                 {"vec2uint32", "", "%v2u32"},
8408                 {"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown", "%type"},
8409                 {"sampler", "%type = OpTypeSampler", "%type"},
8410                 {"sampledimage", "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n" "%type = OpTypeSampledImage %img", "%type"},
8411                 {"pointer", "", "%fp_i32"},
8412                 {"runtimearray", "%type = OpTypeRuntimeArray %f32", "%type"},
8413                 {"array", "%c_u32_100 = OpConstant %u32 100\n" "%type = OpTypeArray %i32 %c_u32_100", "%type"},
8414                 {"struct", "%type = OpTypeStruct %f32 %i32 %u32", "%type"}};
8415         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
8416         {
8417                 fragments["undef_type"] = tests[testNdx].type;
8418                 fragments["testfun"] = StringTemplate(
8419                         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8420                         "%param1 = OpFunctionParameter %v4f32\n"
8421                         "%label_testfun = OpLabel\n"
8422                         "%undef = OpUndef ${undef_type}\n"
8423                         "OpReturnValue %param1\n"
8424                         "OpFunctionEnd\n").specialize(fragments);
8425                 fragments["pre_main"] = tests[testNdx].decl;
8426                 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opUndefTests.get());
8427         }
8428         fragments.clear();
8429
8430         fragments["testfun"] =
8431                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8432                 "%param1 = OpFunctionParameter %v4f32\n"
8433                 "%label_testfun = OpLabel\n"
8434                 "%undef = OpUndef %f32\n"
8435                 "%zero = OpFMul %f32 %undef %c_f32_0\n"
8436                 "%is_nan = OpIsNan %bool %zero\n" //OpUndef may result in NaN which may turn %zero into Nan.
8437                 "%actually_zero = OpSelect %f32 %is_nan %c_f32_0 %zero\n"
8438                 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8439                 "%b = OpFAdd %f32 %a %actually_zero\n"
8440                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
8441                 "OpReturnValue %ret\n"
8442                 "OpFunctionEnd\n";
8443
8444         createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
8445
8446         fragments["testfun"] =
8447                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8448                 "%param1 = OpFunctionParameter %v4f32\n"
8449                 "%label_testfun = OpLabel\n"
8450                 "%undef = OpUndef %i32\n"
8451                 "%zero = OpIMul %i32 %undef %c_i32_0\n"
8452                 "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8453                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8454                 "OpReturnValue %ret\n"
8455                 "OpFunctionEnd\n";
8456
8457         createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
8458
8459         fragments["testfun"] =
8460                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8461                 "%param1 = OpFunctionParameter %v4f32\n"
8462                 "%label_testfun = OpLabel\n"
8463                 "%undef = OpUndef %u32\n"
8464                 "%zero = OpIMul %u32 %undef %c_i32_0\n"
8465                 "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8466                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8467                 "OpReturnValue %ret\n"
8468                 "OpFunctionEnd\n";
8469
8470         createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
8471
8472         fragments["testfun"] =
8473                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8474                 "%param1 = OpFunctionParameter %v4f32\n"
8475                 "%label_testfun = OpLabel\n"
8476                 "%undef = OpUndef %v4f32\n"
8477                 "%vzero = OpVectorTimesScalar %v4f32 %undef %c_f32_0\n"
8478                 "%zero_0 = OpVectorExtractDynamic %f32 %vzero %c_i32_0\n"
8479                 "%zero_1 = OpVectorExtractDynamic %f32 %vzero %c_i32_1\n"
8480                 "%zero_2 = OpVectorExtractDynamic %f32 %vzero %c_i32_2\n"
8481                 "%zero_3 = OpVectorExtractDynamic %f32 %vzero %c_i32_3\n"
8482                 "%is_nan_0 = OpIsNan %bool %zero_0\n"
8483                 "%is_nan_1 = OpIsNan %bool %zero_1\n"
8484                 "%is_nan_2 = OpIsNan %bool %zero_2\n"
8485                 "%is_nan_3 = OpIsNan %bool %zero_3\n"
8486                 "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8487                 "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8488                 "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8489                 "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8490                 "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8491                 "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8492                 "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8493                 "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8494                 "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8495                 "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8496                 "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8497                 "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8498                 "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8499                 "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8500                 "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8501                 "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8502                 "OpReturnValue %ret\n"
8503                 "OpFunctionEnd\n";
8504
8505         createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
8506
8507         fragments["pre_main"] =
8508                 "%m2x2f32 = OpTypeMatrix %v2f32 2\n";
8509         fragments["testfun"] =
8510                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8511                 "%param1 = OpFunctionParameter %v4f32\n"
8512                 "%label_testfun = OpLabel\n"
8513                 "%undef = OpUndef %m2x2f32\n"
8514                 "%mzero = OpMatrixTimesScalar %m2x2f32 %undef %c_f32_0\n"
8515                 "%zero_0 = OpCompositeExtract %f32 %mzero 0 0\n"
8516                 "%zero_1 = OpCompositeExtract %f32 %mzero 0 1\n"
8517                 "%zero_2 = OpCompositeExtract %f32 %mzero 1 0\n"
8518                 "%zero_3 = OpCompositeExtract %f32 %mzero 1 1\n"
8519                 "%is_nan_0 = OpIsNan %bool %zero_0\n"
8520                 "%is_nan_1 = OpIsNan %bool %zero_1\n"
8521                 "%is_nan_2 = OpIsNan %bool %zero_2\n"
8522                 "%is_nan_3 = OpIsNan %bool %zero_3\n"
8523                 "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8524                 "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8525                 "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8526                 "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8527                 "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8528                 "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8529                 "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8530                 "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8531                 "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8532                 "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8533                 "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8534                 "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8535                 "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8536                 "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8537                 "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8538                 "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8539                 "OpReturnValue %ret\n"
8540                 "OpFunctionEnd\n";
8541
8542         createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
8543
8544         return opUndefTests.release();
8545 }
8546
8547 void createOpQuantizeSingleOptionTests(tcu::TestCaseGroup* testCtx)
8548 {
8549         const RGBA              inputColors[4]          =
8550         {
8551                 RGBA(0,         0,              0,              255),
8552                 RGBA(0,         0,              255,    255),
8553                 RGBA(0,         255,    0,              255),
8554                 RGBA(0,         255,    255,    255)
8555         };
8556
8557         const RGBA              expectedColors[4]       =
8558         {
8559                 RGBA(255,        0,              0,              255),
8560                 RGBA(255,        0,              0,              255),
8561                 RGBA(255,        0,              0,              255),
8562                 RGBA(255,        0,              0,              255)
8563         };
8564
8565         const struct SingleFP16Possibility
8566         {
8567                 const char* name;
8568                 const char* constant;  // Value to assign to %test_constant.
8569                 float           valueAsFloat;
8570                 const char* condition; // Must assign to %cond an expression that evaluates to true after %c = OpQuantizeToF16(%test_constant + 0).
8571         }                               tests[]                         =
8572         {
8573                 {
8574                         "negative",
8575                         "-0x1.3p1\n",
8576                         -constructNormalizedFloat(1, 0x300000),
8577                         "%cond = OpFOrdEqual %bool %c %test_constant\n"
8578                 }, // -19
8579                 {
8580                         "positive",
8581                         "0x1.0p7\n",
8582                         constructNormalizedFloat(7, 0x000000),
8583                         "%cond = OpFOrdEqual %bool %c %test_constant\n"
8584                 },  // +128
8585                 // SPIR-V requires that OpQuantizeToF16 flushes
8586                 // any numbers that would end up denormalized in F16 to zero.
8587                 {
8588                         "denorm",
8589                         "0x0.0006p-126\n",
8590                         std::ldexp(1.5f, -140),
8591                         "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
8592                 },  // denorm
8593                 {
8594                         "negative_denorm",
8595                         "-0x0.0006p-126\n",
8596                         -std::ldexp(1.5f, -140),
8597                         "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
8598                 }, // -denorm
8599                 {
8600                         "too_small",
8601                         "0x1.0p-16\n",
8602                         std::ldexp(1.0f, -16),
8603                         "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
8604                 },     // too small positive
8605                 {
8606                         "negative_too_small",
8607                         "-0x1.0p-32\n",
8608                         -std::ldexp(1.0f, -32),
8609                         "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
8610                 },      // too small negative
8611                 {
8612                         "negative_inf",
8613                         "-0x1.0p128\n",
8614                         -std::ldexp(1.0f, 128),
8615
8616                         "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
8617                         "%inf = OpIsInf %bool %c\n"
8618                         "%cond = OpLogicalAnd %bool %gz %inf\n"
8619                 },     // -inf to -inf
8620                 {
8621                         "inf",
8622                         "0x1.0p128\n",
8623                         std::ldexp(1.0f, 128),
8624
8625                         "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
8626                         "%inf = OpIsInf %bool %c\n"
8627                         "%cond = OpLogicalAnd %bool %gz %inf\n"
8628                 },     // +inf to +inf
8629                 {
8630                         "round_to_negative_inf",
8631                         "-0x1.0p32\n",
8632                         -std::ldexp(1.0f, 32),
8633
8634                         "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
8635                         "%inf = OpIsInf %bool %c\n"
8636                         "%cond = OpLogicalAnd %bool %gz %inf\n"
8637                 },     // round to -inf
8638                 {
8639                         "round_to_inf",
8640                         "0x1.0p16\n",
8641                         std::ldexp(1.0f, 16),
8642
8643                         "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
8644                         "%inf = OpIsInf %bool %c\n"
8645                         "%cond = OpLogicalAnd %bool %gz %inf\n"
8646                 },     // round to +inf
8647                 {
8648                         "nan",
8649                         "0x1.1p128\n",
8650                         std::numeric_limits<float>::quiet_NaN(),
8651
8652                         // Test for any NaN value, as NaNs are not preserved
8653                         "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
8654                         "%cond = OpIsNan %bool %direct_quant\n"
8655                 }, // nan
8656                 {
8657                         "negative_nan",
8658                         "-0x1.0001p128\n",
8659                         std::numeric_limits<float>::quiet_NaN(),
8660
8661                         // Test for any NaN value, as NaNs are not preserved
8662                         "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
8663                         "%cond = OpIsNan %bool %direct_quant\n"
8664                 } // -nan
8665         };
8666         const char*             constants                       =
8667                 "%test_constant = OpConstant %f32 ";  // The value will be test.constant.
8668
8669         StringTemplate  function                        (
8670                 "%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8671                 "%param1        = OpFunctionParameter %v4f32\n"
8672                 "%label_testfun = OpLabel\n"
8673                 "%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8674                 "%b             = OpFAdd %f32 %test_constant %a\n"
8675                 "%c             = OpQuantizeToF16 %f32 %b\n"
8676                 "${condition}\n"
8677                 "%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
8678                 "%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
8679                 "                 OpReturnValue %retval\n"
8680                 "OpFunctionEnd\n"
8681         );
8682
8683         const char*             specDecorations         = "OpDecorate %test_constant SpecId 0\n";
8684         const char*             specConstants           =
8685                         "%test_constant = OpSpecConstant %f32 0.\n"
8686                         "%c             = OpSpecConstantOp %f32 QuantizeToF16 %test_constant\n";
8687
8688         StringTemplate  specConstantFunction(
8689                 "%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8690                 "%param1        = OpFunctionParameter %v4f32\n"
8691                 "%label_testfun = OpLabel\n"
8692                 "${condition}\n"
8693                 "%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
8694                 "%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
8695                 "                 OpReturnValue %retval\n"
8696                 "OpFunctionEnd\n"
8697         );
8698
8699         for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
8700         {
8701                 map<string, string>                                                             codeSpecialization;
8702                 map<string, string>                                                             fragments;
8703                 codeSpecialization["condition"]                                 = tests[idx].condition;
8704                 fragments["testfun"]                                                    = function.specialize(codeSpecialization);
8705                 fragments["pre_main"]                                                   = string(constants) + tests[idx].constant + "\n";
8706                 createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
8707         }
8708
8709         for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
8710         {
8711                 map<string, string>                                                             codeSpecialization;
8712                 map<string, string>                                                             fragments;
8713                 SpecConstants                                                                   passConstants;
8714
8715                 codeSpecialization["condition"]                                 = tests[idx].condition;
8716                 fragments["testfun"]                                                    = specConstantFunction.specialize(codeSpecialization);
8717                 fragments["decoration"]                                                 = specDecorations;
8718                 fragments["pre_main"]                                                   = specConstants;
8719
8720                 passConstants.append<float>(tests[idx].valueAsFloat);
8721
8722                 createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
8723         }
8724 }
8725
8726 void createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup* testCtx)
8727 {
8728         RGBA inputColors[4] =  {
8729                 RGBA(0,         0,              0,              255),
8730                 RGBA(0,         0,              255,    255),
8731                 RGBA(0,         255,    0,              255),
8732                 RGBA(0,         255,    255,    255)
8733         };
8734
8735         RGBA expectedColors[4] =
8736         {
8737                 RGBA(255,        0,              0,              255),
8738                 RGBA(255,        0,              0,              255),
8739                 RGBA(255,        0,              0,              255),
8740                 RGBA(255,        0,              0,              255)
8741         };
8742
8743         struct DualFP16Possibility
8744         {
8745                 const char* name;
8746                 const char* input;
8747                 float           inputAsFloat;
8748                 const char* possibleOutput1;
8749                 const char* possibleOutput2;
8750         } tests[] = {
8751                 {
8752                         "positive_round_up_or_round_down",
8753                         "0x1.3003p8",
8754                         constructNormalizedFloat(8, 0x300300),
8755                         "0x1.304p8",
8756                         "0x1.3p8"
8757                 },
8758                 {
8759                         "negative_round_up_or_round_down",
8760                         "-0x1.6008p-7",
8761                         -constructNormalizedFloat(-7, 0x600800),
8762                         "-0x1.6p-7",
8763                         "-0x1.604p-7"
8764                 },
8765                 {
8766                         "carry_bit",
8767                         "0x1.01ep2",
8768                         constructNormalizedFloat(2, 0x01e000),
8769                         "0x1.01cp2",
8770                         "0x1.02p2"
8771                 },
8772                 {
8773                         "carry_to_exponent",
8774                         "0x1.ffep1",
8775                         constructNormalizedFloat(1, 0xffe000),
8776                         "0x1.ffcp1",
8777                         "0x1.0p2"
8778                 },
8779         };
8780         StringTemplate constants (
8781                 "%input_const = OpConstant %f32 ${input}\n"
8782                 "%possible_solution1 = OpConstant %f32 ${output1}\n"
8783                 "%possible_solution2 = OpConstant %f32 ${output2}\n"
8784                 );
8785
8786         StringTemplate specConstants (
8787                 "%input_const = OpSpecConstant %f32 0.\n"
8788                 "%possible_solution1 = OpConstant %f32 ${output1}\n"
8789                 "%possible_solution2 = OpConstant %f32 ${output2}\n"
8790         );
8791
8792         const char* specDecorations = "OpDecorate %input_const  SpecId 0\n";
8793
8794         const char* function  =
8795                 "%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8796                 "%param1        = OpFunctionParameter %v4f32\n"
8797                 "%label_testfun = OpLabel\n"
8798                 "%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8799                 // For the purposes of this test we assume that 0.f will always get
8800                 // faithfully passed through the pipeline stages.
8801                 "%b             = OpFAdd %f32 %input_const %a\n"
8802                 "%c             = OpQuantizeToF16 %f32 %b\n"
8803                 "%eq_1          = OpFOrdEqual %bool %c %possible_solution1\n"
8804                 "%eq_2          = OpFOrdEqual %bool %c %possible_solution2\n"
8805                 "%cond          = OpLogicalOr %bool %eq_1 %eq_2\n"
8806                 "%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
8807                 "%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1"
8808                 "                 OpReturnValue %retval\n"
8809                 "OpFunctionEnd\n";
8810
8811         for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
8812                 map<string, string>                                                                     fragments;
8813                 map<string, string>                                                                     constantSpecialization;
8814
8815                 constantSpecialization["input"]                                         = tests[idx].input;
8816                 constantSpecialization["output1"]                                       = tests[idx].possibleOutput1;
8817                 constantSpecialization["output2"]                                       = tests[idx].possibleOutput2;
8818                 fragments["testfun"]                                                            = function;
8819                 fragments["pre_main"]                                                           = constants.specialize(constantSpecialization);
8820                 createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
8821         }
8822
8823         for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
8824                 map<string, string>                                                                     fragments;
8825                 map<string, string>                                                                     constantSpecialization;
8826                 SpecConstants                                                                           passConstants;
8827
8828                 constantSpecialization["output1"]                                       = tests[idx].possibleOutput1;
8829                 constantSpecialization["output2"]                                       = tests[idx].possibleOutput2;
8830                 fragments["testfun"]                                                            = function;
8831                 fragments["decoration"]                                                         = specDecorations;
8832                 fragments["pre_main"]                                                           = specConstants.specialize(constantSpecialization);
8833
8834                 passConstants.append<float>(tests[idx].inputAsFloat);
8835
8836                 createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
8837         }
8838 }
8839
8840 tcu::TestCaseGroup* createOpQuantizeTests(tcu::TestContext& testCtx)
8841 {
8842         de::MovePtr<tcu::TestCaseGroup> opQuantizeTests (new tcu::TestCaseGroup(testCtx, "opquantize", "Test OpQuantizeToF16"));
8843         createOpQuantizeSingleOptionTests(opQuantizeTests.get());
8844         createOpQuantizeTwoPossibilityTests(opQuantizeTests.get());
8845         return opQuantizeTests.release();
8846 }
8847
8848 struct ShaderPermutation
8849 {
8850         deUint8 vertexPermutation;
8851         deUint8 geometryPermutation;
8852         deUint8 tesscPermutation;
8853         deUint8 tessePermutation;
8854         deUint8 fragmentPermutation;
8855 };
8856
8857 ShaderPermutation getShaderPermutation(deUint8 inputValue)
8858 {
8859         ShaderPermutation       permutation =
8860         {
8861                 static_cast<deUint8>(inputValue & 0x10? 1u: 0u),
8862                 static_cast<deUint8>(inputValue & 0x08? 1u: 0u),
8863                 static_cast<deUint8>(inputValue & 0x04? 1u: 0u),
8864                 static_cast<deUint8>(inputValue & 0x02? 1u: 0u),
8865                 static_cast<deUint8>(inputValue & 0x01? 1u: 0u)
8866         };
8867         return permutation;
8868 }
8869
8870 tcu::TestCaseGroup* createModuleTests(tcu::TestContext& testCtx)
8871 {
8872         RGBA                                                            defaultColors[4];
8873         RGBA                                                            invertedColors[4];
8874         de::MovePtr<tcu::TestCaseGroup>         moduleTests                     (new tcu::TestCaseGroup(testCtx, "module", "Multiple entry points into shaders"));
8875
8876         getDefaultColors(defaultColors);
8877         getInvertedDefaultColors(invertedColors);
8878
8879         // Combined module tests
8880         {
8881                 // Shader stages: vertex and fragment
8882                 {
8883                         const ShaderElement combinedPipeline[]  =
8884                         {
8885                                 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
8886                                 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
8887                         };
8888
8889                         addFunctionCaseWithPrograms<InstanceContext>(
8890                                 moduleTests.get(), "same_module", "", createCombinedModule, runAndVerifyDefaultPipeline,
8891                                 createInstanceContext(combinedPipeline, map<string, string>()));
8892                 }
8893
8894                 // Shader stages: vertex, geometry and fragment
8895                 {
8896                         const ShaderElement combinedPipeline[]  =
8897                         {
8898                                 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
8899                                 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
8900                                 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
8901                         };
8902
8903                         addFunctionCaseWithPrograms<InstanceContext>(
8904                                 moduleTests.get(), "same_module_geom", "", createCombinedModule, runAndVerifyDefaultPipeline,
8905                                 createInstanceContext(combinedPipeline, map<string, string>()));
8906                 }
8907
8908                 // Shader stages: vertex, tessellation control, tessellation evaluation and fragment
8909                 {
8910                         const ShaderElement combinedPipeline[]  =
8911                         {
8912                                 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
8913                                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
8914                                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
8915                                 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
8916                         };
8917
8918                         addFunctionCaseWithPrograms<InstanceContext>(
8919                                 moduleTests.get(), "same_module_tessc_tesse", "", createCombinedModule, runAndVerifyDefaultPipeline,
8920                                 createInstanceContext(combinedPipeline, map<string, string>()));
8921                 }
8922
8923                 // Shader stages: vertex, tessellation control, tessellation evaluation, geometry and fragment
8924                 {
8925                         const ShaderElement combinedPipeline[]  =
8926                         {
8927                                 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
8928                                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
8929                                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
8930                                 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
8931                                 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
8932                         };
8933
8934                         addFunctionCaseWithPrograms<InstanceContext>(
8935                                 moduleTests.get(), "same_module_tessc_tesse_geom", "", createCombinedModule, runAndVerifyDefaultPipeline,
8936                                 createInstanceContext(combinedPipeline, map<string, string>()));
8937                 }
8938         }
8939
8940         const char* numbers[] =
8941         {
8942                 "1", "2"
8943         };
8944
8945         for (deInt8 idx = 0; idx < 32; ++idx)
8946         {
8947                 ShaderPermutation                       permutation             = getShaderPermutation(idx);
8948                 string                                          name                    = string("vert") + numbers[permutation.vertexPermutation] + "_geom" + numbers[permutation.geometryPermutation] + "_tessc" + numbers[permutation.tesscPermutation] + "_tesse" + numbers[permutation.tessePermutation] + "_frag" + numbers[permutation.fragmentPermutation];
8949                 const ShaderElement                     pipeline[]              =
8950                 {
8951                         ShaderElement("vert",   string("vert") +        numbers[permutation.vertexPermutation],         VK_SHADER_STAGE_VERTEX_BIT),
8952                         ShaderElement("geom",   string("geom") +        numbers[permutation.geometryPermutation],       VK_SHADER_STAGE_GEOMETRY_BIT),
8953                         ShaderElement("tessc",  string("tessc") +       numbers[permutation.tesscPermutation],          VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
8954                         ShaderElement("tesse",  string("tesse") +       numbers[permutation.tessePermutation],          VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
8955                         ShaderElement("frag",   string("frag") +        numbers[permutation.fragmentPermutation],       VK_SHADER_STAGE_FRAGMENT_BIT)
8956                 };
8957
8958                 // If there are an even number of swaps, then it should be no-op.
8959                 // If there are an odd number, the color should be flipped.
8960                 if ((permutation.vertexPermutation + permutation.geometryPermutation + permutation.tesscPermutation + permutation.tessePermutation + permutation.fragmentPermutation) % 2 == 0)
8961                 {
8962                         addFunctionCaseWithPrograms<InstanceContext>(
8963                                         moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
8964                                         createInstanceContext(pipeline, defaultColors, defaultColors, map<string, string>()));
8965                 }
8966                 else
8967                 {
8968                         addFunctionCaseWithPrograms<InstanceContext>(
8969                                         moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
8970                                         createInstanceContext(pipeline, defaultColors, invertedColors, map<string, string>()));
8971                 }
8972         }
8973         return moduleTests.release();
8974 }
8975
8976 std::string getUnusedVarTestNamePiece(const std::string& prefix, ShaderTask task)
8977 {
8978         switch (task)
8979         {
8980                 case SHADER_TASK_NONE:                  return "";
8981                 case SHADER_TASK_NORMAL:                return prefix + "_normal";
8982                 case SHADER_TASK_UNUSED_VAR:    return prefix + "_unused_var";
8983                 case SHADER_TASK_UNUSED_FUNC:   return prefix + "_unused_func";
8984                 default:                                                DE_ASSERT(DE_FALSE);
8985         }
8986         // unreachable
8987         return "";
8988 }
8989
8990 std::string getShaderTaskIndexName(ShaderTaskIndex index)
8991 {
8992         switch (index)
8993         {
8994         case SHADER_TASK_INDEX_VERTEX:                  return "vertex";
8995         case SHADER_TASK_INDEX_GEOMETRY:                return "geom";
8996         case SHADER_TASK_INDEX_TESS_CONTROL:    return "tessc";
8997         case SHADER_TASK_INDEX_TESS_EVAL:               return "tesse";
8998         case SHADER_TASK_INDEX_FRAGMENT:                return "frag";
8999         default:                                                                DE_ASSERT(DE_FALSE);
9000         }
9001         // unreachable
9002         return "";
9003 }
9004
9005 std::string getUnusedVarTestName(const ShaderTaskArray& shaderTasks, const VariableLocation& location)
9006 {
9007         std::string testName = location.toString();
9008
9009         for (size_t i = 0; i < DE_LENGTH_OF_ARRAY(shaderTasks); ++i)
9010         {
9011                 if (shaderTasks[i] != SHADER_TASK_NONE)
9012                 {
9013                         testName += "_" + getUnusedVarTestNamePiece(getShaderTaskIndexName((ShaderTaskIndex)i), shaderTasks[i]);
9014                 }
9015         }
9016
9017         return testName;
9018 }
9019
9020 tcu::TestCaseGroup* createUnusedVariableTests(tcu::TestContext& testCtx)
9021 {
9022         de::MovePtr<tcu::TestCaseGroup>         moduleTests                             (new tcu::TestCaseGroup(testCtx, "unused_variables", "Graphics shaders with unused variables"));
9023
9024         ShaderTaskArray                                         shaderCombinations[]    =
9025         {
9026                 // Vertex                                       Geometry                                        Tess. Control                           Tess. Evaluation                        Fragment
9027                 { SHADER_TASK_UNUSED_VAR,       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NORMAL      },
9028                 { SHADER_TASK_UNUSED_FUNC,      SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NORMAL      },
9029                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_UNUSED_VAR  },
9030                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_UNUSED_FUNC },
9031                 { SHADER_TASK_NORMAL,           SHADER_TASK_UNUSED_VAR,         SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NORMAL      },
9032                 { SHADER_TASK_NORMAL,           SHADER_TASK_UNUSED_FUNC,        SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NORMAL      },
9033                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_UNUSED_VAR,         SHADER_TASK_NORMAL,                     SHADER_TASK_NORMAL      },
9034                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_UNUSED_FUNC,        SHADER_TASK_NORMAL,                     SHADER_TASK_NORMAL      },
9035                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_NORMAL,                     SHADER_TASK_UNUSED_VAR,         SHADER_TASK_NORMAL      },
9036                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_NORMAL,                     SHADER_TASK_UNUSED_FUNC,        SHADER_TASK_NORMAL      }
9037         };
9038
9039         const VariableLocation                          testLocations[] =
9040         {
9041                 // Set          Binding
9042                 { 0,            5                       },
9043                 { 5,            5                       },
9044         };
9045
9046         for (size_t combNdx = 0; combNdx < DE_LENGTH_OF_ARRAY(shaderCombinations); ++combNdx)
9047         {
9048                 for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
9049                 {
9050                         const ShaderTaskArray&  shaderTasks             = shaderCombinations[combNdx];
9051                         const VariableLocation& location                = testLocations[locationNdx];
9052                         std::string                             testName                = getUnusedVarTestName(shaderTasks, location);
9053
9054                         addFunctionCaseWithPrograms<UnusedVariableContext>(
9055                                 moduleTests.get(), testName, "", createUnusedVariableModules, runAndVerifyUnusedVariablePipeline,
9056                                 createUnusedVariableContext(shaderTasks, location));
9057                 }
9058         }
9059
9060         return moduleTests.release();
9061 }
9062
9063 tcu::TestCaseGroup* createLoopTests(tcu::TestContext& testCtx)
9064 {
9065         de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "loop", "Looping control flow"));
9066         RGBA defaultColors[4];
9067         getDefaultColors(defaultColors);
9068         map<string, string> fragments;
9069         fragments["pre_main"] =
9070                 "%c_f32_5 = OpConstant %f32 5.\n";
9071
9072         // A loop with a single block. The Continue Target is the loop block
9073         // itself. In SPIR-V terms, the "loop construct" contains no blocks at all
9074         // -- the "continue construct" forms the entire loop.
9075         fragments["testfun"] =
9076                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9077                 "%param1 = OpFunctionParameter %v4f32\n"
9078
9079                 "%entry = OpLabel\n"
9080                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9081                 "OpBranch %loop\n"
9082
9083                 ";adds and subtracts 1.0 to %val in alternate iterations\n"
9084                 "%loop = OpLabel\n"
9085                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9086                 "%delta = OpPhi %f32 %c_f32_1 %entry %minus_delta %loop\n"
9087                 "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9088                 "%val = OpFAdd %f32 %val1 %delta\n"
9089                 "%minus_delta = OpFSub %f32 %c_f32_0 %delta\n"
9090                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9091                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9092                 "OpLoopMerge %exit %loop None\n"
9093                 "OpBranchConditional %again %loop %exit\n"
9094
9095                 "%exit = OpLabel\n"
9096                 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9097                 "OpReturnValue %result\n"
9098
9099                 "OpFunctionEnd\n";
9100
9101         createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());
9102
9103         // Body comprised of multiple basic blocks.
9104         const StringTemplate multiBlock(
9105                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9106                 "%param1 = OpFunctionParameter %v4f32\n"
9107
9108                 "%entry = OpLabel\n"
9109                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9110                 "OpBranch %loop\n"
9111
9112                 ";adds and subtracts 1.0 to %val in alternate iterations\n"
9113                 "%loop = OpLabel\n"
9114                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %cont\n"
9115                 "%delta = OpPhi %f32 %c_f32_1 %entry %delta_next %cont\n"
9116                 "%val1 = OpPhi %f32 %val0 %entry %val %cont\n"
9117                 // There are several possibilities for the Continue Target below.  Each
9118                 // will be specialized into a separate test case.
9119                 "OpLoopMerge %exit ${continue_target} None\n"
9120                 "OpBranch %if\n"
9121
9122                 "%if = OpLabel\n"
9123                 ";delta_next = (delta > 0) ? -1 : 1;\n"
9124                 "%gt0 = OpFOrdGreaterThan %bool %delta %c_f32_0\n"
9125                 "OpSelectionMerge %gather DontFlatten\n"
9126                 "OpBranchConditional %gt0 %even %odd ;tells us if %count is even or odd\n"
9127
9128                 "%odd = OpLabel\n"
9129                 "OpBranch %gather\n"
9130
9131                 "%even = OpLabel\n"
9132                 "OpBranch %gather\n"
9133
9134                 "%gather = OpLabel\n"
9135                 "%delta_next = OpPhi %f32 %c_f32_n1 %even %c_f32_1 %odd\n"
9136                 "%val = OpFAdd %f32 %val1 %delta\n"
9137                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9138                 "OpBranch %cont\n"
9139
9140                 "%cont = OpLabel\n"
9141                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9142                 "OpBranchConditional %again %loop %exit\n"
9143
9144                 "%exit = OpLabel\n"
9145                 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9146                 "OpReturnValue %result\n"
9147
9148                 "OpFunctionEnd\n");
9149
9150         map<string, string> continue_target;
9151
9152         // The Continue Target is the loop block itself.
9153         continue_target["continue_target"] = "%loop";
9154         fragments["testfun"] = multiBlock.specialize(continue_target);
9155         createTestsForAllStages("multi_block_continue_construct", defaultColors, defaultColors, fragments, testGroup.get());
9156
9157         // The Continue Target is at the end of the loop.
9158         continue_target["continue_target"] = "%cont";
9159         fragments["testfun"] = multiBlock.specialize(continue_target);
9160         createTestsForAllStages("multi_block_loop_construct", defaultColors, defaultColors, fragments, testGroup.get());
9161
9162         // A loop with continue statement.
9163         fragments["testfun"] =
9164                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9165                 "%param1 = OpFunctionParameter %v4f32\n"
9166
9167                 "%entry = OpLabel\n"
9168                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9169                 "OpBranch %loop\n"
9170
9171                 ";adds 4, 3, and 1 to %val0 (skips 2)\n"
9172                 "%loop = OpLabel\n"
9173                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9174                 "%val1 = OpPhi %f32 %val0 %entry %val %continue\n"
9175                 "OpLoopMerge %exit %continue None\n"
9176                 "OpBranch %if\n"
9177
9178                 "%if = OpLabel\n"
9179                 ";skip if %count==2\n"
9180                 "%eq2 = OpIEqual %bool %count %c_i32_2\n"
9181                 "OpBranchConditional %eq2 %continue %body\n"
9182
9183                 "%body = OpLabel\n"
9184                 "%fcount = OpConvertSToF %f32 %count\n"
9185                 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9186                 "OpBranch %continue\n"
9187
9188                 "%continue = OpLabel\n"
9189                 "%val = OpPhi %f32 %val2 %body %val1 %if\n"
9190                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9191                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9192                 "OpBranchConditional %again %loop %exit\n"
9193
9194                 "%exit = OpLabel\n"
9195                 "%same = OpFSub %f32 %val %c_f32_8\n"
9196                 "%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9197                 "OpReturnValue %result\n"
9198                 "OpFunctionEnd\n";
9199         createTestsForAllStages("continue", defaultColors, defaultColors, fragments, testGroup.get());
9200
9201         // A loop with break.
9202         fragments["testfun"] =
9203                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9204                 "%param1 = OpFunctionParameter %v4f32\n"
9205
9206                 "%entry = OpLabel\n"
9207                 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9208                 "%dot = OpDot %f32 %param1 %param1\n"
9209                 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9210                 "%zero = OpConvertFToU %u32 %div\n"
9211                 "%two = OpIAdd %i32 %zero %c_i32_2\n"
9212                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9213                 "OpBranch %loop\n"
9214
9215                 ";adds 4 and 3 to %val0 (exits early)\n"
9216                 "%loop = OpLabel\n"
9217                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9218                 "%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9219                 "OpLoopMerge %exit %continue None\n"
9220                 "OpBranch %if\n"
9221
9222                 "%if = OpLabel\n"
9223                 ";end loop if %count==%two\n"
9224                 "%above2 = OpSGreaterThan %bool %count %two\n"
9225                 "OpBranchConditional %above2 %body %exit\n"
9226
9227                 "%body = OpLabel\n"
9228                 "%fcount = OpConvertSToF %f32 %count\n"
9229                 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9230                 "OpBranch %continue\n"
9231
9232                 "%continue = OpLabel\n"
9233                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9234                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9235                 "OpBranchConditional %again %loop %exit\n"
9236
9237                 "%exit = OpLabel\n"
9238                 "%val_post = OpPhi %f32 %val2 %continue %val1 %if\n"
9239                 "%same = OpFSub %f32 %val_post %c_f32_7\n"
9240                 "%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9241                 "OpReturnValue %result\n"
9242                 "OpFunctionEnd\n";
9243         createTestsForAllStages("break", defaultColors, defaultColors, fragments, testGroup.get());
9244
9245         // A loop with return.
9246         fragments["testfun"] =
9247                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9248                 "%param1 = OpFunctionParameter %v4f32\n"
9249
9250                 "%entry = OpLabel\n"
9251                 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9252                 "%dot = OpDot %f32 %param1 %param1\n"
9253                 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9254                 "%zero = OpConvertFToU %u32 %div\n"
9255                 "%two = OpIAdd %i32 %zero %c_i32_2\n"
9256                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9257                 "OpBranch %loop\n"
9258
9259                 ";returns early without modifying %param1\n"
9260                 "%loop = OpLabel\n"
9261                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9262                 "%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9263                 "OpLoopMerge %exit %continue None\n"
9264                 "OpBranch %if\n"
9265
9266                 "%if = OpLabel\n"
9267                 ";return if %count==%two\n"
9268                 "%above2 = OpSGreaterThan %bool %count %two\n"
9269                 "OpSelectionMerge %body DontFlatten\n"
9270                 "OpBranchConditional %above2 %body %early_exit\n"
9271
9272                 "%early_exit = OpLabel\n"
9273                 "OpReturnValue %param1\n"
9274
9275                 "%body = OpLabel\n"
9276                 "%fcount = OpConvertSToF %f32 %count\n"
9277                 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9278                 "OpBranch %continue\n"
9279
9280                 "%continue = OpLabel\n"
9281                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9282                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9283                 "OpBranchConditional %again %loop %exit\n"
9284
9285                 "%exit = OpLabel\n"
9286                 ";should never get here, so return an incorrect result\n"
9287                 "%result = OpVectorInsertDynamic %v4f32 %param1 %val2 %c_i32_0\n"
9288                 "OpReturnValue %result\n"
9289                 "OpFunctionEnd\n";
9290         createTestsForAllStages("return", defaultColors, defaultColors, fragments, testGroup.get());
9291
9292         // Continue inside a switch block to break to enclosing loop's merge block.
9293         // Matches roughly the following GLSL code:
9294         // for (; keep_going; keep_going = false)
9295         // {
9296         //     switch (int(param1.x))
9297         //     {
9298         //         case 0: continue;
9299         //         case 1: continue;
9300         //         default: continue;
9301         //     }
9302         //     dead code: modify return value to invalid result.
9303         // }
9304         fragments["pre_main"] =
9305                 "%fp_bool = OpTypePointer Function %bool\n"
9306                 "%true = OpConstantTrue %bool\n"
9307                 "%false = OpConstantFalse %bool\n";
9308
9309         fragments["testfun"] =
9310                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9311                 "%param1 = OpFunctionParameter %v4f32\n"
9312
9313                 "%entry = OpLabel\n"
9314                 "%keep_going = OpVariable %fp_bool Function\n"
9315                 "%val_ptr = OpVariable %fp_f32 Function\n"
9316                 "%param1_x = OpCompositeExtract %f32 %param1 0\n"
9317                 "OpStore %keep_going %true\n"
9318                 "OpBranch %forloop_begin\n"
9319
9320                 "%forloop_begin = OpLabel\n"
9321                 "OpLoopMerge %forloop_merge %forloop_continue None\n"
9322                 "OpBranch %forloop\n"
9323
9324                 "%forloop = OpLabel\n"
9325                 "%for_condition = OpLoad %bool %keep_going\n"
9326                 "OpBranchConditional %for_condition %forloop_body %forloop_merge\n"
9327
9328                 "%forloop_body = OpLabel\n"
9329                 "OpStore %val_ptr %param1_x\n"
9330                 "%param1_x_int = OpConvertFToS %i32 %param1_x\n"
9331
9332                 "OpSelectionMerge %switch_merge None\n"
9333                 "OpSwitch %param1_x_int %default 0 %case_0 1 %case_1\n"
9334                 "%case_0 = OpLabel\n"
9335                 "OpBranch %forloop_continue\n"
9336                 "%case_1 = OpLabel\n"
9337                 "OpBranch %forloop_continue\n"
9338                 "%default = OpLabel\n"
9339                 "OpBranch %forloop_continue\n"
9340                 "%switch_merge = OpLabel\n"
9341                 ";should never get here, so change the return value to invalid result\n"
9342                 "OpStore %val_ptr %c_f32_1\n"
9343                 "OpBranch %forloop_continue\n"
9344
9345                 "%forloop_continue = OpLabel\n"
9346                 "OpStore %keep_going %false\n"
9347                 "OpBranch %forloop_begin\n"
9348                 "%forloop_merge = OpLabel\n"
9349
9350                 "%val = OpLoad %f32 %val_ptr\n"
9351                 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9352                 "OpReturnValue %result\n"
9353                 "OpFunctionEnd\n";
9354         createTestsForAllStages("switch_continue", defaultColors, defaultColors, fragments, testGroup.get());
9355
9356         return testGroup.release();
9357 }
9358
9359 // A collection of tests putting OpControlBarrier in places GLSL forbids but SPIR-V allows.
9360 tcu::TestCaseGroup* createBarrierTests(tcu::TestContext& testCtx)
9361 {
9362         de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "barrier", "OpControlBarrier"));
9363         map<string, string> fragments;
9364
9365         // A barrier inside a function body.
9366         fragments["pre_main"] =
9367                 "%Workgroup = OpConstant %i32 2\n"
9368                 "%WorkgroupAcquireRelease = OpConstant %i32 0x108\n";
9369         fragments["testfun"] =
9370                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9371                 "%param1 = OpFunctionParameter %v4f32\n"
9372                 "%label_testfun = OpLabel\n"
9373                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9374                 "OpReturnValue %param1\n"
9375                 "OpFunctionEnd\n";
9376         addTessCtrlTest(testGroup.get(), "in_function", fragments);
9377
9378         // Common setup code for the following tests.
9379         fragments["pre_main"] =
9380                 "%Workgroup = OpConstant %i32 2\n"
9381                 "%WorkgroupAcquireRelease = OpConstant %i32 0x108\n"
9382                 "%c_f32_5 = OpConstant %f32 5.\n";
9383         const string setupPercentZero =  // Begins %test_code function with code that sets %zero to 0u but cannot be optimized away.
9384                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9385                 "%param1 = OpFunctionParameter %v4f32\n"
9386                 "%entry = OpLabel\n"
9387                 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9388                 "%dot = OpDot %f32 %param1 %param1\n"
9389                 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9390                 "%zero = OpConvertFToU %u32 %div\n";
9391
9392         // Barriers inside OpSwitch branches.
9393         fragments["testfun"] =
9394                 setupPercentZero +
9395                 "OpSelectionMerge %switch_exit None\n"
9396                 "OpSwitch %zero %switch_default 0 %case0 1 %case1 ;should always go to %case0\n"
9397
9398                 "%case1 = OpLabel\n"
9399                 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9400                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9401                 "%wrong_branch_alert1 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9402                 "OpBranch %switch_exit\n"
9403
9404                 "%switch_default = OpLabel\n"
9405                 "%wrong_branch_alert2 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9406                 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9407                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9408                 "OpBranch %switch_exit\n"
9409
9410                 "%case0 = OpLabel\n"
9411                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9412                 "OpBranch %switch_exit\n"
9413
9414                 "%switch_exit = OpLabel\n"
9415                 "%ret = OpPhi %v4f32 %param1 %case0 %wrong_branch_alert1 %case1 %wrong_branch_alert2 %switch_default\n"
9416                 "OpReturnValue %ret\n"
9417                 "OpFunctionEnd\n";
9418         addTessCtrlTest(testGroup.get(), "in_switch", fragments);
9419
9420         // Barriers inside if-then-else.
9421         fragments["testfun"] =
9422                 setupPercentZero +
9423                 "%eq0 = OpIEqual %bool %zero %c_u32_0\n"
9424                 "OpSelectionMerge %exit DontFlatten\n"
9425                 "OpBranchConditional %eq0 %then %else\n"
9426
9427                 "%else = OpLabel\n"
9428                 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9429                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9430                 "%wrong_branch_alert = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9431                 "OpBranch %exit\n"
9432
9433                 "%then = OpLabel\n"
9434                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9435                 "OpBranch %exit\n"
9436                 "%exit = OpLabel\n"
9437                 "%ret = OpPhi %v4f32 %param1 %then %wrong_branch_alert %else\n"
9438                 "OpReturnValue %ret\n"
9439                 "OpFunctionEnd\n";
9440         addTessCtrlTest(testGroup.get(), "in_if", fragments);
9441
9442         // A barrier after control-flow reconvergence, tempting the compiler to attempt something like this:
9443         // http://lists.llvm.org/pipermail/llvm-dev/2009-October/026317.html.
9444         fragments["testfun"] =
9445                 setupPercentZero +
9446                 "%thread_id = OpLoad %i32 %BP_gl_InvocationID\n"
9447                 "%thread0 = OpIEqual %bool %thread_id %c_i32_0\n"
9448                 "OpSelectionMerge %exit DontFlatten\n"
9449                 "OpBranchConditional %thread0 %then %else\n"
9450
9451                 "%else = OpLabel\n"
9452                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9453                 "OpBranch %exit\n"
9454
9455                 "%then = OpLabel\n"
9456                 "%val1 = OpVectorExtractDynamic %f32 %param1 %zero\n"
9457                 "OpBranch %exit\n"
9458
9459                 "%exit = OpLabel\n"
9460                 "%val = OpPhi %f32 %val0 %else %val1 %then\n"
9461                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9462                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %val %zero\n"
9463                 "OpReturnValue %ret\n"
9464                 "OpFunctionEnd\n";
9465         addTessCtrlTest(testGroup.get(), "after_divergent_if", fragments);
9466
9467         // A barrier inside a loop.
9468         fragments["pre_main"] =
9469                 "%Workgroup = OpConstant %i32 2\n"
9470                 "%WorkgroupAcquireRelease = OpConstant %i32 0x108\n"
9471                 "%c_f32_10 = OpConstant %f32 10.\n";
9472         fragments["testfun"] =
9473                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9474                 "%param1 = OpFunctionParameter %v4f32\n"
9475                 "%entry = OpLabel\n"
9476                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9477                 "OpBranch %loop\n"
9478
9479                 ";adds 4, 3, 2, and 1 to %val0\n"
9480                 "%loop = OpLabel\n"
9481                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9482                 "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9483                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9484                 "%fcount = OpConvertSToF %f32 %count\n"
9485                 "%val = OpFAdd %f32 %val1 %fcount\n"
9486                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9487                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9488                 "OpLoopMerge %exit %loop None\n"
9489                 "OpBranchConditional %again %loop %exit\n"
9490
9491                 "%exit = OpLabel\n"
9492                 "%same = OpFSub %f32 %val %c_f32_10\n"
9493                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9494                 "OpReturnValue %ret\n"
9495                 "OpFunctionEnd\n";
9496         addTessCtrlTest(testGroup.get(), "in_loop", fragments);
9497
9498         return testGroup.release();
9499 }
9500
9501 // Test for the OpFRem instruction.
9502 tcu::TestCaseGroup* createFRemTests(tcu::TestContext& testCtx)
9503 {
9504         de::MovePtr<tcu::TestCaseGroup>         testGroup(new tcu::TestCaseGroup(testCtx, "frem", "OpFRem"));
9505         map<string, string>                                     fragments;
9506         RGBA                                                            inputColors[4];
9507         RGBA                                                            outputColors[4];
9508
9509         fragments["pre_main"]                            =
9510                 "%c_f32_3 = OpConstant %f32 3.0\n"
9511                 "%c_f32_n3 = OpConstant %f32 -3.0\n"
9512                 "%c_f32_4 = OpConstant %f32 4.0\n"
9513                 "%c_f32_p75 = OpConstant %f32 0.75\n"
9514                 "%c_v4f32_p75_p75_p75_p75 = OpConstantComposite %v4f32 %c_f32_p75 %c_f32_p75 %c_f32_p75 %c_f32_p75 \n"
9515                 "%c_v4f32_4_4_4_4 = OpConstantComposite %v4f32 %c_f32_4 %c_f32_4 %c_f32_4 %c_f32_4\n"
9516                 "%c_v4f32_3_n3_3_n3 = OpConstantComposite %v4f32 %c_f32_3 %c_f32_n3 %c_f32_3 %c_f32_n3\n";
9517
9518         // The test does the following.
9519         // vec4 result = (param1 * 8.0) - 4.0;
9520         // return (frem(result.x,3) + 0.75, frem(result.y, -3) + 0.75, 0, 1)
9521         fragments["testfun"]                             =
9522                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9523                 "%param1 = OpFunctionParameter %v4f32\n"
9524                 "%label_testfun = OpLabel\n"
9525                 "%v_times_8 = OpVectorTimesScalar %v4f32 %param1 %c_f32_8\n"
9526                 "%minus_4 = OpFSub %v4f32 %v_times_8 %c_v4f32_4_4_4_4\n"
9527                 "%frem = OpFRem %v4f32 %minus_4 %c_v4f32_3_n3_3_n3\n"
9528                 "%added = OpFAdd %v4f32 %frem %c_v4f32_p75_p75_p75_p75\n"
9529                 "%xyz_1 = OpVectorInsertDynamic %v4f32 %added %c_f32_1 %c_i32_3\n"
9530                 "%xy_0_1 = OpVectorInsertDynamic %v4f32 %xyz_1 %c_f32_0 %c_i32_2\n"
9531                 "OpReturnValue %xy_0_1\n"
9532                 "OpFunctionEnd\n";
9533
9534
9535         inputColors[0]          = RGBA(16,      16,             0, 255);
9536         inputColors[1]          = RGBA(232, 232,        0, 255);
9537         inputColors[2]          = RGBA(232, 16,         0, 255);
9538         inputColors[3]          = RGBA(16,      232,    0, 255);
9539
9540         outputColors[0]         = RGBA(64,      64,             0, 255);
9541         outputColors[1]         = RGBA(255, 255,        0, 255);
9542         outputColors[2]         = RGBA(255, 64,         0, 255);
9543         outputColors[3]         = RGBA(64,      255,    0, 255);
9544
9545         createTestsForAllStages("frem", inputColors, outputColors, fragments, testGroup.get());
9546         return testGroup.release();
9547 }
9548
9549 // Test for the OpSRem instruction.
9550 tcu::TestCaseGroup* createOpSRemGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
9551 {
9552         de::MovePtr<tcu::TestCaseGroup>         testGroup(new tcu::TestCaseGroup(testCtx, "srem", "OpSRem"));
9553         map<string, string>                                     fragments;
9554
9555         fragments["pre_main"]                            =
9556                 "%c_f32_255 = OpConstant %f32 255.0\n"
9557                 "%c_i32_128 = OpConstant %i32 128\n"
9558                 "%c_i32_255 = OpConstant %i32 255\n"
9559                 "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
9560                 "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
9561                 "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
9562
9563         // The test does the following.
9564         // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
9565         // ivec4 result = ivec4(srem(ints.x, ints.y), srem(ints.y, ints.z), srem(ints.z, ints.x), 255);
9566         // return float(result + 128) / 255.0;
9567         fragments["testfun"]                             =
9568                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9569                 "%param1 = OpFunctionParameter %v4f32\n"
9570                 "%label_testfun = OpLabel\n"
9571                 "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
9572                 "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
9573                 "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
9574                 "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
9575                 "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
9576                 "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
9577                 "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
9578                 "%x_out = OpSRem %i32 %x_in %y_in\n"
9579                 "%y_out = OpSRem %i32 %y_in %z_in\n"
9580                 "%z_out = OpSRem %i32 %z_in %x_in\n"
9581                 "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
9582                 "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
9583                 "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
9584                 "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
9585                 "OpReturnValue %float_out\n"
9586                 "OpFunctionEnd\n";
9587
9588         const struct CaseParams
9589         {
9590                 const char*             name;
9591                 const char*             failMessageTemplate;    // customized status message
9592                 qpTestResult    failResult;                             // override status on failure
9593                 int                             operands[4][3];                 // four (x, y, z) vectors of operands
9594                 int                             results[4][3];                  // four (x, y, z) vectors of results
9595         } cases[] =
9596         {
9597                 {
9598                         "positive",
9599                         "${reason}",
9600                         QP_TEST_RESULT_FAIL,
9601                         { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } },                 // operands
9602                         { { 5, 12,  2 }, { 0, 5, 2 }, {  3, 8,  6 }, { 25, 60,   0 } },                 // results
9603                 },
9604                 {
9605                         "all",
9606                         "Inconsistent results, but within specification: ${reason}",
9607                         negFailResult,                                                                                                                  // negative operands, not required by the spec
9608                         { { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } },    // operands
9609                         { { 5, 12,  -2 }, {  0, -5, 2 }, {  3, 8,  -6 }, { 25, -60,   0 } },    // results
9610                 },
9611         };
9612         // If either operand is negative the result is undefined. Some implementations may still return correct values.
9613
9614         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
9615         {
9616                 const CaseParams&       params                  = cases[caseNdx];
9617                 RGBA                            inputColors[4];
9618                 RGBA                            outputColors[4];
9619
9620                 for (int i = 0; i < 4; ++i)
9621                 {
9622                         inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
9623                         outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
9624                 }
9625
9626                 createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
9627         }
9628
9629         return testGroup.release();
9630 }
9631
9632 // Test for the OpSMod instruction.
9633 tcu::TestCaseGroup* createOpSModGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
9634 {
9635         de::MovePtr<tcu::TestCaseGroup>         testGroup(new tcu::TestCaseGroup(testCtx, "smod", "OpSMod"));
9636         map<string, string>                                     fragments;
9637
9638         fragments["pre_main"]                            =
9639                 "%c_f32_255 = OpConstant %f32 255.0\n"
9640                 "%c_i32_128 = OpConstant %i32 128\n"
9641                 "%c_i32_255 = OpConstant %i32 255\n"
9642                 "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
9643                 "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
9644                 "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
9645
9646         // The test does the following.
9647         // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
9648         // ivec4 result = ivec4(smod(ints.x, ints.y), smod(ints.y, ints.z), smod(ints.z, ints.x), 255);
9649         // return float(result + 128) / 255.0;
9650         fragments["testfun"]                             =
9651                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9652                 "%param1 = OpFunctionParameter %v4f32\n"
9653                 "%label_testfun = OpLabel\n"
9654                 "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
9655                 "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
9656                 "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
9657                 "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
9658                 "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
9659                 "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
9660                 "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
9661                 "%x_out = OpSMod %i32 %x_in %y_in\n"
9662                 "%y_out = OpSMod %i32 %y_in %z_in\n"
9663                 "%z_out = OpSMod %i32 %z_in %x_in\n"
9664                 "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
9665                 "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
9666                 "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
9667                 "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
9668                 "OpReturnValue %float_out\n"
9669                 "OpFunctionEnd\n";
9670
9671         const struct CaseParams
9672         {
9673                 const char*             name;
9674                 const char*             failMessageTemplate;    // customized status message
9675                 qpTestResult    failResult;                             // override status on failure
9676                 int                             operands[4][3];                 // four (x, y, z) vectors of operands
9677                 int                             results[4][3];                  // four (x, y, z) vectors of results
9678         } cases[] =
9679         {
9680                 {
9681                         "positive",
9682                         "${reason}",
9683                         QP_TEST_RESULT_FAIL,
9684                         { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } },                         // operands
9685                         { { 5, 12,  2 }, { 0, 5, 2 }, {  3, 8,  6 }, { 25, 60,   0 } },                         // results
9686                 },
9687                 {
9688                         "all",
9689                         "Inconsistent results, but within specification: ${reason}",
9690                         negFailResult,                                                                                                                          // negative operands, not required by the spec
9691                         { { 5, 12, -17 }, { -5, -5,  7 }, { 75,   8, -81 }, {  25, -60, 100 } },        // operands
9692                         { { 5, -5,   3 }, {  0,  2, -3 }, {  3, -73,  69 }, { -35,  40,   0 } },        // results
9693                 },
9694         };
9695         // If either operand is negative the result is undefined. Some implementations may still return correct values.
9696
9697         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
9698         {
9699                 const CaseParams&       params                  = cases[caseNdx];
9700                 RGBA                            inputColors[4];
9701                 RGBA                            outputColors[4];
9702
9703                 for (int i = 0; i < 4; ++i)
9704                 {
9705                         inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
9706                         outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
9707                 }
9708
9709                 createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
9710         }
9711         return testGroup.release();
9712 }
9713
// Data types handled by the conversion helpers that follow (bit/byte width, signedness, naming).
enum ConversionDataType
{
	// Signed integers.
	DATA_TYPE_SIGNED_8 = 0,
	DATA_TYPE_SIGNED_16,
	DATA_TYPE_SIGNED_32,
	DATA_TYPE_SIGNED_64,

	// Unsigned integers.
	DATA_TYPE_UNSIGNED_8,
	DATA_TYPE_UNSIGNED_16,
	DATA_TYPE_UNSIGNED_32,
	DATA_TYPE_UNSIGNED_64,

	// Floating point.
	DATA_TYPE_FLOAT_16,
	DATA_TYPE_FLOAT_32,
	DATA_TYPE_FLOAT_64,

	// Two-component signed integer vectors.
	DATA_TYPE_VEC2_SIGNED_16,
	DATA_TYPE_VEC2_SIGNED_32
};
9730
9731 const string getBitWidthStr (ConversionDataType type)
9732 {
9733         switch (type)
9734         {
9735                 case DATA_TYPE_SIGNED_8:
9736                 case DATA_TYPE_UNSIGNED_8:
9737                         return "8";
9738
9739                 case DATA_TYPE_SIGNED_16:
9740                 case DATA_TYPE_UNSIGNED_16:
9741                 case DATA_TYPE_FLOAT_16:
9742                         return "16";
9743
9744                 case DATA_TYPE_SIGNED_32:
9745                 case DATA_TYPE_UNSIGNED_32:
9746                 case DATA_TYPE_FLOAT_32:
9747                 case DATA_TYPE_VEC2_SIGNED_16:
9748                         return "32";
9749
9750                 case DATA_TYPE_SIGNED_64:
9751                 case DATA_TYPE_UNSIGNED_64:
9752                 case DATA_TYPE_FLOAT_64:
9753                 case DATA_TYPE_VEC2_SIGNED_32:
9754                         return "64";
9755
9756                 default:
9757                         DE_ASSERT(false);
9758         }
9759         return "";
9760 }
9761
9762 const string getByteWidthStr (ConversionDataType type)
9763 {
9764         switch (type)
9765         {
9766                 case DATA_TYPE_SIGNED_8:
9767                 case DATA_TYPE_UNSIGNED_8:
9768                         return "1";
9769
9770                 case DATA_TYPE_SIGNED_16:
9771                 case DATA_TYPE_UNSIGNED_16:
9772                 case DATA_TYPE_FLOAT_16:
9773                         return "2";
9774
9775                 case DATA_TYPE_SIGNED_32:
9776                 case DATA_TYPE_UNSIGNED_32:
9777                 case DATA_TYPE_FLOAT_32:
9778                 case DATA_TYPE_VEC2_SIGNED_16:
9779                         return "4";
9780
9781                 case DATA_TYPE_SIGNED_64:
9782                 case DATA_TYPE_UNSIGNED_64:
9783                 case DATA_TYPE_FLOAT_64:
9784                 case DATA_TYPE_VEC2_SIGNED_32:
9785                         return "8";
9786
9787                 default:
9788                         DE_ASSERT(false);
9789         }
9790         return "";
9791 }
9792
9793 bool isSigned (ConversionDataType type)
9794 {
9795         switch (type)
9796         {
9797                 case DATA_TYPE_SIGNED_8:
9798                 case DATA_TYPE_SIGNED_16:
9799                 case DATA_TYPE_SIGNED_32:
9800                 case DATA_TYPE_SIGNED_64:
9801                 case DATA_TYPE_FLOAT_16:
9802                 case DATA_TYPE_FLOAT_32:
9803                 case DATA_TYPE_FLOAT_64:
9804                 case DATA_TYPE_VEC2_SIGNED_16:
9805                 case DATA_TYPE_VEC2_SIGNED_32:
9806                         return true;
9807
9808                 case DATA_TYPE_UNSIGNED_8:
9809                 case DATA_TYPE_UNSIGNED_16:
9810                 case DATA_TYPE_UNSIGNED_32:
9811                 case DATA_TYPE_UNSIGNED_64:
9812                         return false;
9813
9814                 default:
9815                         DE_ASSERT(false);
9816         }
9817         return false;
9818 }
9819
9820 bool isInt (ConversionDataType type)
9821 {
9822         switch (type)
9823         {
9824                 case DATA_TYPE_SIGNED_8:
9825                 case DATA_TYPE_SIGNED_16:
9826                 case DATA_TYPE_SIGNED_32:
9827                 case DATA_TYPE_SIGNED_64:
9828                 case DATA_TYPE_UNSIGNED_8:
9829                 case DATA_TYPE_UNSIGNED_16:
9830                 case DATA_TYPE_UNSIGNED_32:
9831                 case DATA_TYPE_UNSIGNED_64:
9832                         return true;
9833
9834                 case DATA_TYPE_FLOAT_16:
9835                 case DATA_TYPE_FLOAT_32:
9836                 case DATA_TYPE_FLOAT_64:
9837                 case DATA_TYPE_VEC2_SIGNED_16:
9838                 case DATA_TYPE_VEC2_SIGNED_32:
9839                         return false;
9840
9841                 default:
9842                         DE_ASSERT(false);
9843         }
9844         return false;
9845 }
9846
9847 bool isFloat (ConversionDataType type)
9848 {
9849         switch (type)
9850         {
9851                 case DATA_TYPE_SIGNED_8:
9852                 case DATA_TYPE_SIGNED_16:
9853                 case DATA_TYPE_SIGNED_32:
9854                 case DATA_TYPE_SIGNED_64:
9855                 case DATA_TYPE_UNSIGNED_8:
9856                 case DATA_TYPE_UNSIGNED_16:
9857                 case DATA_TYPE_UNSIGNED_32:
9858                 case DATA_TYPE_UNSIGNED_64:
9859                 case DATA_TYPE_VEC2_SIGNED_16:
9860                 case DATA_TYPE_VEC2_SIGNED_32:
9861                         return false;
9862
9863                 case DATA_TYPE_FLOAT_16:
9864                 case DATA_TYPE_FLOAT_32:
9865                 case DATA_TYPE_FLOAT_64:
9866                         return true;
9867
9868                 default:
9869                         DE_ASSERT(false);
9870         }
9871         return false;
9872 }
9873
9874 const string getTypeName (ConversionDataType type)
9875 {
9876         string prefix = isSigned(type) ? "" : "u";
9877
9878         if              (isInt(type))                                           return prefix + "int"   + getBitWidthStr(type);
9879         else if (isFloat(type))                                         return prefix + "float" + getBitWidthStr(type);
9880         else if (type == DATA_TYPE_VEC2_SIGNED_16)      return "i16vec2";
9881         else if (type == DATA_TYPE_VEC2_SIGNED_32)      return "i32vec2";
9882         else                                                                            DE_ASSERT(false);
9883
9884         return "";
9885 }
9886
9887 const string getTestName (ConversionDataType from, ConversionDataType to, const char* suffix)
9888 {
9889         const string fullSuffix(suffix == DE_NULL ? "" : string("_") + string(suffix));
9890
9891         return getTypeName(from) + "_to_" + getTypeName(to) + fullSuffix;
9892 }
9893
9894 const string getAsmTypeName (ConversionDataType type, deUint32 elements = 1)
9895 {
9896         string prefix;
9897
9898         if              (isInt(type))                                           prefix = isSigned(type) ? "i" : "u";
9899         else if (isFloat(type))                                         prefix = "f";
9900         else if (type == DATA_TYPE_VEC2_SIGNED_16)      return "i16vec2";
9901         else if (type == DATA_TYPE_VEC2_SIGNED_32)      return "v2i32";
9902         else                                                                            DE_ASSERT(false);
9903         if ((isInt(type) || isFloat(type)) && elements == 2)
9904         {
9905                 prefix = "v2" + prefix;
9906         }
9907
9908         return prefix + getBitWidthStr(type);
9909 }
9910
9911 template<typename T>
9912 BufferSp getSpecializedBuffer (deInt64 number, deUint32 elements = 1)
9913 {
9914         return BufferSp(new Buffer<T>(vector<T>(elements, (T)number)));
9915 }
9916
9917 BufferSp getBuffer (ConversionDataType type, deInt64 number, deUint32 elements = 1)
9918 {
9919         switch (type)
9920         {
9921                 case DATA_TYPE_SIGNED_8:                return getSpecializedBuffer<deInt8>(number, elements);
9922                 case DATA_TYPE_SIGNED_16:               return getSpecializedBuffer<deInt16>(number, elements);
9923                 case DATA_TYPE_SIGNED_32:               return getSpecializedBuffer<deInt32>(number, elements);
9924                 case DATA_TYPE_SIGNED_64:               return getSpecializedBuffer<deInt64>(number, elements);
9925                 case DATA_TYPE_UNSIGNED_8:              return getSpecializedBuffer<deUint8>(number, elements);
9926                 case DATA_TYPE_UNSIGNED_16:             return getSpecializedBuffer<deUint16>(number, elements);
9927                 case DATA_TYPE_UNSIGNED_32:             return getSpecializedBuffer<deUint32>(number, elements);
9928                 case DATA_TYPE_UNSIGNED_64:             return getSpecializedBuffer<deUint64>(number, elements);
9929                 case DATA_TYPE_FLOAT_16:                return getSpecializedBuffer<deUint16>(number, elements);
9930                 case DATA_TYPE_FLOAT_32:                return getSpecializedBuffer<deUint32>(number, elements);
9931                 case DATA_TYPE_FLOAT_64:                return getSpecializedBuffer<deUint64>(number, elements);
9932                 case DATA_TYPE_VEC2_SIGNED_16:  return getSpecializedBuffer<deUint32>(number, elements);
9933                 case DATA_TYPE_VEC2_SIGNED_32:  return getSpecializedBuffer<deUint64>(number, elements);
9934
9935                 default:                                                TCU_THROW(InternalError, "Unimplemented type passed");
9936         }
9937 }
9938
9939 bool usesInt8 (ConversionDataType from, ConversionDataType to)
9940 {
9941         return (from == DATA_TYPE_SIGNED_8 || to == DATA_TYPE_SIGNED_8 ||
9942                         from == DATA_TYPE_UNSIGNED_8 || to == DATA_TYPE_UNSIGNED_8);
9943 }
9944
9945 bool usesInt16 (ConversionDataType from, ConversionDataType to)
9946 {
9947         return (from == DATA_TYPE_SIGNED_16 || to == DATA_TYPE_SIGNED_16 ||
9948                         from == DATA_TYPE_UNSIGNED_16 || to == DATA_TYPE_UNSIGNED_16 ||
9949                         from == DATA_TYPE_VEC2_SIGNED_16 || to == DATA_TYPE_VEC2_SIGNED_16);
9950 }
9951
9952 bool usesInt32 (ConversionDataType from, ConversionDataType to)
9953 {
9954         return (from == DATA_TYPE_SIGNED_32 || to == DATA_TYPE_SIGNED_32 ||
9955                         from == DATA_TYPE_UNSIGNED_32 || to == DATA_TYPE_UNSIGNED_32 ||
9956                         from == DATA_TYPE_VEC2_SIGNED_32|| to == DATA_TYPE_VEC2_SIGNED_32);
9957 }
9958
9959 bool usesInt64 (ConversionDataType from, ConversionDataType to)
9960 {
9961         return (from == DATA_TYPE_SIGNED_64 || to == DATA_TYPE_SIGNED_64 ||
9962                         from == DATA_TYPE_UNSIGNED_64 || to == DATA_TYPE_UNSIGNED_64);
9963 }
9964
9965 bool usesFloat16 (ConversionDataType from, ConversionDataType to)
9966 {
9967         return (from == DATA_TYPE_FLOAT_16 || to == DATA_TYPE_FLOAT_16);
9968 }
9969
9970 bool usesFloat32 (ConversionDataType from, ConversionDataType to)
9971 {
9972         return (from == DATA_TYPE_FLOAT_32 || to == DATA_TYPE_FLOAT_32);
9973 }
9974
9975 bool usesFloat64 (ConversionDataType from, ConversionDataType to)
9976 {
9977         return (from == DATA_TYPE_FLOAT_64 || to == DATA_TYPE_FLOAT_64);
9978 }
9979
9980 void getVulkanFeaturesAndExtensions (ConversionDataType from, ConversionDataType to, bool useStorageExt, VulkanFeatures& vulkanFeatures, vector<string>& extensions)
9981 {
9982         if (usesInt16(from, to) && !usesInt32(from, to))
9983                 vulkanFeatures.coreFeatures.shaderInt16 = DE_TRUE;
9984
9985         if (usesInt64(from, to))
9986                 vulkanFeatures.coreFeatures.shaderInt64 = DE_TRUE;
9987
9988         if (usesFloat64(from, to))
9989                 vulkanFeatures.coreFeatures.shaderFloat64 = DE_TRUE;
9990
9991         if ((usesInt16(from, to) || usesFloat16(from, to)) && useStorageExt)
9992         {
9993                 extensions.push_back("VK_KHR_16bit_storage");
9994                 vulkanFeatures.ext16BitStorage |= EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
9995         }
9996
9997         if (usesFloat16(from, to) || usesInt8(from, to))
9998         {
9999                 extensions.push_back("VK_KHR_shader_float16_int8");
10000
10001                 if (usesFloat16(from, to))
10002                 {
10003                         vulkanFeatures.extFloat16Int8 |= EXTFLOAT16INT8FEATURES_FLOAT16;
10004                 }
10005
10006                 if (usesInt8(from, to))
10007                 {
10008                         vulkanFeatures.extFloat16Int8 |= EXTFLOAT16INT8FEATURES_INT8;
10009
10010                         extensions.push_back("VK_KHR_8bit_storage");
10011                         vulkanFeatures.ext8BitStorage |= EXT8BITSTORAGEFEATURES_STORAGE_BUFFER;
10012                 }
10013         }
10014 }
10015
10016 struct ConvertCase
10017 {
10018         ConvertCase (const string& instruction, ConversionDataType from, ConversionDataType to, deInt64 number, bool separateOutput = false, deInt64 outputNumber = 0, const char* suffix = DE_NULL, bool useStorageExt = true)
10019         : m_fromType            (from)
10020         , m_toType                      (to)
10021         , m_elements            (1)
10022         , m_useStorageExt       (useStorageExt)
10023         , m_name                        (getTestName(from, to, suffix))
10024         {
10025                 string caps;
10026                 string decl;
10027                 string exts;
10028
10029                 m_asmTypes["inStorageType"]     = getAsmTypeName(from);
10030                 m_asmTypes["outStorageType"] = getAsmTypeName(to);
10031                 m_asmTypes["inCast"] = "OpCopyObject";
10032                 m_asmTypes["outCast"] = "OpCopyObject";
10033                 // If the storage extensions are being avoided, tests instead uses
10034                 // vectors so that they are easily convertible to 32-bit integers.
10035                 // |m_elements| indicates the size of the vector. It modifies how many
10036                 // items added to the buffers and converted in the tests.
10037                 //
10038                 // Currently only supports 1 (default) or 2 elements.
10039                 if (!m_useStorageExt)
10040                 {
10041                         bool in_change = false;
10042                         bool out_change = false;
10043                         if (usesFloat16(from, from) || usesInt16(from, from))
10044                         {
10045                                 m_asmTypes["inStorageType"] = "u32";
10046                                 m_asmTypes["inCast"] = "OpBitcast";
10047                                 m_elements = 2;
10048                                 in_change = true;
10049                         }
10050                         if (usesFloat16(to, to) || usesInt16(to, to))
10051                         {
10052                                 m_asmTypes["outStorageType"] = "u32";
10053                                 m_asmTypes["outCast"] = "OpBitcast";
10054                                 m_elements = 2;
10055                                 out_change = true;
10056                         }
10057                         if (in_change && !out_change)
10058                         {
10059                                 m_asmTypes["outStorageType"] = getAsmTypeName(to, m_elements);
10060                         }
10061                         if (!in_change && out_change)
10062                         {
10063                                 m_asmTypes["inStorageType"] = getAsmTypeName(from, m_elements);
10064                         }
10065                 }
10066
10067                 // Safety check for implementation.
10068                 if (m_elements < 1 || m_elements > 2)
10069                         TCU_THROW(InternalError, "Unsupported number of elements");
10070
10071                 m_asmTypes["inputType"]         = getAsmTypeName(from, m_elements);
10072                 m_asmTypes["outputType"]        = getAsmTypeName(to, m_elements);
10073
10074                 m_inputBuffer = getBuffer(from, number, m_elements);
10075                 if (separateOutput)
10076                         m_outputBuffer = getBuffer(to, outputNumber, m_elements);
10077                 else
10078                         m_outputBuffer = getBuffer(to, number, m_elements);
10079
10080                 if (usesInt8(from, to))
10081                 {
10082                         bool requiresInt8Capability = true;
10083                         if (instruction == "OpUConvert" || instruction == "OpSConvert")
10084                         {
10085                                 // Conversions between 8 and 32 bit are provided by SPV_KHR_8bit_storage. The rest requires explicit Int8
10086                                 if (usesInt32(from, to))
10087                                         requiresInt8Capability = false;
10088                         }
10089
10090                         caps += "OpCapability StorageBuffer8BitAccess\n";
10091                         if (requiresInt8Capability)
10092                                 caps += "OpCapability Int8\n";
10093
10094                         decl += "%i8         = OpTypeInt 8 1\n"
10095                                         "%u8         = OpTypeInt 8 0\n";
10096
10097                         if (m_elements == 2)
10098                         {
10099                                 decl += "%v2i8       = OpTypeVector %i8 2\n"
10100                                                 "%v2u8       = OpTypeVector %u8 2\n";
10101                         }
10102                         exts += "OpExtension \"SPV_KHR_8bit_storage\"\n";
10103                 }
10104
10105                 if (usesInt16(from, to))
10106                 {
10107                         bool requiresInt16Capability = true;
10108
10109                         if (instruction == "OpUConvert" || instruction == "OpSConvert" || instruction == "OpFConvert")
10110                         {
10111                                 // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10112                                 if (usesInt32(from, to) || usesFloat32(from, to))
10113                                         requiresInt16Capability = false;
10114                         }
10115
10116                         decl += "%i16        = OpTypeInt 16 1\n"
10117                                         "%u16        = OpTypeInt 16 0\n";
10118                         if (m_elements == 2)
10119                         {
10120                                 decl += "%v2i16      = OpTypeVector %i16 2\n"
10121                                                 "%v2u16      = OpTypeVector %u16 2\n";
10122                         }
10123                         else
10124                         {
10125                                 decl += "%i16vec2    = OpTypeVector %i16 2\n";
10126                         }
10127
10128                         // Conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10129                         if (requiresInt16Capability || !m_useStorageExt)
10130                                 caps += "OpCapability Int16\n";
10131                 }
10132
10133                 if (usesFloat16(from, to))
10134                 {
10135                         decl += "%f16        = OpTypeFloat 16\n";
10136                         if (m_elements == 2)
10137                         {
10138                                 decl += "%v2f16      = OpTypeVector %f16 2\n";
10139                         }
10140
10141                         // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Float16
10142                         if (!usesFloat32(from, to) || !m_useStorageExt)
10143                                 caps += "OpCapability Float16\n";
10144                 }
10145
10146                 if ((usesInt16(from, to) || usesFloat16(from, to)) && m_useStorageExt)
10147                 {
10148                         caps += "OpCapability StorageUniformBufferBlock16\n";
10149                         exts += "OpExtension \"SPV_KHR_16bit_storage\"\n";
10150                 }
10151
10152                 if (usesInt64(from, to))
10153                 {
10154                         caps += "OpCapability Int64\n";
10155                         decl += "%i64        = OpTypeInt 64 1\n"
10156                                         "%u64        = OpTypeInt 64 0\n";
10157                         if (m_elements == 2)
10158                         {
10159                                 decl += "%v2i64      = OpTypeVector %i64 2\n"
10160                                                 "%v2u64      = OpTypeVector %u64 2\n";
10161                         }
10162                 }
10163
10164                 if (usesFloat64(from, to))
10165                 {
10166                         caps += "OpCapability Float64\n";
10167                         decl += "%f64        = OpTypeFloat 64\n";
10168                         if (m_elements == 2)
10169                         {
10170                                 decl += "%v2f64        = OpTypeVector %f64 2\n";
10171                         }
10172                 }
10173
10174                 m_asmTypes["datatype_capabilities"]             = caps;
10175                 m_asmTypes["datatype_additional_decl"]  = decl;
10176                 m_asmTypes["datatype_extensions"]               = exts;
10177         }
10178
10179         ConversionDataType              m_fromType;
10180         ConversionDataType              m_toType;
10181         deUint32                                m_elements;
10182         bool                                    m_useStorageExt;
10183         string                                  m_name;
10184         map<string, string>             m_asmTypes;
10185         BufferSp                                m_inputBuffer;
10186         BufferSp                                m_outputBuffer;
10187 };
10188
10189 const string getConvertCaseShaderStr (const string& instruction, const ConvertCase& convertCase, bool addVectors = false)
10190 {
10191         map<string, string> params = convertCase.m_asmTypes;
10192
10193         params["instruction"]   = instruction;
10194         params["inDecorator"]   = getByteWidthStr(convertCase.m_fromType);
10195         params["outDecorator"]  = getByteWidthStr(convertCase.m_toType);
10196
10197         std::string shader (
10198                 "OpCapability Shader\n"
10199                 "${datatype_capabilities}"
10200                 "${datatype_extensions:opt}"
10201                 "OpMemoryModel Logical GLSL450\n"
10202                 "OpEntryPoint GLCompute %main \"main\"\n"
10203                 "OpExecutionMode %main LocalSize 1 1 1\n"
10204                 "OpSource GLSL 430\n"
10205                 "OpName %main           \"main\"\n"
10206                 // Decorators
10207                 "OpDecorate %indata DescriptorSet 0\n"
10208                 "OpDecorate %indata Binding 0\n"
10209                 "OpDecorate %outdata DescriptorSet 0\n"
10210                 "OpDecorate %outdata Binding 1\n"
10211                 "OpDecorate %in_buf BufferBlock\n"
10212                 "OpDecorate %out_buf BufferBlock\n"
10213                 "OpMemberDecorate %in_buf 0 Offset 0\n"
10214                 "OpMemberDecorate %out_buf 0 Offset 0\n"
10215                 // Base types
10216                 "%void       = OpTypeVoid\n"
10217                 "%voidf      = OpTypeFunction %void\n"
10218                 "%u32        = OpTypeInt 32 0\n"
10219                 "%i32        = OpTypeInt 32 1\n"
10220                 "%f32        = OpTypeFloat 32\n"
10221                 "%v2i32      = OpTypeVector %i32 2\n"
10222                 "${datatype_additional_decl}"
10223         );
10224         if (addVectors)
10225         {
10226                 shader += "%v2u32 = OpTypeVector %u32 2\n"
10227                                         "%v2f32 = OpTypeVector %f32 2\n";
10228         }
10229         shader +=
10230                 "%uvec3      = OpTypeVector %u32 3\n"
10231                 // Derived types
10232                 "%in_ptr     = OpTypePointer Uniform %${inStorageType}\n"
10233                 "%out_ptr    = OpTypePointer Uniform %${outStorageType}\n"
10234                 "%in_buf     = OpTypeStruct %${inStorageType}\n"
10235                 "%out_buf    = OpTypeStruct %${outStorageType}\n"
10236                 "%in_bufptr  = OpTypePointer Uniform %in_buf\n"
10237                 "%out_bufptr = OpTypePointer Uniform %out_buf\n"
10238                 "%indata     = OpVariable %in_bufptr Uniform\n"
10239                 "%outdata    = OpVariable %out_bufptr Uniform\n"
10240                 // Constants
10241                 "%zero       = OpConstant %i32 0\n"
10242                 // Main function
10243                 "%main       = OpFunction %void None %voidf\n"
10244                 "%label      = OpLabel\n"
10245                 "%inloc      = OpAccessChain %in_ptr %indata %zero\n"
10246                 "%outloc     = OpAccessChain %out_ptr %outdata %zero\n"
10247                 "%inval      = OpLoad %${inStorageType} %inloc\n"
10248                 "%in_cast    = ${inCast} %${inputType} %inval\n"
10249                 "%conv       = ${instruction} %${outputType} %in_cast\n"
10250                 "%out_cast   = ${outCast} %${outStorageType} %conv\n"
10251                 "              OpStore %outloc %out_cast\n"
10252                 "              OpReturn\n"
10253                 "              OpFunctionEnd\n"
10254         ;
10255
10256         return StringTemplate(shader).specialize(params);
10257 }
10258
10259 void createConvertCases (vector<ConvertCase>& testCases, const string& instruction)
10260 {
10261         if (instruction == "OpUConvert")
10262         {
10263                 // Convert unsigned int to unsigned int
10264                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_UNSIGNED_16,          42));
10265                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_UNSIGNED_32,          73));
10266                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_UNSIGNED_64,          121));
10267
10268                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_UNSIGNED_8,           33));
10269                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_UNSIGNED_32,          60653));
10270                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_UNSIGNED_64,          17991));
10271
10272                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_UNSIGNED_64,          904256275));
10273                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_UNSIGNED_16,          6275));
10274                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_UNSIGNED_8,           17));
10275
10276                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_UNSIGNED_32,          701256243));
10277                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_UNSIGNED_16,          4741));
10278                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_UNSIGNED_8,           65));
10279
10280                 // Zero extension for int->uint
10281                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_16,          56));
10282                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_32,          -47,                                                            true,   209));
10283                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_64,          -5,                                                                     true,   251));
10284                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_32,          14669));
10285                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_64,          -3341,                                                          true,   62195));
10286                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_64,          973610259));
10287
10288                 // Truncate for int->uint
10289                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_8,           -25711,                                                         true,   145));
10290                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_8,           103));
10291                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_8,           -1067742499291926803ll,                         true,   237));
10292                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_16,          12382));
10293                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_32,          -972812359,                                                     true,   3322154937u));
10294                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_16,          -1067742499291926803ll,                         true,   61165));
10295         }
10296         else if (instruction == "OpSConvert")
10297         {
10298                 // Sign extension int->int
10299                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_SIGNED_16,            -30));
10300                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_SIGNED_32,            55));
10301                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_SIGNED_64,            -3));
10302                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_SIGNED_32,            14669));
10303                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_SIGNED_64,            -3341));
10304                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_SIGNED_64,            973610259));
10305
10306                 // Truncate for int->int
10307                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_SIGNED_8,                     81));
10308                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_SIGNED_8,                     -93));
10309                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_SIGNED_8,                     3182748172687672ll,                                     true,   56));
10310                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_SIGNED_16,            12382));
10311                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_SIGNED_32,            -972812359));
10312                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_SIGNED_16,            -1067742499291926803ll,                         true,   -4371));
10313
10314                 // Sign extension for int->uint
10315                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_16,          56));
10316                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_32,          -47,                                                            true,   4294967249u));
10317                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_64,          -5,                                                                     true,   18446744073709551611ull));
10318                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_32,          14669));
10319                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_64,          -3341,                                                          true,   18446744073709548275ull));
10320                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_64,          973610259));
10321
10322                 // Truncate for int->uint
10323                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_8,           -25711,                                                         true,   145));
10324                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_8,           103));
10325                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_8,           -1067742499291926803ll,                         true,   237));
10326                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_16,          12382));
10327                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_32,          -972812359,                                                     true,   3322154937u));
10328                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_16,          -1067742499291926803ll,                         true,   61165));
10329
10330                 // Sign extension for uint->int
10331                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_SIGNED_16,            71));
10332                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_SIGNED_32,            201,                                                            true,   -55));
10333                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_SIGNED_64,            188,                                                            true,   -68));
10334                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_SIGNED_32,            14669));
10335                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_SIGNED_64,            62195,                                                          true,   -3341));
10336                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_SIGNED_64,            973610259));
10337
10338                 // Truncate for uint->int
10339                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_SIGNED_8,                     67));
10340                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_SIGNED_8,                     133,                                                            true,   -123));
10341                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_SIGNED_8,                     836927654193256494ull,                          true,   46));
10342                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_SIGNED_16,            12382));
10343                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_SIGNED_32,            18446744072736739257ull,                        true,   -972812359));
10344                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_SIGNED_16,            17379001574417624813ull,                        true,   -4371));
10345
10346                 // Convert i16vec2 to i32vec2 and vice versa
10347                 // Unsigned values are used here to represent negative signed values and to allow defined shifting behaviour.
10348                 // The actual signed value -32123 is used here as uint16 value 33413 and uint32 value 4294935173
10349                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_VEC2_SIGNED_16,       DATA_TYPE_VEC2_SIGNED_32,       (33413u << 16)                  | 27593,        true,   (4294935173ull << 32)   | 27593));
10350                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_VEC2_SIGNED_32,       DATA_TYPE_VEC2_SIGNED_16,       (4294935173ull << 32)   | 27593,        true,   (33413u << 16)                  | 27593));
10351         }
10352         else if (instruction == "OpFConvert")
10353         {
10354                 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10355                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_FLOAT_64,                     0x449a4000,                                                     true,   0x4093480000000000));
10356                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_FLOAT_32,                     0x4093480000000000,                                     true,   0x449a4000));
10357
10358                 // Conversions to/from 32-bit floats are supported by both 16-bit
10359                 // storage and Float16. The tests are duplicated to exercise both
10360                 // cases.
10361                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_FLOAT_16,                     0x449a4000,                                                     true,   0x64D2));
10362                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_FLOAT_32,                     0x64D2,                                                         true,   0x449a4000));
10363                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_FLOAT_16,                     0x449a4000,                                                     true,   0x64D2,                                 "no_storage",   false));
10364                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_FLOAT_32,                     0x64D2,                                                         true,   0x449a4000,                             "no_storage",   false));
10365
10366                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_FLOAT_64,                     0x64D2,                                                         true,   0x4093480000000000));
10367                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_FLOAT_16,                     0x4093480000000000,                                     true,   0x64D2));
10368         }
10369         else if (instruction == "OpConvertFToU")
10370         {
10371                 // Normal numbers from uint8 range
10372                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_8,           0x5020,                                                         true,   33,                                                                     "33",   false));
10373                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_8,           0x42280000,                                                     true,   42,                                                                     "42"));
10374                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_8,           0x4067800000000000ull,                          true,   188,                                                            "188"));
10375
10376                 // Maximum uint8 value
10377                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_8,           0x5BF8,                                                         true,   255,                                                            "max",  false));
10378                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_8,           0x437F0000,                                                     true,   255,                                                            "max"));
10379                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_8,           0x406FE00000000000ull,                          true,   255,                                                            "max"));
10380
10381                 // +0
10382                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_8,           0x0000,                                                         true,   0,                                                                      "p0",   false));
10383                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_8,           0x00000000,                                                     true,   0,                                                                      "p0"));
10384                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_8,           0x0000000000000000ull,                          true,   0,                                                                      "p0"));
10385
10386                 // -0
10387                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_8,           0x8000,                                                         true,   0,                                                                      "m0",   false));
10388                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_8,           0x80000000,                                                     true,   0,                                                                      "m0"));
10389                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_8,           0x8000000000000000ull,                          true,   0,                                                                      "m0"));
10390
10391                 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10392                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_16,          0x64D2,                                                         true,   1234,                                                           "1234", false));
10393                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_32,          0x64D2,                                                         true,   1234,                                                           "1234", false));
10394                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_64,          0x64D2,                                                         true,   1234,                                                           "1234", false));
10395
10396                 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10397                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_16,          0x7BFF,                                                         true,   65504,                                                          "max",  false));
10398                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_32,          0x7BFF,                                                         true,   65504,                                                          "max",  false));
10399                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_64,          0x7BFF,                                                         true,   65504,                                                          "max",  false));
10400
10401                 // +0
10402                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_32,          0x0000,                                                         true,   0,                                                                      "p0",   false));
10403                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_16,          0x0000,                                                         true,   0,                                                                      "p0",   false));
10404                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_64,          0x0000,                                                         true,   0,                                                                      "p0",   false));
10405
10406                 // -0
10407                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_16,          0x8000,                                                         true,   0,                                                                      "m0",   false));
10408                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_32,          0x8000,                                                         true,   0,                                                                      "m0",   false));
10409                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_64,          0x8000,                                                         true,   0,                                                                      "m0",   false));
10410
10411                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_16,          0x449a4000,                                                     true,   1234));
10412                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_32,          0x449a4000,                                                     true,   1234));
10413                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_64,          0x449a4000,                                                     true,   1234));
10414                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_16,          0x4093480000000000,                                     true,   1234));
10415                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_32,          0x4093480000000000,                                     true,   1234));
10416                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_64,          0x4093480000000000,                                     true,   1234));
10417         }
10418         else if (instruction == "OpConvertUToF")
10419         {
10420                 // Normal numbers from uint8 range
10421                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_16,                     116,                                                            true,   0x5740,                                                         "116",  false));
10422                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_32,                     232,                                                            true,   0x43680000,                                                     "232"));
10423                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_64,                     164,                                                            true,   0x4064800000000000ull,                          "164"));
10424
10425                 // Maximum uint8 value
10426                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_16,                     255,                                                            true,   0x5BF8,                                                         "max",  false));
10427                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_32,                     255,                                                            true,   0x437F0000,                                                     "max"));
10428                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_64,                     255,                                                            true,   0x406FE00000000000ull,                          "max"));
10429
10430                 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10431                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_FLOAT_16,                     1234,                                                           true,   0x64D2,                                                         "1234", false));
10432                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_FLOAT_16,                     1234,                                                           true,   0x64D2,                                                         "1234", false));
10433                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_16,                     1234,                                                           true,   0x64D2,                                                         "1234", false));
10434
10435                 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10436                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_FLOAT_16,                     65504,                                                          true,   0x7BFF,                                                         "max",  false));
10437                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_FLOAT_16,                     65504,                                                          true,   0x7BFF,                                                         "max",  false));
10438                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_16,                     65504,                                                          true,   0x7BFF,                                                         "max",  false));
10439
10440                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_FLOAT_32,                     1234,                                                           true,   0x449a4000));
10441                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_FLOAT_64,                     1234,                                                           true,   0x4093480000000000));
10442                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_FLOAT_32,                     1234,                                                           true,   0x449a4000));
10443                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_FLOAT_64,                     1234,                                                           true,   0x4093480000000000));
10444                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_32,                     1234,                                                           true,   0x449a4000));
10445                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_64,                     1234,                                                           true,   0x4093480000000000));
10446         }
10447         else if (instruction == "OpConvertFToS")
10448         {
10449                 // Normal numbers from int8 range
10450                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0xC980,                                                         true,   -11,                                                            "m11",  false));
10451                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0xC2140000,                                                     true,   -37,                                                            "m37"));
10452                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0xC050800000000000ull,                          true,   -66,                                                            "m66"));
10453
10454                 // Minimum int8 value
10455                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0xD800,                                                         true,   -128,                                                           "min",  false));
10456                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0xC3000000,                                                     true,   -128,                                                           "min"));
10457                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0xC060000000000000ull,                          true,   -128,                                                           "min"));
10458
10459                 // Maximum int8 value
10460                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0x57F0,                                                         true,   127,                                                            "max",  false));
10461                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0x42FE0000,                                                     true,   127,                                                            "max"));
10462                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0x405FC00000000000ull,                          true,   127,                                                            "max"));
10463
10464                 // +0
10465                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0x0000,                                                         true,   0,                                                                      "p0",   false));
10466                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0x00000000,                                                     true,   0,                                                                      "p0"));
10467                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0x0000000000000000ull,                          true,   0,                                                                      "p0"));
10468
10469                 // -0
10470                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0x8000,                                                         true,   0,                                                                      "m0",   false));
10471                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0x80000000,                                                     true,   0,                                                                      "m0"));
10472                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0x8000000000000000ull,                          true,   0,                                                                      "m0"));
10473
10474                 // All hexadecimal values below represent -1234.0 as 16/32/64-bit IEEE 754 float
10475                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0xE4D2,                                                         true,   -1234,                                                          "m1234",        false));
10476                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0xE4D2,                                                         true,   -1234,                                                          "m1234",        false));
10477                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0xE4D2,                                                         true,   -1234,                                                          "m1234",        false));
10478
10479                 // 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
10480                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0xF800,                                                         true,   -32768,                                                         "min",  false));
10481                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0xF800,                                                         true,   -32768,                                                         "min",  false));
10482                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0xF800,                                                         true,   -32768,                                                         "min",  false));
10483
10484                 // 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
10485                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0x77FF,                                                         true,   32752,                                                          "max",  false));
10486                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0x77FF,                                                         true,   32752,                                                          "max",  false));
10487                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0x77FF,                                                         true,   32752,                                                          "max",  false));
10488
10489                 // +0
10490                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0x0000,                                                         true,   0,                                                                      "p0",   false));
10491                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0x0000,                                                         true,   0,                                                                      "p0",   false));
10492                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0x0000,                                                         true,   0,                                                                      "p0",   false));
10493
10494                 // -0
10495                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0x8000,                                                         true,   0,                                                                      "m0",   false));
10496                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0x8000,                                                         true,   0,                                                                      "m0",   false));
10497                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0x8000,                                                         true,   0,                                                                      "m0",   false));
10498
10499                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_16,            0xc49a4000,                                                     true,   -1234));
10500                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_32,            0xc49a4000,                                                     true,   -1234));
10501                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_64,            0xc49a4000,                                                     true,   -1234));
10502                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_16,            0xc093480000000000,                                     true,   -1234));
10503                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_32,            0xc093480000000000,                                     true,   -1234));
10504                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_64,            0xc093480000000000,                                     true,   -1234));
10505                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_16,            0x453b9000,                                                     true,    3001,                                                          "p3001"));
10506                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_16,            0xc53b9000,                                                     true,   -3001,                                                          "m3001"));
10507         }
10508         else if (instruction == "OpConvertSToF")
10509         {
10510                 // Normal numbers from int8 range
10511                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_16,                     -12,                                                            true,   0xCA00,                                                         "m21",  false));
10512                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_32,                     -21,                                                            true,   0xC1A80000,                                                     "m21"));
10513                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_64,                     -99,                                                            true,   0xC058C00000000000ull,                          "m99"));
10514
10515                 // Minimum int8 value
10516                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_16,                     -128,                                                           true,   0xD800,                                                         "min",  false));
10517                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_32,                     -128,                                                           true,   0xC3000000,                                                     "min"));
10518                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_64,                     -128,                                                           true,   0xC060000000000000ull,                          "min"));
10519
10520                 // Maximum int8 value
10521                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_16,                     127,                                                            true,   0x57F0,                                                         "max",  false));
10522                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_32,                     127,                                                            true,   0x42FE0000,                                                     "max"));
10523                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_64,                     127,                                                            true,   0x405FC00000000000ull,                          "max"));
10524
10525                 // All hexadecimal values below represent -1234.0 as 16/32/64-bit IEEE 754 float
10526                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_16,                     -1234,                                                          true,   0xE4D2,                                                         "m1234",        false));
10527                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_16,                     -1234,                                                          true,   0xE4D2,                                                         "m1234",        false));
10528                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_16,                     -1234,                                                          true,   0xE4D2,                                                         "m1234",        false));
10529
10530                 // 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
10531                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_16,                     -32768,                                                         true,   0xF800,                                                         "min",  false));
10532                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_16,                     -32768,                                                         true,   0xF800,                                                         "min",  false));
10533                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_16,                     -32768,                                                         true,   0xF800,                                                         "min",  false));
10534
10535                 // 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
10536                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_16,                     32752,                                                          true,   0x77FF,                                                         "max",  false));
10537                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_16,                     32752,                                                          true,   0x77FF,                                                         "max",  false));
10538                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_16,                     32752,                                                          true,   0x77FF,                                                         "max",  false));
10539
10540                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_32,                     -1234,                                                          true,   0xc49a4000));
10541                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_64,                     -1234,                                                          true,   0xc093480000000000));
10542                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_32,                     -1234,                                                          true,   0xc49a4000));
10543                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_64,                     -1234,                                                          true,   0xc093480000000000));
10544                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_32,                     -1234,                                                          true,   0xc49a4000));
10545                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_64,                     -1234,                                                          true,   0xc093480000000000));
10546         }
10547         else
10548                 DE_FATAL("Unknown instruction");
10549 }
10550
10551 const map<string, string> getConvertCaseFragments (string instruction, const ConvertCase& convertCase)
10552 {
10553         map<string, string> params = convertCase.m_asmTypes;
10554         map<string, string> fragments;
10555
10556         params["instruction"] = instruction;
10557         params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
10558
10559         const StringTemplate decoration (
10560                 "      OpDecorate %SSBOi DescriptorSet 0\n"
10561                 "      OpDecorate %SSBOo DescriptorSet 0\n"
10562                 "      OpDecorate %SSBOi Binding 0\n"
10563                 "      OpDecorate %SSBOo Binding 1\n"
10564                 "      OpDecorate %s_SSBOi Block\n"
10565                 "      OpDecorate %s_SSBOo Block\n"
10566                 "OpMemberDecorate %s_SSBOi 0 Offset 0\n"
10567                 "OpMemberDecorate %s_SSBOo 0 Offset 0\n");
10568
10569         const StringTemplate pre_main (
10570                 "${datatype_additional_decl:opt}"
10571                 "    %ptr_in = OpTypePointer StorageBuffer %${inputType}\n"
10572                 "   %ptr_out = OpTypePointer StorageBuffer %${outputType}\n"
10573                 "   %s_SSBOi = OpTypeStruct %${inputType}\n"
10574                 "   %s_SSBOo = OpTypeStruct %${outputType}\n"
10575                 " %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
10576                 " %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
10577                 "     %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
10578                 "     %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
10579
10580         const StringTemplate testfun (
10581                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10582                 "%param     = OpFunctionParameter %v4f32\n"
10583                 "%label     = OpLabel\n"
10584                 "%iLoc      = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
10585                 "%oLoc      = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
10586                 "%valIn     = OpLoad %${inputType} %iLoc\n"
10587                 "%valOut    = ${instruction} %${outputType} %valIn\n"
10588                 "             OpStore %oLoc %valOut\n"
10589                 "             OpReturnValue %param\n"
10590                 "             OpFunctionEnd\n");
10591
10592         params["datatype_extensions"] =
10593                 params["datatype_extensions"] +
10594                 "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
10595
10596         fragments["capability"] = params["datatype_capabilities"];
10597         fragments["extension"]  = params["datatype_extensions"];
10598         fragments["decoration"] = decoration.specialize(params);
10599         fragments["pre_main"]   = pre_main.specialize(params);
10600         fragments["testfun"]    = testfun.specialize(params);
10601
10602         return fragments;
10603 }
10604
10605 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in compute shaders
10606 tcu::TestCaseGroup* createConvertComputeTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
10607 {
10608         de::MovePtr<tcu::TestCaseGroup>         group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
10609         vector<ConvertCase>                                     testCases;
10610         createConvertCases(testCases, instruction);
10611
10612         for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
10613         {
10614                 ComputeShaderSpec spec;
10615                 spec.assembly                   = getConvertCaseShaderStr(instruction, *test, true);
10616                 spec.numWorkGroups              = IVec3(1, 1, 1);
10617                 spec.inputs.push_back   (test->m_inputBuffer);
10618                 spec.outputs.push_back  (test->m_outputBuffer);
10619
10620                 getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, spec.requestedVulkanFeatures, spec.extensions);
10621
10622                 group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "", spec));
10623         }
10624         return group.release();
10625 }
10626
10627 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in graphics shaders
10628 tcu::TestCaseGroup* createConvertGraphicsTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
10629 {
10630         de::MovePtr<tcu::TestCaseGroup>         group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
10631         vector<ConvertCase>                                     testCases;
10632         createConvertCases(testCases, instruction);
10633
10634         for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
10635         {
10636                 map<string, string>     fragments               = getConvertCaseFragments(instruction, *test);
10637                 VulkanFeatures          vulkanFeatures;
10638                 GraphicsResources       resources;
10639                 vector<string>          extensions;
10640                 SpecConstants           noSpecConstants;
10641                 PushConstants           noPushConstants;
10642                 GraphicsInterfaces      noInterfaces;
10643                 tcu::RGBA                       defaultColors[4];
10644
10645                 getDefaultColors                        (defaultColors);
10646                 resources.inputs.push_back      (Resource(test->m_inputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
10647                 resources.outputs.push_back     (Resource(test->m_outputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
10648                 extensions.push_back            ("VK_KHR_storage_buffer_storage_class");
10649
10650                 getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, vulkanFeatures, extensions);
10651
10652                 vulkanFeatures.coreFeatures.vertexPipelineStoresAndAtomics      = true;
10653                 vulkanFeatures.coreFeatures.fragmentStoresAndAtomics            = true;
10654
10655                 createTestsForAllStages(
10656                         test->m_name, defaultColors, defaultColors, fragments, noSpecConstants,
10657                         noPushConstants, resources, noInterfaces, extensions, vulkanFeatures, group.get());
10658         }
10659         return group.release();
10660 }
10661
10662 // Constant-Creation Instructions: OpConstant, OpConstantComposite
10663 tcu::TestCaseGroup* createOpConstantFloat16Tests(tcu::TestContext& testCtx)
10664 {
10665         de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests                (new tcu::TestCaseGroup(testCtx, "opconstant", "OpConstant and OpConstantComposite instruction"));
10666         RGBA                                                    inputColors[4];
10667         RGBA                                                    outputColors[4];
10668         vector<string>                                  extensions;
10669         GraphicsResources                               resources;
10670         VulkanFeatures                                  features;
10671
10672         const char                                              functionStart[]  =
10673                 "%test_code             = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10674                 "%param1                = OpFunctionParameter %v4f32\n"
10675                 "%lbl                   = OpLabel\n";
10676
10677         const char                                              functionEnd[]           =
10678                 "%transformed_param_32  = OpFConvert %v4f32 %transformed_param\n"
10679                 "                         OpReturnValue %transformed_param_32\n"
10680                 "                         OpFunctionEnd\n";
10681
10682         struct NameConstantsCode
10683         {
10684                 string name;
10685                 string constants;
10686                 string code;
10687         };
10688
10689 #define FLOAT_16_COMMON_TYPES_AND_CONSTS \
10690                         "%f16                  = OpTypeFloat 16\n"                                                 \
10691                         "%c_f16_0              = OpConstant %f16 0.0\n"                                            \
10692                         "%c_f16_0_5            = OpConstant %f16 0.5\n"                                            \
10693                         "%c_f16_1              = OpConstant %f16 1.0\n"                                            \
10694                         "%v4f16                = OpTypeVector %f16 4\n"                                            \
10695                         "%fp_f16               = OpTypePointer Function %f16\n"                                    \
10696                         "%fp_v4f16             = OpTypePointer Function %v4f16\n"                                  \
10697                         "%c_v4f16_1_1_1_1      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n" \
10698                         "%a4f16                = OpTypeArray %f16 %c_u32_4\n"                                      \
10699
10700         NameConstantsCode                               tests[] =
10701         {
10702                 {
10703                         "vec4",
10704
10705                         FLOAT_16_COMMON_TYPES_AND_CONSTS
10706                         "%cval                 = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_0\n",
10707                         "%param1_16            = OpFConvert %v4f16 %param1\n"
10708                         "%transformed_param    = OpFAdd %v4f16 %param1_16 %cval\n"
10709                 },
10710                 {
10711                         "struct",
10712
10713                         FLOAT_16_COMMON_TYPES_AND_CONSTS
10714                         "%stype                = OpTypeStruct %v4f16 %f16\n"
10715                         "%fp_stype             = OpTypePointer Function %stype\n"
10716                         "%f16_n_1              = OpConstant %f16 -1.0\n"
10717                         "%f16_1_5              = OpConstant %f16 !0x3e00\n" // +1.5
10718                         "%cvec                 = OpConstantComposite %v4f16 %f16_1_5 %f16_1_5 %f16_1_5 %c_f16_1\n"
10719                         "%cval                 = OpConstantComposite %stype %cvec %f16_n_1\n",
10720
10721                         "%v                    = OpVariable %fp_stype Function %cval\n"
10722                         "%vec_ptr              = OpAccessChain %fp_v4f16 %v %c_u32_0\n"
10723                         "%f16_ptr              = OpAccessChain %fp_f16 %v %c_u32_1\n"
10724                         "%vec_val              = OpLoad %v4f16 %vec_ptr\n"
10725                         "%f16_val              = OpLoad %f16 %f16_ptr\n"
10726                         "%tmp1                 = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_1 %f16_val\n" // vec4(-1)
10727                         "%param1_16            = OpFConvert %v4f16 %param1\n"
10728                         "%tmp2                 = OpFAdd %v4f16 %tmp1 %param1_16\n" // param1 + vec4(-1)
10729                         "%transformed_param    = OpFAdd %v4f16 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
10730                 },
10731                 {
10732                         // [1|0|0|0.5] [x] = x + 0.5
10733                         // [0|1|0|0.5] [y] = y + 0.5
10734                         // [0|0|1|0.5] [z] = z + 0.5
10735                         // [0|0|0|1  ] [1] = 1
10736                         "matrix",
10737
10738                         FLOAT_16_COMMON_TYPES_AND_CONSTS
10739                         "%mat4x4_f16           = OpTypeMatrix %v4f16 4\n"
10740                         "%v4f16_1_0_0_0        = OpConstantComposite %v4f16 %c_f16_1 %c_f16_0 %c_f16_0 %c_f16_0\n"
10741                         "%v4f16_0_1_0_0        = OpConstantComposite %v4f16 %c_f16_0 %c_f16_1 %c_f16_0 %c_f16_0\n"
10742                         "%v4f16_0_0_1_0        = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_1 %c_f16_0\n"
10743                         "%v4f16_0_5_0_5_0_5_1  = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_1\n"
10744                         "%cval                 = OpConstantComposite %mat4x4_f16 %v4f16_1_0_0_0 %v4f16_0_1_0_0 %v4f16_0_0_1_0 %v4f16_0_5_0_5_0_5_1\n",
10745
10746                         "%param1_16            = OpFConvert %v4f16 %param1\n"
10747                         "%transformed_param    = OpMatrixTimesVector %v4f16 %cval %param1_16\n"
10748                 },
10749                 {
10750                         "array",
10751
10752                         FLOAT_16_COMMON_TYPES_AND_CONSTS
10753                         "%c_v4f16_1_1_1_0      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
10754                         "%fp_a4f16             = OpTypePointer Function %a4f16\n"
10755                         "%f16_n_1              = OpConstant %f16 -1.0\n"
10756                         "%f16_1_5              = OpConstant %f16 !0x3e00\n" // +1.5
10757                         "%carr                 = OpConstantComposite %a4f16 %c_f16_0 %f16_n_1 %f16_1_5 %c_f16_0\n",
10758
10759                         "%v                    = OpVariable %fp_a4f16 Function %carr\n"
10760                         "%f                    = OpAccessChain %fp_f16 %v %c_u32_0\n"
10761                         "%f1                   = OpAccessChain %fp_f16 %v %c_u32_1\n"
10762                         "%f2                   = OpAccessChain %fp_f16 %v %c_u32_2\n"
10763                         "%f3                   = OpAccessChain %fp_f16 %v %c_u32_3\n"
10764                         "%f_val                = OpLoad %f16 %f\n"
10765                         "%f1_val               = OpLoad %f16 %f1\n"
10766                         "%f2_val               = OpLoad %f16 %f2\n"
10767                         "%f3_val               = OpLoad %f16 %f3\n"
10768                         "%ftot1                = OpFAdd %f16 %f_val %f1_val\n"
10769                         "%ftot2                = OpFAdd %f16 %ftot1 %f2_val\n"
10770                         "%ftot3                = OpFAdd %f16 %ftot2 %f3_val\n"  // 0 - 1 + 1.5 + 0
10771                         "%add_vec              = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %ftot3\n"
10772                         "%param1_16            = OpFConvert %v4f16 %param1\n"
10773                         "%transformed_param    = OpFAdd %v4f16 %param1_16 %add_vec\n"
10774                 },
10775                 {
10776                         //
10777                         // [
10778                         //   {
10779                         //      0.0,
10780                         //      [ 1.0, 1.0, 1.0, 1.0]
10781                         //   },
10782                         //   {
10783                         //      1.0,
10784                         //      [ 0.0, 0.5, 0.0, 0.0]
10785                         //   }, //     ^^^
10786                         //   {
10787                         //      0.0,
10788                         //      [ 1.0, 1.0, 1.0, 1.0]
10789                         //   }
10790                         // ]
10791                         "array_of_struct_of_array",
10792
10793                         FLOAT_16_COMMON_TYPES_AND_CONSTS
10794                         "%c_v4f16_1_1_1_0      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
10795                         "%fp_a4f16             = OpTypePointer Function %a4f16\n"
10796                         "%stype                = OpTypeStruct %f16 %a4f16\n"
10797                         "%a3stype              = OpTypeArray %stype %c_u32_3\n"
10798                         "%fp_a3stype           = OpTypePointer Function %a3stype\n"
10799                         "%ca4f16_0             = OpConstantComposite %a4f16 %c_f16_0 %c_f16_0_5 %c_f16_0 %c_f16_0\n"
10800                         "%ca4f16_1             = OpConstantComposite %a4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
10801                         "%cstype1              = OpConstantComposite %stype %c_f16_0 %ca4f16_1\n"
10802                         "%cstype2              = OpConstantComposite %stype %c_f16_1 %ca4f16_0\n"
10803                         "%carr                 = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
10804
10805                         "%v                    = OpVariable %fp_a3stype Function %carr\n"
10806                         "%f                    = OpAccessChain %fp_f16 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
10807                         "%f_l                  = OpLoad %f16 %f\n"
10808                         "%add_vec              = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %f_l\n"
10809                         "%param1_16            = OpFConvert %v4f16 %param1\n"
10810                         "%transformed_param    = OpFAdd %v4f16 %param1_16 %add_vec\n"
10811                 }
10812         };
10813
10814         getHalfColorsFullAlpha(inputColors);
10815         outputColors[0] = RGBA(255, 255, 255, 255);
10816         outputColors[1] = RGBA(255, 127, 127, 255);
10817         outputColors[2] = RGBA(127, 255, 127, 255);
10818         outputColors[3] = RGBA(127, 127, 255, 255);
10819
10820         extensions.push_back("VK_KHR_shader_float16_int8");
10821         features.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
10822
10823         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
10824         {
10825                 map<string, string> fragments;
10826
10827                 fragments["capability"] = "OpCapability Float16\n";
10828                 fragments["pre_main"]   = tests[testNdx].constants;
10829                 fragments["testfun"]    = string(functionStart) + tests[testNdx].code + functionEnd;
10830
10831                 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, resources, extensions, opConstantCompositeTests.get(), features);
10832         }
10833         return opConstantCompositeTests.release();
10834 }
10835
10836 template<typename T>
10837 void finalizeTestsCreation (T&                                                  specResource,
10838                                                         const map<string, string>&      fragments,
10839                                                         tcu::TestContext&                       testCtx,
10840                                                         tcu::TestCaseGroup&                     testGroup,
10841                                                         const std::string&                      testName,
10842                                                         const VulkanFeatures&           vulkanFeatures,
10843                                                         const vector<string>&           extensions,
10844                                                         const IVec3&                            numWorkGroups);
10845
10846 template<>
10847 void finalizeTestsCreation (GraphicsResources&                  specResource,
10848                                                         const map<string, string>&      fragments,
10849                                                         tcu::TestContext&                       ,
10850                                                         tcu::TestCaseGroup&                     testGroup,
10851                                                         const std::string&                      testName,
10852                                                         const VulkanFeatures&           vulkanFeatures,
10853                                                         const vector<string>&           extensions,
10854                                                         const IVec3&                            )
10855 {
10856         RGBA defaultColors[4];
10857         getDefaultColors(defaultColors);
10858
10859         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, specResource, extensions, &testGroup, vulkanFeatures);
10860 }
10861
10862 template<>
10863 void finalizeTestsCreation (ComputeShaderSpec&                  specResource,
10864                                                         const map<string, string>&      fragments,
10865                                                         tcu::TestContext&                       testCtx,
10866                                                         tcu::TestCaseGroup&                     testGroup,
10867                                                         const std::string&                      testName,
10868                                                         const VulkanFeatures&           vulkanFeatures,
10869                                                         const vector<string>&           extensions,
10870                                                         const IVec3&                            numWorkGroups)
10871 {
10872         specResource.numWorkGroups = numWorkGroups;
10873         specResource.requestedVulkanFeatures = vulkanFeatures;
10874         specResource.extensions = extensions;
10875
10876         specResource.assembly = makeComputeShaderAssembly(fragments);
10877
10878         testGroup.addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", specResource));
10879 }
10880
10881 template<class SpecResource>
10882 tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bool nanSupported)
10883 {
10884         const string                                            nan                                     = nanSupported ? "_nan" : "";
10885         const string                                            groupName                       = "logical" + nan;
10886         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Float 16 logical tests"));
10887
10888         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
10889         const string                                            spvCapabilities         = string("OpCapability Float16\n") + (nanSupported ? "OpCapability SignedZeroInfNanPreserve\n" : "");
10890         const string                                            spvExtensions           = (nanSupported ? "OpExtension \"SPV_KHR_float_controls\"\n" : "");
10891         const string                                            spvExecutionMode        = nanSupported ? "OpExecutionMode %BP_main SignedZeroInfNanPreserve 16\n" : "";
10892         const deUint32                                          numDataPointsScalar     = 16;
10893         const deUint32                                          numDataPointsVector     = 14;
10894         const vector<deFloat16>                         float16DataScalar       = getFloat16s(rnd, numDataPointsScalar);
10895         const vector<deFloat16>                         float16DataVector       = getFloat16s(rnd, numDataPointsVector);
10896         const vector<deFloat16>                         float16Data1            = squarize(float16DataScalar, 0);                       // Total Size: square(sizeof(float16DataScalar))
10897         const vector<deFloat16>                         float16Data2            = squarize(float16DataScalar, 1);
10898         const vector<deFloat16>                         float16DataVec1         = squarizeVector(float16DataVector, 0);         // Total Size: 2 * (square(square(sizeof(float16DataVector))))
10899         const vector<deFloat16>                         float16DataVec2         = squarizeVector(float16DataVector, 1);
10900         const vector<deFloat16>                         float16OutDummy         (float16Data1.size(), 0);
10901         const vector<deFloat16>                         float16OutVecDummy      (float16DataVec1.size(), 0);
10902
10903         struct TestOp
10904         {
10905                 const char*             opCode;
10906                 VerifyIOFunc    verifyFuncNan;
10907                 VerifyIOFunc    verifyFuncNonNan;
10908                 const deUint32  argCount;
10909         };
10910
10911         const TestOp    testOps[]       =
10912         {
10913                 { "OpIsNan"                                             ,       compareFP16Logical<fp16isNan,                           true,  false, true>,    compareFP16Logical<fp16isNan,                           true,  false, false>,   1       },
10914                 { "OpIsInf"                                             ,       compareFP16Logical<fp16isInf,                           true,  false, true>,    compareFP16Logical<fp16isInf,                           true,  false, false>,   1       },
10915                 { "OpFOrdEqual"                                 ,       compareFP16Logical<fp16isEqual,                         false, true,  true>,    compareFP16Logical<fp16isEqual,                         false, true,  false>,   2       },
10916                 { "OpFUnordEqual"                               ,       compareFP16Logical<fp16isEqual,                         false, false, true>,    compareFP16Logical<fp16isEqual,                         false, false, false>,   2       },
10917                 { "OpFOrdNotEqual"                              ,       compareFP16Logical<fp16isUnequal,                       false, true,  true>,    compareFP16Logical<fp16isUnequal,                       false, true,  false>,   2       },
10918                 { "OpFUnordNotEqual"                    ,       compareFP16Logical<fp16isUnequal,                       false, false, true>,    compareFP16Logical<fp16isUnequal,                       false, false, false>,   2       },
10919                 { "OpFOrdLessThan"                              ,       compareFP16Logical<fp16isLess,                          false, true,  true>,    compareFP16Logical<fp16isLess,                          false, true,  false>,   2       },
10920                 { "OpFUnordLessThan"                    ,       compareFP16Logical<fp16isLess,                          false, false, true>,    compareFP16Logical<fp16isLess,                          false, false, false>,   2       },
10921                 { "OpFOrdGreaterThan"                   ,       compareFP16Logical<fp16isGreater,                       false, true,  true>,    compareFP16Logical<fp16isGreater,                       false, true,  false>,   2       },
10922                 { "OpFUnordGreaterThan"                 ,       compareFP16Logical<fp16isGreater,                       false, false, true>,    compareFP16Logical<fp16isGreater,                       false, false, false>,   2       },
10923                 { "OpFOrdLessThanEqual"                 ,       compareFP16Logical<fp16isLessOrEqual,           false, true,  true>,    compareFP16Logical<fp16isLessOrEqual,           false, true,  false>,   2       },
10924                 { "OpFUnordLessThanEqual"               ,       compareFP16Logical<fp16isLessOrEqual,           false, false, true>,    compareFP16Logical<fp16isLessOrEqual,           false, false, false>,   2       },
10925                 { "OpFOrdGreaterThanEqual"              ,       compareFP16Logical<fp16isGreaterOrEqual,        false, true,  true>,    compareFP16Logical<fp16isGreaterOrEqual,        false, true,  false>,   2       },
10926                 { "OpFUnordGreaterThanEqual"    ,       compareFP16Logical<fp16isGreaterOrEqual,        false, false, true>,    compareFP16Logical<fp16isGreaterOrEqual,        false, false, false>,   2       },
10927         };
10928
10929         { // scalar cases
10930                 const StringTemplate preMain
10931                 (
10932                         "      %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
10933                         "     %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
10934                         "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
10935                         " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
10936                         "            %f16 = OpTypeFloat 16\n"
10937                         "          %v2f16 = OpTypeVector %f16 2\n"
10938                         "        %c_f16_0 = OpConstant %f16 0.0\n"
10939                         "        %c_f16_1 = OpConstant %f16 1.0\n"
10940                         "         %up_u32 = OpTypePointer Uniform %u32\n"
10941                         "         %ra_u32 = OpTypeArray %u32 %c_i32_hndp\n"
10942                         "         %SSBO16 = OpTypeStruct %ra_u32\n"
10943                         "      %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
10944                         "     %f16_i32_fn = OpTypeFunction %f16 %i32\n"
10945                         "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
10946                         "      %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
10947                         "      %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
10948                         "       %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
10949                 );
10950
10951                 const StringTemplate decoration
10952                 (
10953                         "OpDecorate %ra_u32 ArrayStride 4\n"
10954                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
10955                         "OpDecorate %SSBO16 BufferBlock\n"
10956                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
10957                         "OpDecorate %ssbo_src0 Binding 0\n"
10958                         "OpDecorate %ssbo_src1 DescriptorSet 0\n"
10959                         "OpDecorate %ssbo_src1 Binding 1\n"
10960                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
10961                         "OpDecorate %ssbo_dst Binding 2\n"
10962                 );
10963
10964                 const StringTemplate testFun
10965                 (
10966                         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10967                         "    %param = OpFunctionParameter %v4f32\n"
10968
10969                         "    %entry = OpLabel\n"
10970                         "        %i = OpVariable %fp_i32 Function\n"
10971                         "             OpStore %i %c_i32_0\n"
10972                         "             OpBranch %loop\n"
10973
10974                         "     %loop = OpLabel\n"
10975                         "    %i_cmp = OpLoad %i32 %i\n"
10976                         "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
10977                         "             OpLoopMerge %merge %next None\n"
10978                         "             OpBranchConditional %lt %write %merge\n"
10979
10980                         "    %write = OpLabel\n"
10981                         "      %ndx = OpLoad %i32 %i\n"
10982
10983                         " %val_src0 = OpFunctionCall %f16 %ld_arg_ssbo_src0 %ndx\n"
10984
10985                         "${op_arg1_calc}"
10986
10987                         " %val_bdst = ${op_code} %bool %val_src0 ${op_arg1}\n"
10988                         "  %val_dst = OpSelect %f16 %val_bdst %c_f16_1 %c_f16_0\n"
10989                         "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
10990                         "             OpBranch %next\n"
10991
10992                         "     %next = OpLabel\n"
10993                         "    %i_cur = OpLoad %i32 %i\n"
10994                         "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
10995                         "             OpStore %i %i_new\n"
10996                         "             OpBranch %loop\n"
10997
10998                         "    %merge = OpLabel\n"
10999                         "             OpReturnValue %param\n"
11000
11001                         "             OpFunctionEnd\n"
11002                 );
11003
11004                 const StringTemplate arg1Calc
11005                 (
11006                         " %val_src1 = OpFunctionCall %f16 %ld_arg_ssbo_src1 %ndx\n"
11007                 );
11008
11009                 for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11010                 {
11011                         const size_t            iterations              = float16Data1.size();
11012                         const TestOp&           testOp                  = testOps[testOpsIdx];
11013                         const string            testName                = de::toLower(string(testOp.opCode)) + "_scalar";
11014                         SpecResource            specResource;
11015                         map<string, string>     specs;
11016                         VulkanFeatures          features;
11017                         map<string, string>     fragments;
11018                         vector<string>          extensions;
11019
11020                         specs["num_data_points"]        = de::toString(iterations);
11021                         specs["op_code"]                        = testOp.opCode;
11022                         specs["op_arg1"]                        = (testOp.argCount == 1) ? "" : "%val_src1";
11023                         specs["op_arg1_calc"]           = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11024
11025                         fragments["extension"]          = spvExtensions;
11026                         fragments["capability"]         = spvCapabilities;
11027                         fragments["execution_mode"]     = spvExecutionMode;
11028                         fragments["decoration"]         = decoration.specialize(specs);
11029                         fragments["pre_main"]           = preMain.specialize(specs);
11030                         fragments["testfun"]            = testFun.specialize(specs);
11031                         fragments["testfun"]            += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src0"}});
11032                         if (testOp.argCount > 1)
11033                         {
11034                                 fragments["testfun"]    += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src1"}});
11035                         }
11036                         fragments["testfun"]            += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
11037
11038                         specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11039                         specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11040                         specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11041                         specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11042
11043                         extensions.push_back("VK_KHR_shader_float16_int8");
11044
11045                         if (nanSupported)
11046                         {
11047                                 extensions.push_back("VK_KHR_shader_float_controls");
11048
11049                                 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11050                         }
11051
11052                         features.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
11053
11054                         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11055                 }
11056         }
11057         { // vector cases
11058                 const StringTemplate preMain
11059                 (
11060                         "        %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11061                         "           %v2bool = OpTypeVector %bool 2\n"
11062                         "              %f16 = OpTypeFloat 16\n"
11063                         "          %c_f16_0 = OpConstant %f16 0.0\n"
11064                         "          %c_f16_1 = OpConstant %f16 1.0\n"
11065                         "            %v2f16 = OpTypeVector %f16 2\n"
11066                         "      %c_v2f16_0_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11067                         "      %c_v2f16_1_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
11068                         "           %up_u32 = OpTypePointer Uniform %u32\n"
11069                         "           %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
11070                         "           %SSBO16 = OpTypeStruct %ra_u32\n"
11071                         "        %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11072                         "     %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11073                         "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
11074                         "        %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11075                         "        %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11076                         "         %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11077                 );
11078
11079                 const StringTemplate decoration
11080                 (
11081                         "OpDecorate %ra_u32 ArrayStride 4\n"
11082                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
11083                         "OpDecorate %SSBO16 BufferBlock\n"
11084                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
11085                         "OpDecorate %ssbo_src0 Binding 0\n"
11086                         "OpDecorate %ssbo_src1 DescriptorSet 0\n"
11087                         "OpDecorate %ssbo_src1 Binding 1\n"
11088                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
11089                         "OpDecorate %ssbo_dst Binding 2\n"
11090                 );
11091
11092                 const StringTemplate testFun
11093                 (
11094                         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11095                         "    %param = OpFunctionParameter %v4f32\n"
11096
11097                         "    %entry = OpLabel\n"
11098                         "        %i = OpVariable %fp_i32 Function\n"
11099                         "             OpStore %i %c_i32_0\n"
11100                         "             OpBranch %loop\n"
11101
11102                         "     %loop = OpLabel\n"
11103                         "    %i_cmp = OpLoad %i32 %i\n"
11104                         "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11105                         "             OpLoopMerge %merge %next None\n"
11106                         "             OpBranchConditional %lt %write %merge\n"
11107
11108                         "    %write = OpLabel\n"
11109                         "      %ndx = OpLoad %i32 %i\n"
11110
11111                         " %val_src0 = OpFunctionCall %v2f16 %ld_arg_ssbo_src0 %ndx\n"
11112
11113                         "${op_arg1_calc}"
11114
11115                         " %val_bdst = ${op_code} %v2bool %val_src0 ${op_arg1}\n"
11116                         "  %val_dst = OpSelect %v2f16 %val_bdst %c_v2f16_1_1 %c_v2f16_0_0\n"
11117                         "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11118                         "             OpBranch %next\n"
11119
11120                         "     %next = OpLabel\n"
11121                         "    %i_cur = OpLoad %i32 %i\n"
11122                         "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11123                         "             OpStore %i %i_new\n"
11124                         "             OpBranch %loop\n"
11125
11126                         "    %merge = OpLabel\n"
11127                         "             OpReturnValue %param\n"
11128
11129                         "             OpFunctionEnd\n"
11130                 );
11131
11132                 const StringTemplate arg1Calc
11133                 (
11134                         " %val_src1 = OpFunctionCall %v2f16 %ld_arg_ssbo_src1 %ndx\n"
11135                 );
11136
11137                 for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11138                 {
11139                         const deUint32          itemsPerVec     = 2;
11140                         const size_t            iterations      = float16DataVec1.size() / itemsPerVec;
11141                         const TestOp&           testOp          = testOps[testOpsIdx];
11142                         const string            testName        = de::toLower(string(testOp.opCode)) + "_vector";
11143                         SpecResource            specResource;
11144                         map<string, string>     specs;
11145                         vector<string>          extensions;
11146                         VulkanFeatures          features;
11147                         map<string, string>     fragments;
11148
11149                         specs["num_data_points"]        = de::toString(iterations);
11150                         specs["op_code"]                        = testOp.opCode;
11151                         specs["op_arg1"]                        = (testOp.argCount == 1) ? "" : "%val_src1";
11152                         specs["op_arg1_calc"]           = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11153
11154                         fragments["extension"]          = spvExtensions;
11155                         fragments["capability"]         = spvCapabilities;
11156                         fragments["execution_mode"]     = spvExecutionMode;
11157                         fragments["decoration"]         = decoration.specialize(specs);
11158                         fragments["pre_main"]           = preMain.specialize(specs);
11159                         fragments["testfun"]            = testFun.specialize(specs);
11160                         fragments["testfun"]            += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src0"}});
11161                         if (testOp.argCount > 1)
11162                         {
11163                                 fragments["testfun"]    += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src1"}});
11164                         }
11165                         fragments["testfun"]            += StringTemplate(storeV2F16AsUint).specialize({{"var", "ssbo_dst"}});
11166
11167                         specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11168                         specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11169                         specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutVecDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11170                         specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11171
11172                         extensions.push_back("VK_KHR_shader_float16_int8");
11173
11174                         if (nanSupported)
11175                         {
11176                                 extensions.push_back("VK_KHR_shader_float_controls");
11177
11178                                 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11179                         }
11180
11181                         features.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
11182
11183                         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11184                 }
11185         }
11186
11187         return testGroup.release();
11188 }
11189
11190 bool compareFP16FunctionSetFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11191 {
11192         if (inputs.size() != 1 || outputAllocs.size() != 1)
11193                 return false;
11194
11195         vector<deUint8> input1Bytes;
11196
11197         inputs[0].getBytes(input1Bytes);
11198
11199         const deUint16* const   input1AsFP16    = (const deUint16*)&input1Bytes[0];
11200         const deUint16* const   outputAsFP16    = (const deUint16*)outputAllocs[0]->getHostPtr();
11201         std::string                             error;
11202
11203         for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deUint16); ++idx)
11204         {
11205                 if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
11206                 {
11207                         log << TestLog::Message << error << TestLog::EndMessage;
11208
11209                         return false;
11210                 }
11211         }
11212
11213         return true;
11214 }
11215
11216 template<class SpecResource>
11217 tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
11218 {
11219         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "function", "Float 16 function call related tests"));
11220
11221         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
11222         const StringTemplate                            capabilities            ("OpCapability Float16\n");
11223         const deUint32                                          numDataPoints           = 256;
11224         const vector<deFloat16>                         float16InputData        = getFloat16s(rnd, numDataPoints);
11225         const vector<deFloat16>                         float16OutputDummy      (float16InputData.size(), 0);
11226         map<string, string>                                     fragments;
11227
11228         struct TestType
11229         {
11230                 const deUint32  typeComponents;
11231                 const char*             typeName;
11232                 const char*             typeDecls;
11233                 const char*             typeStorage;
11234                 const string            loadFunc;
11235                 const string            storeFunc;
11236         };
11237
11238         const TestType  testTypes[]     =
11239         {
11240                 {
11241                         1,
11242                         "f16",
11243                         "      %v2f16 = OpTypeVector %f16 2\n"
11244                         "%f16_i32_fn = OpTypeFunction %f16 %i32\n"
11245                         "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11246                         "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11247                         " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
11248                         "u32_hndp",
11249                         loadScalarF16FromUint,
11250                         storeScalarF16AsUint
11251                 },
11252                 {
11253                         2,
11254                         "v2f16",
11255                         "      %v2f16 = OpTypeVector %f16 2\n"
11256                         "  %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11257                         "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11258                         "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
11259                         "u32_ndp",
11260                         loadV2F16FromUint,
11261                         storeV2F16AsUint
11262                 },
11263                 {
11264                         4,
11265                         "v4f16",
11266                         "      %v2f16 = OpTypeVector %f16 2\n"
11267                         "      %v4f16 = OpTypeVector %f16 4\n"
11268                         "  %c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
11269                         "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
11270                         "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
11271                         "ra_u32_2",
11272                         loadV4F16FromUints,
11273                         storeV4F16AsUints
11274                 },
11275         };
11276
11277         const StringTemplate preMain
11278         (
11279                 "  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11280                 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11281                 "     %v2bool = OpTypeVector %bool 2\n"
11282                 "        %f16 = OpTypeFloat 16\n"
11283                 "    %c_f16_0 = OpConstant %f16 0.0\n"
11284
11285                 "${type_decls}"
11286
11287                 "  %${tt}_fun = OpTypeFunction %${tt} %${tt}\n"
11288                 "   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
11289                 "%ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
11290                 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
11291                 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
11292                 "         %up_u32 = OpTypePointer Uniform %u32\n"
11293                 "     %SSBO16 = OpTypeStruct %ra_${ts}\n"
11294                 "  %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11295                 "   %ssbo_src = OpVariable %up_SSBO16 Uniform\n"
11296                 "   %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11297         );
11298
11299         const StringTemplate decoration
11300         (
11301                 "OpDecorate %ra_u32_2 ArrayStride 4\n"
11302                 "OpDecorate %ra_u32_hndp ArrayStride 4\n"
11303                 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
11304                 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
11305                 "OpMemberDecorate %SSBO16 0 Offset 0\n"
11306                 "OpDecorate %SSBO16 BufferBlock\n"
11307                 "OpDecorate %ssbo_src DescriptorSet 0\n"
11308                 "OpDecorate %ssbo_src Binding 0\n"
11309                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11310                 "OpDecorate %ssbo_dst Binding 1\n"
11311         );
11312
11313         const StringTemplate testFun
11314         (
11315                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11316                 "    %param = OpFunctionParameter %v4f32\n"
11317                 "    %entry = OpLabel\n"
11318
11319                 "        %i = OpVariable %fp_i32 Function\n"
11320                 "             OpStore %i %c_i32_0\n"
11321                 "             OpBranch %loop\n"
11322
11323                 "     %loop = OpLabel\n"
11324                 "    %i_cmp = OpLoad %i32 %i\n"
11325                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11326                 "             OpLoopMerge %merge %next None\n"
11327                 "             OpBranchConditional %lt %write %merge\n"
11328
11329                 "    %write = OpLabel\n"
11330                 "      %ndx = OpLoad %i32 %i\n"
11331
11332                 "  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
11333                 "  %val_dst = OpFunctionCall %${tt} %pass_fun %val_src\n"
11334                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11335                 "             OpBranch %next\n"
11336
11337                 "     %next = OpLabel\n"
11338                 "    %i_cur = OpLoad %i32 %i\n"
11339                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11340                 "             OpStore %i %i_new\n"
11341                 "             OpBranch %loop\n"
11342
11343                 "    %merge = OpLabel\n"
11344                 "             OpReturnValue %param\n"
11345
11346                 "             OpFunctionEnd\n"
11347
11348                 " %pass_fun = OpFunction %${tt} None %${tt}_fun\n"
11349                 "   %param0 = OpFunctionParameter %${tt}\n"
11350                 " %entry_pf = OpLabel\n"
11351                 "     %res0 = OpFAdd %${tt} %param0 %c_${tt}_0\n"
11352                 "             OpReturnValue %res0\n"
11353                 "             OpFunctionEnd\n"
11354         );
11355
11356         for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
11357         {
11358                 const TestType&         testType                = testTypes[testTypeIdx];
11359                 const string            testName                = testType.typeName;
11360                 const deUint32          itemsPerType    = testType.typeComponents;
11361                 const size_t            iterations              = float16InputData.size() / itemsPerType;
11362                 const size_t            typeStride              = itemsPerType * sizeof(deFloat16);
11363                 SpecResource            specResource;
11364                 map<string, string>     specs;
11365                 VulkanFeatures          features;
11366                 vector<string>          extensions;
11367
11368                 specs["num_data_points"]        = de::toString(iterations);
11369                 specs["tt"]                                     = testType.typeName;
11370                 specs["ts"]                                     = testType.typeStorage;
11371                 specs["tt_stride"]                      = de::toString(typeStride);
11372                 specs["type_decls"]                     = testType.typeDecls;
11373
11374                 fragments["capability"]         = capabilities.specialize(specs);
11375                 fragments["decoration"]         = decoration.specialize(specs);
11376                 fragments["pre_main"]           = preMain.specialize(specs);
11377                 fragments["testfun"]            = testFun.specialize(specs);
11378                 fragments["testfun"]            += StringTemplate(testType.loadFunc).specialize({{"var", "ssbo_src"}});
11379                 fragments["testfun"]            += StringTemplate(testType.storeFunc).specialize({{"var", "ssbo_dst"}});
11380
11381                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11382                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11383                 specResource.verifyIO = compareFP16FunctionSetFunc;
11384
11385                 extensions.push_back("VK_KHR_shader_float16_int8");
11386
11387                 features.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
11388
11389                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11390         }
11391
11392         return testGroup.release();
11393 }
11394
11395 bool compareFP16VectorExtractFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11396 {
11397         if (inputs.size() != 2 || outputAllocs.size() != 1)
11398                 return false;
11399
11400         vector<deUint8> input1Bytes;
11401         vector<deUint8> input2Bytes;
11402
11403         inputs[0].getBytes(input1Bytes);
11404         inputs[1].getBytes(input2Bytes);
11405
11406         DE_ASSERT(input1Bytes.size() > 0);
11407         DE_ASSERT(input2Bytes.size() > 0);
11408         DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
11409
11410         const size_t                    iterations              = input2Bytes.size() / sizeof(deUint32);
11411         const size_t                    components              = input1Bytes.size() / (sizeof(deFloat16) * iterations);
11412         const deFloat16* const  input1AsFP16    = (const deFloat16*)&input1Bytes[0];
11413         const deUint32* const   inputIndices    = (const deUint32*)&input2Bytes[0];
11414         const deFloat16* const  outputAsFP16    = (const deFloat16*)outputAllocs[0]->getHostPtr();
11415         std::string                             error;
11416
11417         DE_ASSERT(components == 2 || components == 4);
11418         DE_ASSERT(input1Bytes.size() == iterations * components * sizeof(deFloat16));
11419
11420         for (size_t idx = 0; idx < iterations; ++idx)
11421         {
11422                 const deUint32  componentNdx    = inputIndices[idx];
11423
11424                 DE_ASSERT(componentNdx < components);
11425
11426                 const deFloat16 expected                = input1AsFP16[components * idx + componentNdx];
11427
11428                 if (!compare16BitFloat(expected, outputAsFP16[idx], error))
11429                 {
11430                         log << TestLog::Message << "At " << idx << error << TestLog::EndMessage;
11431
11432                         return false;
11433                 }
11434         }
11435
11436         return true;
11437 }
11438
11439 template<class SpecResource>
11440 tcu::TestCaseGroup* createFloat16VectorExtractSet (tcu::TestContext& testCtx)
11441 {
11442         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "opvectorextractdynamic", "OpVectorExtractDynamic tests"));
11443
11444         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
11445         const deUint32                                          numDataPoints           = 256;
11446         const vector<deFloat16>                         float16InputData        = getFloat16s(rnd, numDataPoints);
11447         const vector<deFloat16>                         float16OutputDummy      (float16InputData.size(), 0);
11448
11449         struct TestType
11450         {
11451                 const deUint32  typeComponents;
11452                 const size_t    typeStride;
11453                 const char*             typeName;
11454                 const char*             typeDecls;
11455                 const char*             typeStorage;
11456                 const string            loadFunction;
11457                 const string            storeFunction;
11458         };
11459
11460         const TestType  testTypes[]     =
11461         {
11462                 {
11463                         2,
11464                         2 * sizeof(deFloat16),
11465                         "v2f16",
11466                         "      %v2f16 = OpTypeVector %f16 2\n"
11467                         "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11468                         "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11469                         "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11470                         " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
11471                         "u32",
11472                         loadV2F16FromUint,
11473                         storeScalarF16AsUint
11474                 },
11475                 {
11476                         3,
11477                         4 * sizeof(deFloat16),
11478                         "v3f16",
11479                         "      %v2f16 = OpTypeVector %f16 2\n"
11480                         "      %v3f16 = OpTypeVector %f16 3\n"
11481                         "%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
11482                         "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11483                         "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11484                         " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
11485                         "ra_u32_2",
11486                         loadV3F16FromUints,
11487                         storeScalarF16AsUint
11488                 },
11489                 {
11490                         4,
11491                         4 * sizeof(deFloat16),
11492                         "v4f16",
11493                         "      %v2f16 = OpTypeVector %f16 2\n"
11494                         "      %v4f16 = OpTypeVector %f16 4\n"
11495                         "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
11496                         "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11497                         "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11498                         " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
11499                         "ra_u32_2",
11500                         loadV4F16FromUints,
11501                         storeScalarF16AsUint
11502                 },
11503         };
11504
11505         const StringTemplate preMain
11506         (
11507                 "  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11508                 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11509                 "        %f16 = OpTypeFloat 16\n"
11510
11511                 "${type_decl}"
11512
11513                 "     %up_u32 = OpTypePointer Uniform %u32\n"
11514                 "     %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
11515                 "   %SSBO_IDX = OpTypeStruct %ra_u32\n"
11516                 "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
11517
11518                 "   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
11519                 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
11520                 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
11521                 "   %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
11522                 "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
11523
11524                 " %ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
11525                 "   %SSBO_DST = OpTypeStruct %ra_u32_hndp\n"
11526                 "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
11527
11528                 "   %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
11529                 "   %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
11530                 "   %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
11531         );
11532
11533         const StringTemplate decoration
11534         (
11535                 "OpDecorate %ra_u32_2 ArrayStride 4\n"
11536                 "OpDecorate %ra_u32_hndp ArrayStride 4\n"
11537                 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
11538                 "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
11539                 "OpDecorate %SSBO_SRC BufferBlock\n"
11540                 "OpDecorate %ssbo_src DescriptorSet 0\n"
11541                 "OpDecorate %ssbo_src Binding 0\n"
11542
11543                 "OpDecorate %ra_u32 ArrayStride 4\n"
11544                 "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
11545                 "OpDecorate %SSBO_IDX BufferBlock\n"
11546                 "OpDecorate %ssbo_idx DescriptorSet 0\n"
11547                 "OpDecorate %ssbo_idx Binding 1\n"
11548
11549                 "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
11550                 "OpDecorate %SSBO_DST BufferBlock\n"
11551                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11552                 "OpDecorate %ssbo_dst Binding 2\n"
11553         );
11554
11555         const StringTemplate testFun
11556         (
11557                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11558                 "    %param = OpFunctionParameter %v4f32\n"
11559                 "    %entry = OpLabel\n"
11560
11561                 "        %i = OpVariable %fp_i32 Function\n"
11562                 "             OpStore %i %c_i32_0\n"
11563
11564                 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
11565                 "             OpSelectionMerge %end_if None\n"
11566                 "             OpBranchConditional %will_run %run_test %end_if\n"
11567
11568                 " %run_test = OpLabel\n"
11569                 "             OpBranch %loop\n"
11570
11571                 "     %loop = OpLabel\n"
11572                 "    %i_cmp = OpLoad %i32 %i\n"
11573                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11574                 "             OpLoopMerge %merge %next None\n"
11575                 "             OpBranchConditional %lt %write %merge\n"
11576
11577                 "    %write = OpLabel\n"
11578                 "      %ndx = OpLoad %i32 %i\n"
11579
11580                 "  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
11581
11582                 "  %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
11583                 "  %val_idx = OpLoad %u32 %src_idx\n"
11584
11585                 "  %val_dst = OpVectorExtractDynamic %f16 %val_src %val_idx\n"
11586                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11587
11588                 "             OpBranch %next\n"
11589
11590                 "     %next = OpLabel\n"
11591                 "    %i_cur = OpLoad %i32 %i\n"
11592                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11593                 "             OpStore %i %i_new\n"
11594                 "             OpBranch %loop\n"
11595
11596                 "    %merge = OpLabel\n"
11597                 "             OpBranch %end_if\n"
11598                 "   %end_if = OpLabel\n"
11599                 "             OpReturnValue %param\n"
11600
11601                 "             OpFunctionEnd\n"
11602         );
11603
11604         for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
11605         {
11606                 const TestType&         testType                = testTypes[testTypeIdx];
11607                 const string            testName                = testType.typeName;
11608                 const size_t            itemsPerType    = testType.typeStride / sizeof(deFloat16);
11609                 const size_t            iterations              = float16InputData.size() / itemsPerType;
11610                 SpecResource            specResource;
11611                 map<string, string>     specs;
11612                 VulkanFeatures          features;
11613                 vector<deUint32>        inputDataNdx;
11614                 map<string, string>     fragments;
11615                 vector<string>          extensions;
11616
11617                 for (deUint32 ndx = 0; ndx < iterations; ++ndx)
11618                         inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
11619
11620                 specs["num_data_points"]        = de::toString(iterations);
11621                 specs["tt"]                                     = testType.typeName;
11622                 specs["ts"]                                     = testType.typeStorage;
11623                 specs["tt_stride"]                      = de::toString(testType.typeStride);
11624                 specs["type_decl"]                      = testType.typeDecls;
11625
11626                 fragments["capability"]         = "OpCapability Float16\n";
11627                 fragments["decoration"]         = decoration.specialize(specs);
11628                 fragments["pre_main"]           = preMain.specialize(specs);
11629                 fragments["testfun"]            = testFun.specialize(specs);
11630                 fragments["testfun"]            += StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
11631                 fragments["testfun"]            += StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
11632
11633                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11634                 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11635                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11636                 specResource.verifyIO = compareFP16VectorExtractFunc;
11637
11638                 extensions.push_back("VK_KHR_shader_float16_int8");
11639
11640                 features.extFloat16Int8         = EXTFLOAT16INT8FEATURES_FLOAT16;
11641
11642                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11643         }
11644
11645         return testGroup.release();
11646 }
11647
11648 template<deUint32 COMPONENTS_COUNT, deUint32 REPLACEMENT>
11649 bool compareFP16VectorInsertFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11650 {
11651         if (inputs.size() != 2 || outputAllocs.size() != 1)
11652                 return false;
11653
11654         vector<deUint8> input1Bytes;
11655         vector<deUint8> input2Bytes;
11656
11657         inputs[0].getBytes(input1Bytes);
11658         inputs[1].getBytes(input2Bytes);
11659
11660         DE_ASSERT(input1Bytes.size() > 0);
11661         DE_ASSERT(input2Bytes.size() > 0);
11662         DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
11663
11664         const size_t                    iterations                      = input2Bytes.size() / sizeof(deUint32);
11665         const size_t                    componentsStride        = input1Bytes.size() / (sizeof(deFloat16) * iterations);
11666         const deFloat16* const  input1AsFP16            = (const deFloat16*)&input1Bytes[0];
11667         const deUint32* const   inputIndices            = (const deUint32*)&input2Bytes[0];
11668         const deFloat16* const  outputAsFP16            = (const deFloat16*)outputAllocs[0]->getHostPtr();
11669         const deFloat16                 magic                           = tcu::Float16(float(REPLACEMENT)).bits();
11670         std::string                             error;
11671
11672         DE_ASSERT(componentsStride == 2 || componentsStride == 4);
11673         DE_ASSERT(input1Bytes.size() == iterations * componentsStride * sizeof(deFloat16));
11674
11675         for (size_t idx = 0; idx < iterations; ++idx)
11676         {
11677                 const deFloat16*        inputVec                = &input1AsFP16[componentsStride * idx];
11678                 const deFloat16*        outputVec               = &outputAsFP16[componentsStride * idx];
11679                 const deUint32          replacedCompNdx = inputIndices[idx];
11680
11681                 DE_ASSERT(replacedCompNdx < COMPONENTS_COUNT);
11682
11683                 for (size_t compNdx = 0; compNdx < COMPONENTS_COUNT; ++compNdx)
11684                 {
11685                         const deFloat16 expected        = (compNdx == replacedCompNdx) ? magic : inputVec[compNdx];
11686
11687                         if (!compare16BitFloat(expected, outputVec[compNdx], error))
11688                         {
11689                                 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
11690
11691                                 return false;
11692                         }
11693                 }
11694         }
11695
11696         return true;
11697 }
11698
// Builds the "opvectorinsertdynamic" test group: one test case per fp16 vector type (v2f16, v3f16, v4f16).
//
// Every case feeds three storage buffers to the shader:
//   binding 0 - 256 fp16 values from getFloat16s(), read as vectors of the tested type,
//   binding 1 - one random component index per vector,
//   binding 2 - output buffer of the same size as the fp16 input, initially zero.
// The shader overwrites the indexed component of each vector with the constant `replacement`
// using OpVectorInsertDynamic; the per-type compareFP16VectorInsertFunc instantiation verifies it.
//
// SpecResource is the resource specification type that is filled in and handed to finalizeTestsCreation().
// Returns the released test group pointer.
template<class SpecResource>
tcu::TestCaseGroup* createFloat16VectorInsertSet (tcu::TestContext& testCtx)
{
        de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "opvectorinsertdynamic", "OpVectorInsertDynamic tests"));

        // The random generator is seeded only from the group name hash.
        de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
        // Value inserted by the shader; also baked into the verifier as a template argument.
        const deUint32                                          replacement                     = 42;
        const deUint32                                          numDataPoints           = 256;
        const vector<deFloat16>                         float16InputData        = getFloat16s(rnd, numDataPoints);
        const vector<deFloat16>                         float16OutputDummy      (float16InputData.size(), 0);

        // Per-vector-type parameters of a test case.
        struct TestType
        {
                // Number of components; used as the modulus for the random insert index.
                const deUint32  typeComponents;
                // Size in bytes one vector occupies in the buffers.
                const size_t    typeStride;
                // SPIR-V type id without '%'; substituted for ${tt} and used as the test name.
                const char*             typeName;
                // Extra SPIR-V type declarations substituted for ${type_decl}.
                const char*             typeDecls;
                // Host-side checker for this vector type.
                VerifyIOFunc    verifyIOFunc;
                // Suffix appended to "%ra_" to pick the array type inside SSBO_SRC and SSBO_DST (${ts}).
                const char*             typeStorage;
                // SPIR-V function templates appended to the test function; specialized with ${var}.
                const string            loadFunction;
                const string            storeFunction;
        };

        const TestType  testTypes[]     =
        {
                {
                        2,
                        2 * sizeof(deFloat16),
                        "v2f16",
                        "      %v2f16 = OpTypeVector %f16 2\n"
                        "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
                        "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
                        compareFP16VectorInsertFunc<2, replacement>,
                        "u32",
                        loadV2F16FromUint,
                        storeV2F16AsUint
                },
                {
                        // v3f16 uses the same stride of four halfs as v4f16.
                        3,
                        4 * sizeof(deFloat16),
                        "v3f16",
                        "      %v2f16 = OpTypeVector %f16 2\n"
                        "      %v3f16 = OpTypeVector %f16 3\n"
                        "%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
                        "%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n",
                        compareFP16VectorInsertFunc<3, replacement>,
                        "ra_u32_2",
                        loadV3F16FromUints,
                        storeV3F16AsUints
                },
                {
                        4,
                        4 * sizeof(deFloat16),
                        "v4f16",
                        "      %v2f16 = OpTypeVector %f16 2\n"
                        "      %v4f16 = OpTypeVector %f16 4\n"
                        "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
                        "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
                        compareFP16VectorInsertFunc<4, replacement>,
                        "ra_u32_2",
                        loadV4F16FromUints,
                        storeV4F16AsUints
                },
        };

        // Types, constants and the three buffer variables declared ahead of the entry point.
        // Source and destination blocks share the same array type, selected through ${ts}.
        const StringTemplate preMain
        (
                "  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
                "        %f16 = OpTypeFloat 16\n"
                "  %c_f16_ins = OpConstant %f16 ${replacement}\n"

                "${type_decl}"

                "     %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
                "         %up_u32 = OpTypePointer Uniform %u32\n"
                "   %SSBO_IDX = OpTypeStruct %ra_u32\n"
                "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"

                "   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
                "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
                "   %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
                "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"

                "   %SSBO_DST = OpTypeStruct %ra_${ts}\n"
                "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"

                "   %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
                "   %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
                "   %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
        );

        // Layout and binding decorations: source at binding 0, indices at binding 1, destination at binding 2.
        const StringTemplate decoration
        (
                "OpDecorate %ra_u32_2 ArrayStride 4\n"
                "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
                "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
                "OpDecorate %SSBO_SRC BufferBlock\n"
                "OpDecorate %ssbo_src DescriptorSet 0\n"
                "OpDecorate %ssbo_src Binding 0\n"

                "OpDecorate %ra_u32 ArrayStride 4\n"
                "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
                "OpDecorate %SSBO_IDX BufferBlock\n"
                "OpDecorate %ssbo_idx DescriptorSet 0\n"
                "OpDecorate %ssbo_idx Binding 1\n"

                "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
                "OpDecorate %SSBO_DST BufferBlock\n"
                "OpDecorate %ssbo_dst DescriptorSet 0\n"
                "OpDecorate %ssbo_dst Binding 2\n"
        );

        // Test function: when %isUniqueIdZero returns true, loops i over [0, num_data_points), loads
        // vector i and index i, inserts %c_f16_ins at that index and stores the result as vector i.
        // %ld_arg_ssbo_src and %st_fn_ssbo_dst are not defined here; presumably they come from the
        // load/store function templates appended below -- confirm against those templates.
        const StringTemplate testFun
        (
                "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
                "    %param = OpFunctionParameter %v4f32\n"
                "    %entry = OpLabel\n"

                "        %i = OpVariable %fp_i32 Function\n"
                "             OpStore %i %c_i32_0\n"

                " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
                "             OpSelectionMerge %end_if None\n"
                "             OpBranchConditional %will_run %run_test %end_if\n"

                " %run_test = OpLabel\n"
                "             OpBranch %loop\n"

                "     %loop = OpLabel\n"
                "    %i_cmp = OpLoad %i32 %i\n"
                "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
                "             OpLoopMerge %merge %next None\n"
                "             OpBranchConditional %lt %write %merge\n"

                "    %write = OpLabel\n"
                "      %ndx = OpLoad %i32 %i\n"

                "  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"

                "  %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
                "  %val_idx = OpLoad %u32 %src_idx\n"

                "  %val_dst = OpVectorInsertDynamic %${tt} %val_src %c_f16_ins %val_idx\n"
                "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"

                "             OpBranch %next\n"

                "     %next = OpLabel\n"
                "    %i_cur = OpLoad %i32 %i\n"
                "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
                "             OpStore %i %i_new\n"
                "             OpBranch %loop\n"

                "    %merge = OpLabel\n"
                "             OpBranch %end_if\n"
                "   %end_if = OpLabel\n"
                "             OpReturnValue %param\n"

                "             OpFunctionEnd\n"
        );

        for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
        {
                const TestType&         testType                = testTypes[testTypeIdx];
                const string            testName                = testType.typeName;
                // Number of fp16 values per vector slot, and number of whole vectors in the input data.
                const size_t            itemsPerType    = testType.typeStride / sizeof(deFloat16);
                const size_t            iterations              = float16InputData.size() / itemsPerType;
                SpecResource            specResource;
                map<string, string>     specs;
                VulkanFeatures          features;
                vector<deUint32>        inputDataNdx;
                map<string, string>     fragments;
                vector<string>          extensions;

                // One random, in-range component index per vector. The generator is shared by all
                // test types, so the values depend on the order in which the types are processed.
                for (deUint32 ndx = 0; ndx < iterations; ++ndx)
                        inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);

                // NOTE: "tt_stride" is not referenced by any template specialized with `specs` in this function.
                specs["num_data_points"]        = de::toString(iterations);
                specs["tt"]                                     = testType.typeName;
                specs["ts"]                                     = testType.typeStorage;
                specs["tt_stride"]                      = de::toString(testType.typeStride);
                specs["type_decl"]                      = testType.typeDecls;
                specs["replacement"]            = de::toString(replacement);

                // The load/store helpers are appended after the test function, bound to the
                // source and destination buffer variables respectively.
                fragments["capability"]         = "OpCapability Float16\n";
                fragments["decoration"]         = decoration.specialize(specs);
                fragments["pre_main"]           = preMain.specialize(specs);
                fragments["testfun"]            = testFun.specialize(specs);
                fragments["testfun"]            += StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
                fragments["testfun"]            += StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});

                // Resource order must match what the verifier expects: inputs[0] = vectors, inputs[1] = indices.
                specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
                specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
                specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
                specResource.verifyIO = testType.verifyIOFunc;

                extensions.push_back("VK_KHR_shader_float16_int8");

                features.extFloat16Int8         = EXTFLOAT16INT8FEATURES_FLOAT16;

                // NOTE(review): IVec3(1, 1, 1) is presumably the compute workgroup count -- confirm against finalizeTestsCreation().
                finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
        }

        return testGroup.release();
}
11904
11905 inline deFloat16 getShuffledComponent (const size_t iteration, const size_t componentNdx, const deFloat16* input1Vec, const deFloat16* input2Vec, size_t vec1Len, size_t vec2Len, bool& validate)
11906 {
11907         const size_t    compNdxCount    = (vec1Len + vec2Len + 1);
11908         const size_t    compNdxLimited  = iteration % (compNdxCount * compNdxCount);
11909         size_t                  comp;
11910
11911         switch (componentNdx)
11912         {
11913                 case 0: comp = compNdxLimited / compNdxCount; break;
11914                 case 1: comp = compNdxLimited % compNdxCount; break;
11915                 case 2: comp = 0; break;
11916                 case 3: comp = 1; break;
11917                 default: TCU_THROW(InternalError, "Impossible");
11918         }
11919
11920         if (comp >= vec1Len + vec2Len)
11921         {
11922                 validate = false;
11923                 return 0;
11924         }
11925         else
11926         {
11927                 validate = true;
11928                 return (comp < vec1Len) ? input1Vec[comp] : input2Vec[comp - vec1Len];
11929         }
11930 }
11931
11932 template<deUint32 DST_COMPONENTS_COUNT, deUint32 SRC0_COMPONENTS_COUNT, deUint32 SRC1_COMPONENTS_COUNT>
11933 bool compareFP16VectorShuffleFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11934 {
11935         DE_STATIC_ASSERT(DST_COMPONENTS_COUNT == 2 || DST_COMPONENTS_COUNT == 3 || DST_COMPONENTS_COUNT == 4);
11936         DE_STATIC_ASSERT(SRC0_COMPONENTS_COUNT == 2 || SRC0_COMPONENTS_COUNT == 3 || SRC0_COMPONENTS_COUNT == 4);
11937         DE_STATIC_ASSERT(SRC1_COMPONENTS_COUNT == 2 || SRC1_COMPONENTS_COUNT == 3 || SRC1_COMPONENTS_COUNT == 4);
11938
11939         if (inputs.size() != 2 || outputAllocs.size() != 1)
11940                 return false;
11941
11942         vector<deUint8> input1Bytes;
11943         vector<deUint8> input2Bytes;
11944
11945         inputs[0].getBytes(input1Bytes);
11946         inputs[1].getBytes(input2Bytes);
11947
11948         DE_ASSERT(input1Bytes.size() > 0);
11949         DE_ASSERT(input2Bytes.size() > 0);
11950         DE_ASSERT(input2Bytes.size() % sizeof(deFloat16) == 0);
11951
11952         const size_t                    componentsStrideDst             = (DST_COMPONENTS_COUNT == 3) ? 4 : DST_COMPONENTS_COUNT;
11953         const size_t                    componentsStrideSrc0    = (SRC0_COMPONENTS_COUNT == 3) ? 4 : SRC0_COMPONENTS_COUNT;
11954         const size_t                    componentsStrideSrc1    = (SRC1_COMPONENTS_COUNT == 3) ? 4 : SRC1_COMPONENTS_COUNT;
11955         const size_t                    iterations                              = input1Bytes.size() / (componentsStrideSrc0 * sizeof(deFloat16));
11956         const deFloat16* const  input1AsFP16                    = (const deFloat16*)&input1Bytes[0];
11957         const deFloat16* const  input2AsFP16                    = (const deFloat16*)&input2Bytes[0];
11958         const deFloat16* const  outputAsFP16                    = (const deFloat16*)outputAllocs[0]->getHostPtr();
11959         std::string                             error;
11960
11961         DE_ASSERT(input1Bytes.size() == iterations * componentsStrideSrc0 * sizeof(deFloat16));
11962         DE_ASSERT(input2Bytes.size() == iterations * componentsStrideSrc1 * sizeof(deFloat16));
11963
11964         for (size_t idx = 0; idx < iterations; ++idx)
11965         {
11966                 const deFloat16*        input1Vec       = &input1AsFP16[componentsStrideSrc0 * idx];
11967                 const deFloat16*        input2Vec       = &input2AsFP16[componentsStrideSrc1 * idx];
11968                 const deFloat16*        outputVec       = &outputAsFP16[componentsStrideDst * idx];
11969
11970                 for (size_t compNdx = 0; compNdx < DST_COMPONENTS_COUNT; ++compNdx)
11971                 {
11972                         bool            validate        = true;
11973                         deFloat16       expected        = getShuffledComponent(idx, compNdx, input1Vec, input2Vec, SRC0_COMPONENTS_COUNT, SRC1_COMPONENTS_COUNT, validate);
11974
11975                         if (validate && !compare16BitFloat(expected, outputVec[compNdx], error))
11976                         {
11977                                 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
11978
11979                                 return false;
11980                         }
11981                 }
11982         }
11983
11984         return true;
11985 }
11986
11987 VerifyIOFunc getFloat16VectorShuffleVerifyIOFunc (deUint32 dstComponentsCount, deUint32 src0ComponentsCount, deUint32 src1ComponentsCount)
11988 {
11989         DE_ASSERT(dstComponentsCount <= 4);
11990         DE_ASSERT(src0ComponentsCount <= 4);
11991         DE_ASSERT(src1ComponentsCount <= 4);
11992         deUint32 funcCode = 100 * dstComponentsCount + 10 * src0ComponentsCount + src1ComponentsCount;
11993
11994         switch (funcCode)
11995         {
11996                 case 222:return compareFP16VectorShuffleFunc<2, 2, 2>;
11997                 case 223:return compareFP16VectorShuffleFunc<2, 2, 3>;
11998                 case 224:return compareFP16VectorShuffleFunc<2, 2, 4>;
11999                 case 232:return compareFP16VectorShuffleFunc<2, 3, 2>;
12000                 case 233:return compareFP16VectorShuffleFunc<2, 3, 3>;
12001                 case 234:return compareFP16VectorShuffleFunc<2, 3, 4>;
12002                 case 242:return compareFP16VectorShuffleFunc<2, 4, 2>;
12003                 case 243:return compareFP16VectorShuffleFunc<2, 4, 3>;
12004                 case 244:return compareFP16VectorShuffleFunc<2, 4, 4>;
12005                 case 322:return compareFP16VectorShuffleFunc<3, 2, 2>;
12006                 case 323:return compareFP16VectorShuffleFunc<3, 2, 3>;
12007                 case 324:return compareFP16VectorShuffleFunc<3, 2, 4>;
12008                 case 332:return compareFP16VectorShuffleFunc<3, 3, 2>;
12009                 case 333:return compareFP16VectorShuffleFunc<3, 3, 3>;
12010                 case 334:return compareFP16VectorShuffleFunc<3, 3, 4>;
12011                 case 342:return compareFP16VectorShuffleFunc<3, 4, 2>;
12012                 case 343:return compareFP16VectorShuffleFunc<3, 4, 3>;
12013                 case 344:return compareFP16VectorShuffleFunc<3, 4, 4>;
12014                 case 422:return compareFP16VectorShuffleFunc<4, 2, 2>;
12015                 case 423:return compareFP16VectorShuffleFunc<4, 2, 3>;
12016                 case 424:return compareFP16VectorShuffleFunc<4, 2, 4>;
12017                 case 432:return compareFP16VectorShuffleFunc<4, 3, 2>;
12018                 case 433:return compareFP16VectorShuffleFunc<4, 3, 3>;
12019                 case 434:return compareFP16VectorShuffleFunc<4, 3, 4>;
12020                 case 442:return compareFP16VectorShuffleFunc<4, 4, 2>;
12021                 case 443:return compareFP16VectorShuffleFunc<4, 4, 3>;
12022                 case 444:return compareFP16VectorShuffleFunc<4, 4, 4>;
12023                 default: TCU_THROW(InternalError, "Invalid number of components specified.");
12024         }
12025 }
12026
12027 template<class SpecResource>
12028 tcu::TestCaseGroup* createFloat16VectorShuffleSet (tcu::TestContext& testCtx)
12029 {
12030         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "opvectorshuffle", "OpVectorShuffle tests"));
12031         const int                                                       testSpecificSeed        = deStringHash(testGroup->getName());
12032         const int                                                       seed                            = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
12033         de::Random                                                      rnd                                     (seed);
12034         const deUint32                                          numDataPoints           = 128;
12035         map<string, string>                                     fragments;
12036
12037         struct TestType
12038         {
12039                 const deUint32  typeComponents;
12040                 const char*             typeName;
12041                 const string    loadFunction;
12042                 const string    storeFunction;
12043         };
12044
12045         const TestType  testTypes[]     =
12046         {
12047                 {
12048                         2,
12049                         "v2f16",
12050                         loadV2F16FromUint,
12051                         storeV2F16AsUint
12052                 },
12053                 {
12054                         3,
12055                         "v3f16",
12056                         loadV3F16FromUints,
12057                         storeV3F16AsUints
12058                 },
12059                 {
12060                         4,
12061                         "v4f16",
12062                         loadV4F16FromUints,
12063                         storeV4F16AsUints
12064                 },
12065         };
12066
12067         const StringTemplate preMain
12068         (
12069                 "    %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12070                 "     %c_i32_cc = OpConstant %i32 ${case_count}\n"
12071                 "          %f16 = OpTypeFloat 16\n"
12072                 "        %v2f16 = OpTypeVector %f16 2\n"
12073                 "        %v3f16 = OpTypeVector %f16 3\n"
12074                 "        %v4f16 = OpTypeVector %f16 4\n"
12075
12076                 "     %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12077                 "     %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12078                 "     %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12079                 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
12080                 "%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
12081                 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
12082
12083                 "     %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12084                 "   %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12085                 "  %ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12086                 "       %up_u32 = OpTypePointer Uniform %u32\n"
12087                 "   %SSBO_v2f16 = OpTypeStruct %ra_u32_ndp\n"
12088                 "   %SSBO_v3f16 = OpTypeStruct %ra_ra_u32_2\n"
12089                 "   %SSBO_v4f16 = OpTypeStruct %ra_ra_u32_2\n"
12090
12091                 "%up_SSBO_v2f16 = OpTypePointer Uniform %SSBO_v2f16\n"
12092                 "%up_SSBO_v3f16 = OpTypePointer Uniform %SSBO_v3f16\n"
12093                 "%up_SSBO_v4f16 = OpTypePointer Uniform %SSBO_v4f16\n"
12094
12095                 "        %fun_t = OpTypeFunction %${tt_dst} %${tt_src0} %${tt_src1} %i32\n"
12096
12097                 "    %ssbo_src0 = OpVariable %up_SSBO_${tt_src0} Uniform\n"
12098                 "    %ssbo_src1 = OpVariable %up_SSBO_${tt_src1} Uniform\n"
12099                 "     %ssbo_dst = OpVariable %up_SSBO_${tt_dst} Uniform\n"
12100         );
12101
12102         const StringTemplate decoration
12103         (
12104                 "OpDecorate %ra_u32_2 ArrayStride 4\n"
12105                 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
12106                 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12107
12108                 "OpMemberDecorate %SSBO_v2f16 0 Offset 0\n"
12109                 "OpDecorate %SSBO_v2f16 BufferBlock\n"
12110
12111                 "OpMemberDecorate %SSBO_v3f16 0 Offset 0\n"
12112                 "OpDecorate %SSBO_v3f16 BufferBlock\n"
12113
12114                 "OpMemberDecorate %SSBO_v4f16 0 Offset 0\n"
12115                 "OpDecorate %SSBO_v4f16 BufferBlock\n"
12116
12117                 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
12118                 "OpDecorate %ssbo_src0 Binding 0\n"
12119                 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
12120                 "OpDecorate %ssbo_src1 Binding 1\n"
12121                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12122                 "OpDecorate %ssbo_dst Binding 2\n"
12123         );
12124
12125         const StringTemplate testFun
12126         (
12127                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12128                 "    %param = OpFunctionParameter %v4f32\n"
12129                 "    %entry = OpLabel\n"
12130
12131                 "        %i = OpVariable %fp_i32 Function\n"
12132                 "             OpStore %i %c_i32_0\n"
12133
12134                 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12135                 "             OpSelectionMerge %end_if None\n"
12136                 "             OpBranchConditional %will_run %run_test %end_if\n"
12137
12138                 " %run_test = OpLabel\n"
12139                 "             OpBranch %loop\n"
12140
12141                 "     %loop = OpLabel\n"
12142                 "    %i_cmp = OpLoad %i32 %i\n"
12143                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12144                 "             OpLoopMerge %merge %next None\n"
12145                 "             OpBranchConditional %lt %write %merge\n"
12146
12147                 "    %write = OpLabel\n"
12148                 "      %ndx = OpLoad %i32 %i\n"
12149                 " %val_src0 = OpFunctionCall %${tt_src0} %ld_arg_ssbo_src0 %ndx\n"
12150                 " %val_src1 = OpFunctionCall %${tt_src1} %ld_arg_ssbo_src1 %ndx\n"
12151                 "  %val_dst = OpFunctionCall %${tt_dst} %sw_fun %val_src0 %val_src1 %ndx\n"
12152                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12153                 "             OpBranch %next\n"
12154
12155                 "     %next = OpLabel\n"
12156                 "    %i_cur = OpLoad %i32 %i\n"
12157                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12158                 "             OpStore %i %i_new\n"
12159                 "             OpBranch %loop\n"
12160
12161                 "    %merge = OpLabel\n"
12162                 "             OpBranch %end_if\n"
12163                 "   %end_if = OpLabel\n"
12164                 "             OpReturnValue %param\n"
12165                 "             OpFunctionEnd\n"
12166                 "\n"
12167
12168                 "   %sw_fun = OpFunction %${tt_dst} None %fun_t\n"
12169                 "%sw_param0 = OpFunctionParameter %${tt_src0}\n"
12170                 "%sw_param1 = OpFunctionParameter %${tt_src1}\n"
12171                 "%sw_paramn = OpFunctionParameter %i32\n"
12172                 " %sw_entry = OpLabel\n"
12173                 "   %modulo = OpSMod %i32 %sw_paramn %c_i32_cc\n"
12174                 "             OpSelectionMerge %switch_e None\n"
12175                 "             OpSwitch %modulo %default ${case_list}\n"
12176                 "${case_bodies}"
12177                 "%default   = OpLabel\n"
12178                 "             OpUnreachable\n" // Unreachable default case for switch statement
12179                 "%switch_e  = OpLabel\n"
12180                 "             OpUnreachable\n" // Unreachable merge block for switch statement
12181                 "             OpFunctionEnd\n"
12182         );
12183
12184         const StringTemplate testCaseBody
12185         (
12186                 "%case_${case_ndx}    = OpLabel\n"
12187                 "%val_dst_${case_ndx} = OpVectorShuffle %${tt_dst} %sw_param0 %sw_param1 ${shuffle}\n"
12188                 "             OpReturnValue %val_dst_${case_ndx}\n"
12189         );
12190
12191         for (deUint32 dstTypeIdx = 0; dstTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++dstTypeIdx)
12192         {
12193                 const TestType& dstType                 = testTypes[dstTypeIdx];
12194
12195                 for (deUint32 comp0Idx = 0; comp0Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp0Idx)
12196                 {
12197                         const TestType& src0Type        = testTypes[comp0Idx];
12198
12199                         for (deUint32 comp1Idx = 0; comp1Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp1Idx)
12200                         {
12201                                 const TestType&                 src1Type                        = testTypes[comp1Idx];
12202                                 const deUint32                  input0Stride            = (src0Type.typeComponents == 3) ? 4 : src0Type.typeComponents;
12203                                 const deUint32                  input1Stride            = (src1Type.typeComponents == 3) ? 4 : src1Type.typeComponents;
12204                                 const deUint32                  outputStride            = (dstType.typeComponents == 3) ? 4 : dstType.typeComponents;
12205                                 const vector<deFloat16> float16Input0Data       = getFloat16s(rnd, input0Stride * numDataPoints);
12206                                 const vector<deFloat16> float16Input1Data       = getFloat16s(rnd, input1Stride * numDataPoints);
12207                                 const vector<deFloat16> float16OutputDummy      (outputStride * numDataPoints, 0);
12208                                 const string                    testName                        = de::toString(dstType.typeComponents) + de::toString(src0Type.typeComponents) + de::toString(src1Type.typeComponents);
12209                                 deUint32                                caseCount                       = 0;
12210                                 SpecResource                    specResource;
12211                                 map<string, string>             specs;
12212                                 vector<string>                  extensions;
12213                                 VulkanFeatures                  features;
12214                                 string                                  caseBodies;
12215                                 string                                  caseList;
12216
12217                                 // Generate case
12218                                 {
12219                                         vector<string>  componentList;
12220
12221                                         // Generate component possible indices for OpVectorShuffle for components 0 and 1 in output vector
12222                                         {
12223                                                 deUint32                caseNo          = 0;
12224
12225                                                 for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < src0Type.typeComponents; ++comp0IdxLocal)
12226                                                         componentList.push_back(de::toString(caseNo++));
12227                                                 for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < src1Type.typeComponents; ++comp1IdxLocal)
12228                                                         componentList.push_back(de::toString(caseNo++));
12229                                                 componentList.push_back("0xFFFFFFFF");
12230                                         }
12231
12232                                         for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < componentList.size(); ++comp0IdxLocal)
12233                                         {
12234                                                 for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < componentList.size(); ++comp1IdxLocal)
12235                                                 {
12236                                                         map<string, string>     specCase;
12237                                                         string                          shuffle         = componentList[comp0IdxLocal] + " " + componentList[comp1IdxLocal];
12238
12239                                                         for (deUint32 compIdx = 2; compIdx < dstType.typeComponents; ++compIdx)
12240                                                                 shuffle += " " + de::toString(compIdx - 2);
12241
12242                                                         specCase["case_ndx"]    = de::toString(caseCount);
12243                                                         specCase["shuffle"]             = shuffle;
12244                                                         specCase["tt_dst"]              = dstType.typeName;
12245
12246                                                         caseBodies      += testCaseBody.specialize(specCase);
12247                                                         caseList        += de::toString(caseCount) + " %case_" + de::toString(caseCount) + " ";
12248
12249                                                         caseCount++;
12250                                                 }
12251                                         }
12252                                 }
12253
12254                                 specs["num_data_points"]        = de::toString(numDataPoints);
12255                                 specs["tt_dst"]                         = dstType.typeName;
12256                                 specs["tt_src0"]                        = src0Type.typeName;
12257                                 specs["tt_src1"]                        = src1Type.typeName;
12258                                 specs["case_bodies"]            = caseBodies;
12259                                 specs["case_list"]                      = caseList;
12260                                 specs["case_count"]                     = de::toString(caseCount);
12261
12262                                 fragments["capability"]         = "OpCapability Float16\n";
12263                                 fragments["decoration"]         = decoration.specialize(specs);
12264                                 fragments["pre_main"]           = preMain.specialize(specs);
12265                                 fragments["testfun"]            = testFun.specialize(specs);
12266                                 fragments["testfun"]            += StringTemplate(src0Type.loadFunction).specialize({{"var", "ssbo_src0"}});
12267                                 fragments["testfun"]            += StringTemplate(src1Type.loadFunction).specialize({{"var", "ssbo_src1"}});
12268                                 fragments["testfun"]            += StringTemplate(dstType.storeFunction).specialize({{"var", "ssbo_dst"}});
12269
12270                                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input0Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12271                                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input1Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12272                                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12273                                 specResource.verifyIO = getFloat16VectorShuffleVerifyIOFunc(dstType.typeComponents, src0Type.typeComponents, src1Type.typeComponents);
12274
12275                                 extensions.push_back("VK_KHR_shader_float16_int8");
12276
12277                                 features.extFloat16Int8         = EXTFLOAT16INT8FEATURES_FLOAT16;
12278
12279                                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12280                         }
12281                 }
12282         }
12283
12284         return testGroup.release();
12285 }
12286
12287 bool compareFP16CompositeFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12288 {
12289         if (inputs.size() != 1 || outputAllocs.size() != 1)
12290                 return false;
12291
12292         vector<deUint8> input1Bytes;
12293
12294         inputs[0].getBytes(input1Bytes);
12295
12296         DE_ASSERT(input1Bytes.size() > 0);
12297         DE_ASSERT(input1Bytes.size() % sizeof(deFloat16) == 0);
12298
12299         const size_t                    iterations              = input1Bytes.size() / sizeof(deFloat16);
12300         const deFloat16* const  input1AsFP16    = (const deFloat16*)&input1Bytes[0];
12301         const deFloat16* const  outputAsFP16    = (const deFloat16*)outputAllocs[0]->getHostPtr();
12302         const deFloat16                 exceptionValue  = tcu::Float16(-1.0).bits();
12303         std::string                             error;
12304
12305         for (size_t idx = 0; idx < iterations; ++idx)
12306         {
12307                 if (input1AsFP16[idx] == exceptionValue)
12308                         continue;
12309
12310                 if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
12311                 {
12312                         log << TestLog::Message << "At " << idx << ":" << error << TestLog::EndMessage;
12313
12314                         return false;
12315                 }
12316         }
12317
12318         return true;
12319 }
12320
12321 template<class SpecResource>
12322 tcu::TestCaseGroup* createFloat16CompositeConstructSet (tcu::TestContext& testCtx)
12323 {
12324         de::MovePtr<tcu::TestCaseGroup>         testGroup                               (new tcu::TestCaseGroup(testCtx, "opcompositeconstruct", "OpCompositeConstruct tests"));
12325         const deUint32                                          numElements                             = 8;
12326         const string                                            testName                                = "struct";
12327         const deUint32                                          structItemsCount                = 88;
12328         const deUint32                                          exceptionIndices[]              = { 1, 7, 15, 17, 25, 33, 51, 55, 59, 63, 67, 71, 84, 85, 86, 87 };
12329         const deFloat16                                         exceptionValue                  = tcu::Float16(-1.0).bits();
12330         const deUint32                                          fieldModifier                   = 2;
12331         const deUint32                                          fieldModifiedMulIndex   = 60;
12332         const deUint32                                          fieldModifiedAddIndex   = 66;
12333
12334         const StringTemplate preMain
12335         (
12336                 "    %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
12337                 "          %f16 = OpTypeFloat 16\n"
12338                 "        %v2f16 = OpTypeVector %f16 2\n"
12339                 "        %v3f16 = OpTypeVector %f16 3\n"
12340                 "        %v4f16 = OpTypeVector %f16 4\n"
12341                 "    %c_f16_mod = OpConstant %f16 ${field_modifier}\n"
12342
12343                 "${consts}"
12344
12345                 "     %c_f16_n1 = OpConstant %f16 -1.0\n"
12346                 "   %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_n1 %c_f16_n1\n"
12347                 "      %c_u32_5 = OpConstant %u32 5\n"
12348                 "      %c_u32_6 = OpConstant %u32 6\n"
12349                 "      %c_u32_7 = OpConstant %u32 7\n"
12350                 "      %c_u32_8 = OpConstant %u32 8\n"
12351                 "      %c_u32_9 = OpConstant %u32 9\n"
12352                 "     %c_u32_10 = OpConstant %u32 10\n"
12353                 "     %c_u32_11 = OpConstant %u32 11\n"
12354                 "     %c_u32_12 = OpConstant %u32 12\n"
12355                 "     %c_u32_13 = OpConstant %u32 13\n"
12356                 "     %c_u32_14 = OpConstant %u32 14\n"
12357                 "     %c_u32_15 = OpConstant %u32 15\n"
12358                 "     %c_u32_16 = OpConstant %u32 16\n"
12359                 "     %c_u32_17 = OpConstant %u32 17\n"
12360                 "     %c_u32_18 = OpConstant %u32 18\n"
12361                 "     %c_u32_19 = OpConstant %u32 19\n"
12362                 "     %c_u32_20 = OpConstant %u32 20\n"
12363                 "     %c_u32_21 = OpConstant %u32 21\n"
12364                 "     %c_u32_22 = OpConstant %u32 22\n"
12365                 "     %c_u32_23 = OpConstant %u32 23\n"
12366                 "     %c_u32_24 = OpConstant %u32 24\n"
12367                 "     %c_u32_25 = OpConstant %u32 25\n"
12368                 "     %c_u32_26 = OpConstant %u32 26\n"
12369                 "     %c_u32_27 = OpConstant %u32 27\n"
12370                 "     %c_u32_28 = OpConstant %u32 28\n"
12371                 "     %c_u32_29 = OpConstant %u32 29\n"
12372                 "     %c_u32_30 = OpConstant %u32 30\n"
12373                 "     %c_u32_31 = OpConstant %u32 31\n"
12374                 "     %c_u32_33 = OpConstant %u32 33\n"
12375                 "     %c_u32_34 = OpConstant %u32 34\n"
12376                 "     %c_u32_35 = OpConstant %u32 35\n"
12377                 "     %c_u32_36 = OpConstant %u32 36\n"
12378                 "     %c_u32_37 = OpConstant %u32 37\n"
12379                 "     %c_u32_38 = OpConstant %u32 38\n"
12380                 "     %c_u32_39 = OpConstant %u32 39\n"
12381                 "     %c_u32_40 = OpConstant %u32 40\n"
12382                 "     %c_u32_41 = OpConstant %u32 41\n"
12383                 "     %c_u32_44 = OpConstant %u32 44\n"
12384
12385                 " %f16arr3      = OpTypeArray %f16 %c_u32_3\n"
12386                 " %v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
12387                 " %v2f16arr5    = OpTypeArray %v2f16 %c_u32_5\n"
12388                 " %v3f16arr5    = OpTypeArray %v3f16 %c_u32_5\n"
12389                 " %v4f16arr3    = OpTypeArray %v4f16 %c_u32_3\n"
12390                 " %struct16     = OpTypeStruct %f16 %v2f16arr3\n"
12391                 " %struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
12392                 " %st_test      = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr3 %v2f16arr5 %f16 %v3f16arr5 %v4f16arr3\n"
12393
12394                 "       %up_u32 = OpTypePointer Uniform %u32\n"
12395                 "    %ra_u32_44 = OpTypeArray %u32 %c_u32_44\n"
12396                 "    %ra_ra_u32 = OpTypeArray %ra_u32_44 %c_i32_ndp\n"
12397                 "      %SSBO_st = OpTypeStruct %ra_ra_u32\n"
12398                 "   %up_SSBO_st = OpTypePointer Uniform %SSBO_st\n"
12399
12400                 "     %ssbo_dst = OpVariable %up_SSBO_st Uniform\n"
12401         );
12402
12403         const StringTemplate decoration
12404         (
12405                 "OpDecorate %SSBO_st BufferBlock\n"
12406                 "OpDecorate %ra_u32_44 ArrayStride 4\n"
12407                 "OpDecorate %ra_ra_u32 ArrayStride ${struct_item_size}\n"
12408                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12409                 "OpDecorate %ssbo_dst Binding 1\n"
12410
12411                 "OpMemberDecorate %SSBO_st 0 Offset 0\n"
12412
12413                 "OpDecorate %v2f16arr3 ArrayStride 4\n"
12414                 "OpMemberDecorate %struct16 0 Offset 0\n"
12415                 "OpMemberDecorate %struct16 1 Offset 4\n"
12416                 "OpDecorate %struct16arr3 ArrayStride 16\n"
12417                 "OpDecorate %f16arr3 ArrayStride 2\n"
12418                 "OpDecorate %v2f16arr5 ArrayStride 4\n"
12419                 "OpDecorate %v3f16arr5 ArrayStride 8\n"
12420                 "OpDecorate %v4f16arr3 ArrayStride 8\n"
12421
12422                 "OpMemberDecorate %st_test 0 Offset 0\n"
12423                 "OpMemberDecorate %st_test 1 Offset 4\n"
12424                 "OpMemberDecorate %st_test 2 Offset 8\n"
12425                 "OpMemberDecorate %st_test 3 Offset 16\n"
12426                 "OpMemberDecorate %st_test 4 Offset 24\n"
12427                 "OpMemberDecorate %st_test 5 Offset 32\n"
12428                 "OpMemberDecorate %st_test 6 Offset 80\n"
12429                 "OpMemberDecorate %st_test 7 Offset 100\n"
12430                 "OpMemberDecorate %st_test 8 Offset 104\n"
12431                 "OpMemberDecorate %st_test 9 Offset 144\n"
12432         );
12433
12434         const StringTemplate testFun
12435         (
12436                 " %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12437                 "     %param = OpFunctionParameter %v4f32\n"
12438                 "     %entry = OpLabel\n"
12439
12440                 "         %i = OpVariable %fp_i32 Function\n"
12441                 "              OpStore %i %c_i32_0\n"
12442
12443                 "  %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12444                 "              OpSelectionMerge %end_if None\n"
12445                 "              OpBranchConditional %will_run %run_test %end_if\n"
12446
12447                 "  %run_test = OpLabel\n"
12448                 "              OpBranch %loop\n"
12449
12450                 "      %loop = OpLabel\n"
12451                 "     %i_cmp = OpLoad %i32 %i\n"
12452                 "        %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12453                 "              OpLoopMerge %merge %next None\n"
12454                 "              OpBranchConditional %lt %write %merge\n"
12455
12456                 "     %write = OpLabel\n"
12457                 "       %ndx = OpLoad %i32 %i\n"
12458
12459                 "      %fld1 = OpCompositeConstruct %v2f16 %c_f16_2 %c_f16_3\n"
12460                 "      %fld2 = OpCompositeConstruct %v3f16 %c_f16_4 %c_f16_5 %c_f16_6\n"
12461                 "      %fld3 = OpCompositeConstruct %v4f16 %c_f16_8 %c_f16_9 %c_f16_10 %c_f16_11\n"
12462
12463                 "      %fld4 = OpCompositeConstruct %f16arr3 %c_f16_12 %c_f16_13 %c_f16_14\n"
12464
12465                 "%fld5_0_1_0 = OpCompositeConstruct %v2f16 %c_f16_18 %c_f16_19\n"
12466                 "%fld5_0_1_1 = OpCompositeConstruct %v2f16 %c_f16_20 %c_f16_21\n"
12467                 "%fld5_0_1_2 = OpCompositeConstruct %v2f16 %c_f16_22 %c_f16_23\n"
12468                 "  %fld5_0_1 = OpCompositeConstruct %v2f16arr3 %fld5_0_1_0 %fld5_0_1_1 %fld5_0_1_2\n"
12469                 "    %fld5_0 = OpCompositeConstruct %struct16 %c_f16_16 %fld5_0_1\n"
12470
12471                 "%fld5_1_1_0 = OpCompositeConstruct %v2f16 %c_f16_26 %c_f16_27\n"
12472                 "%fld5_1_1_1 = OpCompositeConstruct %v2f16 %c_f16_28 %c_f16_29\n"
12473                 "%fld5_1_1_2 = OpCompositeConstruct %v2f16 %c_f16_30 %c_f16_31\n"
12474                 "  %fld5_1_1 = OpCompositeConstruct %v2f16arr3 %fld5_1_1_0 %fld5_1_1_1 %fld5_1_1_2\n"
12475                 "    %fld5_1 = OpCompositeConstruct %struct16 %c_f16_24 %fld5_1_1\n"
12476
12477                 "%fld5_2_1_0 = OpCompositeConstruct %v2f16 %c_f16_34 %c_f16_35\n"
12478                 "%fld5_2_1_1 = OpCompositeConstruct %v2f16 %c_f16_36 %c_f16_37\n"
12479                 "%fld5_2_1_2 = OpCompositeConstruct %v2f16 %c_f16_38 %c_f16_39\n"
12480                 "  %fld5_2_1 = OpCompositeConstruct %v2f16arr3 %fld5_2_1_0 %fld5_2_1_1 %fld5_2_1_2\n"
12481                 "    %fld5_2 = OpCompositeConstruct %struct16 %c_f16_32 %fld5_2_1\n"
12482
12483                 "      %fld5 = OpCompositeConstruct %struct16arr3 %fld5_0 %fld5_1 %fld5_2\n"
12484
12485                 "    %fld6_0 = OpCompositeConstruct %v2f16 %c_f16_40 %c_f16_41\n"
12486                 "    %fld6_1 = OpCompositeConstruct %v2f16 %c_f16_42 %c_f16_43\n"
12487                 "    %fld6_2 = OpCompositeConstruct %v2f16 %c_f16_44 %c_f16_45\n"
12488                 "    %fld6_3 = OpCompositeConstruct %v2f16 %c_f16_46 %c_f16_47\n"
12489                 "    %fld6_4 = OpCompositeConstruct %v2f16 %c_f16_48 %c_f16_49\n"
12490                 "      %fld6 = OpCompositeConstruct %v2f16arr5 %fld6_0 %fld6_1 %fld6_2 %fld6_3 %fld6_4\n"
12491
12492                 "      %fndx = OpConvertSToF %f16 %ndx\n"
12493                 "  %fld8_2a0 = OpFMul %f16 %fndx %c_f16_mod\n"
12494                 "  %fld8_3b1 = OpFAdd %f16 %fndx %c_f16_mod\n"
12495
12496                 "   %fld8_2a = OpCompositeConstruct %v2f16 %fld8_2a0 %c_f16_61\n"
12497                 "   %fld8_3b = OpCompositeConstruct %v2f16 %c_f16_65 %fld8_3b1\n"
12498                 "    %fld8_0 = OpCompositeConstruct %v3f16 %c_f16_52 %c_f16_53 %c_f16_54\n"
12499                 "    %fld8_1 = OpCompositeConstruct %v3f16 %c_f16_56 %c_f16_57 %c_f16_58\n"
12500                 "    %fld8_2 = OpCompositeConstruct %v3f16 %fld8_2a %c_f16_62\n"
12501                 "    %fld8_3 = OpCompositeConstruct %v3f16 %c_f16_64 %fld8_3b\n"
12502                 "    %fld8_4 = OpCompositeConstruct %v3f16 %c_f16_68 %c_f16_69 %c_f16_70\n"
12503                 "      %fld8 = OpCompositeConstruct %v3f16arr5 %fld8_0 %fld8_1 %fld8_2 %fld8_3 %fld8_4\n"
12504
12505                 "    %fld9_0 = OpCompositeConstruct %v4f16 %c_f16_72 %c_f16_73 %c_f16_74 %c_f16_75\n"
12506                 "    %fld9_1 = OpCompositeConstruct %v4f16 %c_f16_76 %c_f16_77 %c_f16_78 %c_f16_79\n"
12507                 "    %fld9_2 = OpCompositeConstruct %v4f16 %c_f16_80 %c_f16_81 %c_f16_82 %c_f16_83\n"
12508                 "      %fld9 = OpCompositeConstruct %v4f16arr3 %fld9_0 %fld9_1 %fld9_2\n"
12509
12510                 "    %st_val = OpCompositeConstruct %st_test %c_f16_0 %fld1 %fld2 %fld3 %fld4 %fld5 %fld6 %c_f16_50 %fld8 %fld9\n"
12511
12512                 // Storage section: all elements that are not directly accessed should
12513                 // have the value of -1.0. This means for f16 and v3f16 stores the v2f16
12514                 // is constructed with one element from a constant -1.0.
12515                 // half offset 0
12516                 "      %ex_0 = OpCompositeExtract %f16 %st_val 0\n"
12517                 "     %vec_0 = OpCompositeConstruct %v2f16 %ex_0 %c_f16_n1\n"
12518                 "      %bc_0 = OpBitcast %u32 %vec_0\n"
12519                 "     %gep_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_0\n"
12520                 "              OpStore %gep_0 %bc_0\n"
12521
12522                 // <2 x half> offset 4
12523                 "      %ex_1 = OpCompositeExtract %v2f16 %st_val 1\n"
12524                 "      %bc_1 = OpBitcast %u32 %ex_1\n"
12525                 "     %gep_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_1\n"
12526                 "              OpStore %gep_1 %bc_1\n"
12527
12528                 // <3 x half> offset 8
12529                 "      %ex_2 = OpCompositeExtract %v3f16 %st_val 2\n"
12530                 "    %ex_2_0 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 0 1\n"
12531                 "    %ex_2_1 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 2 3\n"
12532                 "    %bc_2_0 = OpBitcast %u32 %ex_2_0\n"
12533                 "    %bc_2_1 = OpBitcast %u32 %ex_2_1\n"
12534                 "   %gep_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_2\n"
12535                 "   %gep_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_3\n"
12536                 "              OpStore %gep_2_0 %bc_2_0\n"
12537                 "              OpStore %gep_2_1 %bc_2_1\n"
12538
12539                 // <4 x half> offset 16
12540                 "      %ex_3 = OpCompositeExtract %v4f16 %st_val 3\n"
12541                 "    %ex_3_0 = OpVectorShuffle %v2f16 %ex_3 %ex_3 0 1\n"
12542                 "    %ex_3_1 = OpVectorShuffle %v2f16 %ex_3 %ex_3 2 3\n"
12543                 "    %bc_3_0 = OpBitcast %u32 %ex_3_0\n"
12544                 "    %bc_3_1 = OpBitcast %u32 %ex_3_1\n"
12545                 "   %gep_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_4\n"
12546                 "   %gep_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_5\n"
12547                 "              OpStore %gep_3_0 %bc_3_0\n"
12548                 "              OpStore %gep_3_1 %bc_3_1\n"
12549
12550                 // [3 x half] offset 24
12551                 "    %ex_4_0 = OpCompositeExtract %f16 %st_val 4 0\n"
12552                 "    %ex_4_1 = OpCompositeExtract %f16 %st_val 4 1\n"
12553                 "    %ex_4_2 = OpCompositeExtract %f16 %st_val 4 2\n"
12554                 "   %vec_4_0 = OpCompositeConstruct %v2f16 %ex_4_0 %ex_4_1\n"
12555                 "   %vec_4_1 = OpCompositeConstruct %v2f16 %ex_4_2 %c_f16_n1\n"
12556                 "    %bc_4_0 = OpBitcast %u32 %vec_4_0\n"
12557                 "    %bc_4_1 = OpBitcast %u32 %vec_4_1\n"
12558                 "   %gep_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_6\n"
12559                 "   %gep_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_7\n"
12560                 "              OpStore %gep_4_0 %bc_4_0\n"
12561                 "              OpStore %gep_4_1 %bc_4_1\n"
12562
12563                 // [3 x {half, [3 x <2 x half>]}] offset 32
12564                 "    %ex_5_0 = OpCompositeExtract %struct16 %st_val 5 0\n"
12565                 "    %ex_5_1 = OpCompositeExtract %struct16 %st_val 5 1\n"
12566                 "    %ex_5_2 = OpCompositeExtract %struct16 %st_val 5 2\n"
12567                 "  %ex_5_0_0 = OpCompositeExtract %f16 %ex_5_0 0\n"
12568                 "  %ex_5_1_0 = OpCompositeExtract %f16 %ex_5_1 0\n"
12569                 "  %ex_5_2_0 = OpCompositeExtract %f16 %ex_5_2 0\n"
12570                 "%ex_5_0_1_0 = OpCompositeExtract %v2f16 %ex_5_0 1 0\n"
12571                 "%ex_5_0_1_1 = OpCompositeExtract %v2f16 %ex_5_0 1 1\n"
12572                 "%ex_5_0_1_2 = OpCompositeExtract %v2f16 %ex_5_0 1 2\n"
12573                 "%ex_5_1_1_0 = OpCompositeExtract %v2f16 %ex_5_1 1 0\n"
12574                 "%ex_5_1_1_1 = OpCompositeExtract %v2f16 %ex_5_1 1 1\n"
12575                 "%ex_5_1_1_2 = OpCompositeExtract %v2f16 %ex_5_1 1 2\n"
12576                 "%ex_5_2_1_0 = OpCompositeExtract %v2f16 %ex_5_2 1 0\n"
12577                 "%ex_5_2_1_1 = OpCompositeExtract %v2f16 %ex_5_2 1 1\n"
12578                 "%ex_5_2_1_2 = OpCompositeExtract %v2f16 %ex_5_2 1 2\n"
12579                 " %vec_5_0_0 = OpCompositeConstruct %v2f16 %ex_5_0_0 %c_f16_n1\n"
12580                 " %vec_5_1_0 = OpCompositeConstruct %v2f16 %ex_5_1_0 %c_f16_n1\n"
12581                 " %vec_5_2_0 = OpCompositeConstruct %v2f16 %ex_5_2_0 %c_f16_n1\n"
12582                 "  %bc_5_0_0 = OpBitcast %u32 %vec_5_0_0\n"
12583                 "  %bc_5_1_0 = OpBitcast %u32 %vec_5_1_0\n"
12584                 "  %bc_5_2_0 = OpBitcast %u32 %vec_5_2_0\n"
12585                 "%bc_5_0_1_0 = OpBitcast %u32 %ex_5_0_1_0\n"
12586                 "%bc_5_0_1_1 = OpBitcast %u32 %ex_5_0_1_1\n"
12587                 "%bc_5_0_1_2 = OpBitcast %u32 %ex_5_0_1_2\n"
12588                 "%bc_5_1_1_0 = OpBitcast %u32 %ex_5_1_1_0\n"
12589                 "%bc_5_1_1_1 = OpBitcast %u32 %ex_5_1_1_1\n"
12590                 "%bc_5_1_1_2 = OpBitcast %u32 %ex_5_1_1_2\n"
12591                 "%bc_5_2_1_0 = OpBitcast %u32 %ex_5_2_1_0\n"
12592                 "%bc_5_2_1_1 = OpBitcast %u32 %ex_5_2_1_1\n"
12593                 "%bc_5_2_1_2 = OpBitcast %u32 %ex_5_2_1_2\n"
12594                 "  %gep_5_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_8\n"
12595                 "%gep_5_0_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_9\n"
12596                 "%gep_5_0_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_10\n"
12597                 "%gep_5_0_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_11\n"
12598                 "  %gep_5_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_12\n"
12599                 "%gep_5_1_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_13\n"
12600                 "%gep_5_1_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_14\n"
12601                 "%gep_5_1_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_15\n"
12602                 "  %gep_5_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_16\n"
12603                 "%gep_5_2_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_17\n"
12604                 "%gep_5_2_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_18\n"
12605                 "%gep_5_2_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_19\n"
12606                 "              OpStore %gep_5_0_0 %bc_5_0_0\n"
12607                 "              OpStore %gep_5_0_1_0 %bc_5_0_1_0\n"
12608                 "              OpStore %gep_5_0_1_1 %bc_5_0_1_1\n"
12609                 "              OpStore %gep_5_0_1_2 %bc_5_0_1_2\n"
12610                 "              OpStore %gep_5_1_0 %bc_5_1_0\n"
12611                 "              OpStore %gep_5_1_1_0 %bc_5_1_1_0\n"
12612                 "              OpStore %gep_5_1_1_1 %bc_5_1_1_1\n"
12613                 "              OpStore %gep_5_1_1_2 %bc_5_1_1_2\n"
12614                 "              OpStore %gep_5_2_0 %bc_5_2_0\n"
12615                 "              OpStore %gep_5_2_1_0 %bc_5_2_1_0\n"
12616                 "              OpStore %gep_5_2_1_1 %bc_5_2_1_1\n"
12617                 "              OpStore %gep_5_2_1_2 %bc_5_2_1_2\n"
12618
12619                 // [5 x <2 x half>] offset 80
12620                 "    %ex_6_0 = OpCompositeExtract %v2f16 %st_val 6 0\n"
12621                 "    %ex_6_1 = OpCompositeExtract %v2f16 %st_val 6 1\n"
12622                 "    %ex_6_2 = OpCompositeExtract %v2f16 %st_val 6 2\n"
12623                 "    %ex_6_3 = OpCompositeExtract %v2f16 %st_val 6 3\n"
12624                 "    %ex_6_4 = OpCompositeExtract %v2f16 %st_val 6 4\n"
12625                 "    %bc_6_0 = OpBitcast %u32 %ex_6_0\n"
12626                 "    %bc_6_1 = OpBitcast %u32 %ex_6_1\n"
12627                 "    %bc_6_2 = OpBitcast %u32 %ex_6_2\n"
12628                 "    %bc_6_3 = OpBitcast %u32 %ex_6_3\n"
12629                 "    %bc_6_4 = OpBitcast %u32 %ex_6_4\n"
12630                 "   %gep_6_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_20\n"
12631                 "   %gep_6_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_21\n"
12632                 "   %gep_6_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_22\n"
12633                 "   %gep_6_3 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_23\n"
12634                 "   %gep_6_4 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_24\n"
12635                 "              OpStore %gep_6_0 %bc_6_0\n"
12636                 "              OpStore %gep_6_1 %bc_6_1\n"
12637                 "              OpStore %gep_6_2 %bc_6_2\n"
12638                 "              OpStore %gep_6_3 %bc_6_3\n"
12639                 "              OpStore %gep_6_4 %bc_6_4\n"
12640
12641                 // half offset 100
12642                 "      %ex_7 = OpCompositeExtract %f16 %st_val 7\n"
12643                 "     %vec_7 = OpCompositeConstruct %v2f16 %ex_7 %c_f16_n1\n"
12644                 "      %bc_7 = OpBitcast %u32 %vec_7\n"
12645                 "     %gep_7 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_25\n"
12646                 "              OpStore %gep_7 %bc_7\n"
12647
12648                 // [5 x <3 x half>] offset 104
12649                 "    %ex_8_0 = OpCompositeExtract %v3f16 %st_val 8 0\n"
12650                 "    %ex_8_1 = OpCompositeExtract %v3f16 %st_val 8 1\n"
12651                 "    %ex_8_2 = OpCompositeExtract %v3f16 %st_val 8 2\n"
12652                 "    %ex_8_3 = OpCompositeExtract %v3f16 %st_val 8 3\n"
12653                 "    %ex_8_4 = OpCompositeExtract %v3f16 %st_val 8 4\n"
12654                 " %vec_8_0_0 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 0 1\n"
12655                 " %vec_8_0_1 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 2 3\n"
12656                 " %vec_8_1_0 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 0 1\n"
12657                 " %vec_8_1_1 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 2 3\n"
12658                 " %vec_8_2_0 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 0 1\n"
12659                 " %vec_8_2_1 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 2 3\n"
12660                 " %vec_8_3_0 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 0 1\n"
12661                 " %vec_8_3_1 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 2 3\n"
12662                 " %vec_8_4_0 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 0 1\n"
12663                 " %vec_8_4_1 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 2 3\n"
12664                 "  %bc_8_0_0 = OpBitcast %u32 %vec_8_0_0\n"
12665                 "  %bc_8_0_1 = OpBitcast %u32 %vec_8_0_1\n"
12666                 "  %bc_8_1_0 = OpBitcast %u32 %vec_8_1_0\n"
12667                 "  %bc_8_1_1 = OpBitcast %u32 %vec_8_1_1\n"
12668                 "  %bc_8_2_0 = OpBitcast %u32 %vec_8_2_0\n"
12669                 "  %bc_8_2_1 = OpBitcast %u32 %vec_8_2_1\n"
12670                 "  %bc_8_3_0 = OpBitcast %u32 %vec_8_3_0\n"
12671                 "  %bc_8_3_1 = OpBitcast %u32 %vec_8_3_1\n"
12672                 "  %bc_8_4_0 = OpBitcast %u32 %vec_8_4_0\n"
12673                 "  %bc_8_4_1 = OpBitcast %u32 %vec_8_4_1\n"
12674                 " %gep_8_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_26\n"
12675                 " %gep_8_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_27\n"
12676                 " %gep_8_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_28\n"
12677                 " %gep_8_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_29\n"
12678                 " %gep_8_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_30\n"
12679                 " %gep_8_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_31\n"
12680                 " %gep_8_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_32\n"
12681                 " %gep_8_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_33\n"
12682                 " %gep_8_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_34\n"
12683                 " %gep_8_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_35\n"
12684                 "              OpStore %gep_8_0_0 %bc_8_0_0\n"
12685                 "              OpStore %gep_8_0_1 %bc_8_0_1\n"
12686                 "              OpStore %gep_8_1_0 %bc_8_1_0\n"
12687                 "              OpStore %gep_8_1_1 %bc_8_1_1\n"
12688                 "              OpStore %gep_8_2_0 %bc_8_2_0\n"
12689                 "              OpStore %gep_8_2_1 %bc_8_2_1\n"
12690                 "              OpStore %gep_8_3_0 %bc_8_3_0\n"
12691                 "              OpStore %gep_8_3_1 %bc_8_3_1\n"
12692                 "              OpStore %gep_8_4_0 %bc_8_4_0\n"
12693                 "              OpStore %gep_8_4_1 %bc_8_4_1\n"
12694
12695                 // [3 x <4 x half>] offset 144
12696                 "    %ex_9_0 = OpCompositeExtract %v4f16 %st_val 9 0\n"
12697                 "    %ex_9_1 = OpCompositeExtract %v4f16 %st_val 9 1\n"
12698                 "    %ex_9_2 = OpCompositeExtract %v4f16 %st_val 9 2\n"
12699                 " %vec_9_0_0 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 0 1\n"
12700                 " %vec_9_0_1 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 2 3\n"
12701                 " %vec_9_1_0 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 0 1\n"
12702                 " %vec_9_1_1 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 2 3\n"
12703                 " %vec_9_2_0 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 0 1\n"
12704                 " %vec_9_2_1 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 2 3\n"
12705                 "  %bc_9_0_0 = OpBitcast %u32 %vec_9_0_0\n"
12706                 "  %bc_9_0_1 = OpBitcast %u32 %vec_9_0_1\n"
12707                 "  %bc_9_1_0 = OpBitcast %u32 %vec_9_1_0\n"
12708                 "  %bc_9_1_1 = OpBitcast %u32 %vec_9_1_1\n"
12709                 "  %bc_9_2_0 = OpBitcast %u32 %vec_9_2_0\n"
12710                 "  %bc_9_2_1 = OpBitcast %u32 %vec_9_2_1\n"
12711                 " %gep_9_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_36\n"
12712                 " %gep_9_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_37\n"
12713                 " %gep_9_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_38\n"
12714                 " %gep_9_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_39\n"
12715                 " %gep_9_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_40\n"
12716                 " %gep_9_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_41\n"
12717                 "              OpStore %gep_9_0_0 %bc_9_0_0\n"
12718                 "              OpStore %gep_9_0_1 %bc_9_0_1\n"
12719                 "              OpStore %gep_9_1_0 %bc_9_1_0\n"
12720                 "              OpStore %gep_9_1_1 %bc_9_1_1\n"
12721                 "              OpStore %gep_9_2_0 %bc_9_2_0\n"
12722                 "              OpStore %gep_9_2_1 %bc_9_2_1\n"
12723
12724                 "              OpBranch %next\n"
12725
12726                 "      %next = OpLabel\n"
12727                 "     %i_cur = OpLoad %i32 %i\n"
12728                 "     %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12729                 "              OpStore %i %i_new\n"
12730                 "              OpBranch %loop\n"
12731
12732                 "     %merge = OpLabel\n"
12733                 "              OpBranch %end_if\n"
12734                 "    %end_if = OpLabel\n"
12735                 "              OpReturnValue %param\n"
12736                 "              OpFunctionEnd\n"
12737         );
12738
12739         {
12740                 SpecResource            specResource;
12741                 map<string, string>     specs;
12742                 VulkanFeatures          features;
12743                 map<string, string>     fragments;
12744                 vector<string>          extensions;
12745                 vector<deFloat16>       expectedOutput;
12746                 string                          consts;
12747
12748                 for (deUint32 elementNdx = 0; elementNdx < numElements; ++elementNdx)
12749                 {
12750                         vector<deFloat16>       expectedIterationOutput;
12751
12752                         for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
12753                                 expectedIterationOutput.push_back(tcu::Float16(float(structItemNdx)).bits());
12754
12755                         for (deUint32 structItemNdx = 0; structItemNdx < DE_LENGTH_OF_ARRAY(exceptionIndices); ++structItemNdx)
12756                                 expectedIterationOutput[exceptionIndices[structItemNdx]] = exceptionValue;
12757
12758                         expectedIterationOutput[fieldModifiedMulIndex] = tcu::Float16(float(elementNdx * fieldModifier)).bits();
12759                         expectedIterationOutput[fieldModifiedAddIndex] = tcu::Float16(float(elementNdx + fieldModifier)).bits();
12760
12761                         expectedOutput.insert(expectedOutput.end(), expectedIterationOutput.begin(), expectedIterationOutput.end());
12762                 }
12763
12764                 for (deUint32 i = 0; i < structItemsCount; ++i)
12765                         consts += "     %c_f16_" + de::toString(i) + " = OpConstant %f16 "  + de::toString(i) + "\n";
12766
12767                 specs["num_elements"]           = de::toString(numElements);
12768                 specs["struct_item_size"]       = de::toString(structItemsCount * sizeof(deFloat16));
12769                 specs["field_modifier"]         = de::toString(fieldModifier);
12770                 specs["consts"]                         = consts;
12771
12772                 fragments["capability"]         = "OpCapability Float16\n";
12773                 fragments["decoration"]         = decoration.specialize(specs);
12774                 fragments["pre_main"]           = preMain.specialize(specs);
12775                 fragments["testfun"]            = testFun.specialize(specs);
12776
12777                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12778                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12779                 specResource.verifyIO = compareFP16CompositeFunc;
12780
12781                 extensions.push_back("VK_KHR_shader_float16_int8");
12782
12783                 features.extFloat16Int8         = EXTFLOAT16INT8FEATURES_FLOAT16;
12784
12785                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12786         }
12787
12788         return testGroup.release();
12789 }
12790
12791 template<class SpecResource>
12792 tcu::TestCaseGroup* createFloat16CompositeInsertExtractSet (tcu::TestContext& testCtx, const char* op)
12793 {
12794         de::MovePtr<tcu::TestCaseGroup>         testGroup               (new tcu::TestCaseGroup(testCtx, de::toLower(op).c_str(), op));
12795         const deFloat16                                         exceptionValue  = tcu::Float16(-1.0).bits();
12796         const string                                            opName                  (op);
12797         const deUint32                                          opIndex                 = (opName == "OpCompositeInsert") ? 0
12798                                                                                                                 : (opName == "OpCompositeExtract") ? 1
12799                                                                                                                 : -1;
12800
12801         const StringTemplate preMain
12802         (
12803                 "   %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
12804                 "  %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
12805                 "  %c_i32_size = OpConstant %i32 ${struct_u32s}\n"
12806                 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12807                 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
12808                 "         %f16 = OpTypeFloat 16\n"
12809                 "       %v2f16 = OpTypeVector %f16 2\n"
12810                 "       %v3f16 = OpTypeVector %f16 3\n"
12811                 "       %v4f16 = OpTypeVector %f16 4\n"
12812                 "    %c_f16_na = OpConstant %f16 -1.0\n"
12813                 "  %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_na %c_f16_na\n"
12814                 "     %c_u32_5 = OpConstant %u32 5\n"
12815                 "     %c_i32_5 = OpConstant %i32 5\n"
12816                 "     %c_i32_6 = OpConstant %i32 6\n"
12817                 "     %c_i32_7 = OpConstant %i32 7\n"
12818                 "     %c_i32_8 = OpConstant %i32 8\n"
12819                 "     %c_i32_9 = OpConstant %i32 9\n"
12820                 "    %c_i32_10 = OpConstant %i32 10\n"
12821                 "    %c_i32_11 = OpConstant %i32 11\n"
12822
12823                 "%f16arr3      = OpTypeArray %f16 %c_u32_3\n"
12824                 "%v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
12825                 "%v2f16arr5    = OpTypeArray %v2f16 %c_u32_5\n"
12826                 "%v3f16arr5    = OpTypeArray %v3f16 %c_u32_5\n"
12827                 "%v4f16arr3    = OpTypeArray %v4f16 %c_u32_3\n"
12828                 "%struct16     = OpTypeStruct %f16 %v2f16arr3\n"
12829                 "%struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
12830                 "%st_test      = OpTypeStruct %${field_type}\n"
12831
12832                 "      %ra_f16 = OpTypeArray %u32 %c_i32_hndp\n"
12833                 "       %ra_st = OpTypeArray %u32 %c_i32_size\n"
12834                 "      %up_u32 = OpTypePointer Uniform %u32\n"
12835                 "     %st_test_i32_fn = OpTypeFunction %st_test %i32\n"
12836                 "%void_st_test_i32_fn = OpTypeFunction %void %st_test %i32\n"
12837                 "         %f16_i32_fn = OpTypeFunction %f16 %i32\n"
12838                 "    %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12839                 "       %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12840                 "  %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
12841
12842                 "${op_premain_decls}"
12843
12844                 " %up_SSBO_src = OpTypePointer Uniform %SSBO_src\n"
12845                 " %up_SSBO_dst = OpTypePointer Uniform %SSBO_dst\n"
12846
12847                 "    %ssbo_src = OpVariable %up_SSBO_src Uniform\n"
12848                 "    %ssbo_dst = OpVariable %up_SSBO_dst Uniform\n"
12849         );
12850
12851         const StringTemplate decoration
12852         (
12853                 "OpDecorate %SSBO_src BufferBlock\n"
12854                 "OpDecorate %SSBO_dst BufferBlock\n"
12855                 "OpDecorate %ra_f16 ArrayStride 4\n"
12856                 "OpDecorate %ra_st ArrayStride 4\n"
12857                 "OpDecorate %ssbo_src DescriptorSet 0\n"
12858                 "OpDecorate %ssbo_src Binding 0\n"
12859                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12860                 "OpDecorate %ssbo_dst Binding 1\n"
12861
12862                 "OpMemberDecorate %SSBO_src 0 Offset 0\n"
12863                 "OpMemberDecorate %SSBO_dst 0 Offset 0\n"
12864
12865                 "OpDecorate %v2f16arr3 ArrayStride 4\n"
12866                 "OpMemberDecorate %struct16 0 Offset 0\n"
12867                 "OpMemberDecorate %struct16 1 Offset 4\n"
12868                 "OpDecorate %struct16arr3 ArrayStride 16\n"
12869                 "OpDecorate %f16arr3 ArrayStride 2\n"
12870                 "OpDecorate %v2f16arr5 ArrayStride 4\n"
12871                 "OpDecorate %v3f16arr5 ArrayStride 8\n"
12872                 "OpDecorate %v4f16arr3 ArrayStride 8\n"
12873
12874                 "OpMemberDecorate %st_test 0 Offset 0\n"
12875         );
12876
12877         const StringTemplate testFun
12878         (
12879                 " %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12880                 "     %param = OpFunctionParameter %v4f32\n"
12881                 "     %entry = OpLabel\n"
12882
12883                 "         %i = OpVariable %fp_i32 Function\n"
12884                 "              OpStore %i %c_i32_0\n"
12885
12886                 "  %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12887                 "              OpSelectionMerge %end_if None\n"
12888                 "              OpBranchConditional %will_run %run_test %end_if\n"
12889
12890                 "  %run_test = OpLabel\n"
12891                 "              OpBranch %loop\n"
12892
12893                 "      %loop = OpLabel\n"
12894                 "     %i_cmp = OpLoad %i32 %i\n"
12895                 "        %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12896                 "              OpLoopMerge %merge %next None\n"
12897                 "              OpBranchConditional %lt %write %merge\n"
12898
12899                 "     %write = OpLabel\n"
12900                 "       %ndx = OpLoad %i32 %i\n"
12901
12902                 "${op_sw_fun_call}"
12903
12904                 "    %dst_st = OpFunctionCall %void %${st_call} %val_dst %${st_ndx}\n"
12905                 "              OpBranch %next\n"
12906
12907                 "      %next = OpLabel\n"
12908                 "     %i_cur = OpLoad %i32 %i\n"
12909                 "     %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12910                 "              OpStore %i %i_new\n"
12911                 "              OpBranch %loop\n"
12912
12913                 "     %merge = OpLabel\n"
12914                 "              OpBranch %end_if\n"
12915                 "    %end_if = OpLabel\n"
12916                 "              OpReturnValue %param\n"
12917                 "              OpFunctionEnd\n"
12918
12919                 "${op_sw_fun_header}"
12920                 " %sw_param = OpFunctionParameter %st_test\n"
12921                 "%sw_paramn = OpFunctionParameter %i32\n"
12922                 " %sw_entry = OpLabel\n"
12923                 "             OpSelectionMerge %switch_e None\n"
12924                 "             OpSwitch %sw_paramn %default ${case_list}\n"
12925
12926                 "${case_bodies}"
12927
12928                 "%default   = OpLabel\n"
12929                 "             OpReturnValue ${op_case_default_value}\n"
12930                 "%switch_e  = OpLabel\n"
12931                 "             OpUnreachable\n" // Unreachable merge block for switch statement
12932                 "             OpFunctionEnd\n"
12933         );
12934
12935         const StringTemplate testCaseBody
12936         (
12937                 "%case_${case_ndx}    = OpLabel\n"
12938                 "%val_ret_${case_ndx} = ${op_name} ${op_args_part} ${access_path}\n"
12939                 "             OpReturnValue %val_ret_${case_ndx}\n"
12940         );
12941
12942         const string loadF16
12943         (
12944                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
12945                 "  %ld_${var}_param = OpFunctionParameter %i32\n"
12946                 "  %ld_${var}_entry = OpLabel\n"
12947                 "   %ld_${var}_call = OpFunctionCall %f16 %ld_arg_${var} %ld_${var}_param\n"
12948                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
12949                 "                     OpReturnValue %ld_${var}_st_test\n"
12950                 "                     OpFunctionEnd\n" +
12951                 loadScalarF16FromUint
12952         );
12953
12954         const string loadV2F16
12955         (
12956                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
12957                 "  %ld_${var}_param = OpFunctionParameter %i32\n"
12958                 "  %ld_${var}_entry = OpLabel\n"
12959                 "   %ld_${var}_call = OpFunctionCall %v2f16 %ld_arg_${var} %ld_${var}_param\n"
12960                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
12961                 "                     OpReturnValue %ld_${var}_st_test\n"
12962                 "                     OpFunctionEnd\n" +
12963                 loadV2F16FromUint
12964         );
12965
12966         const string loadV3F16
12967         (
12968                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
12969                 "  %ld_${var}_param = OpFunctionParameter %i32\n"
12970                 "  %ld_${var}_entry = OpLabel\n"
12971                 "  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
12972                 "  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
12973                 "   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
12974                 "   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
12975                 "   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
12976                 "   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
12977                 "    %ld_${var}_vec = OpVectorShuffle %v3f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2\n"
12978                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
12979                 "                     OpReturnValue %ld_${var}_st_test\n"
12980                 "                     OpFunctionEnd\n"
12981         );
12982
12983         const string loadV4F16
12984         (
12985                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
12986                 "  %ld_${var}_param = OpFunctionParameter %i32\n"
12987                 "  %ld_${var}_entry = OpLabel\n"
12988                 "  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
12989                 "  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
12990                 "   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
12991                 "   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
12992                 "   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
12993                 "   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
12994                 "    %ld_${var}_vec = OpVectorShuffle %v4f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2 3\n"
12995                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
12996                 "                     OpReturnValue %ld_${var}_st_test\n"
12997                 "                     OpFunctionEnd\n"
12998         );
12999
13000         const string loadF16Arr3
13001         (
13002                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13003                 "  %ld_${var}_param = OpFunctionParameter %i32\n"
13004                 "  %ld_${var}_entry = OpLabel\n"
13005                 "  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_0\n"
13006                 "  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_1\n"
13007                 "   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13008                 "   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13009                 "   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13010                 "   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13011                 "   %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0 0\n"
13012                 "   %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_0 1\n"
13013                 "   %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_1 0\n"
13014                 "   %ld_${var}_cons = OpCompositeConstruct %f16arr3 %ld_${var}_ex_0 %ld_${var}_ex_1 %ld_${var}_ex_2\n"
13015                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13016                 "                     OpReturnValue %ld_${var}_st_test\n"
13017                 "                     OpFunctionEnd\n"
13018         );
13019
13020         const string loadV2F16Arr5
13021         (
13022                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13023                 "  %ld_${var}_param = OpFunctionParameter %i32\n"
13024                 "  %ld_${var}_label = OpLabel\n"
13025                 "  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13026                 "  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13027                 "  %ld_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13028                 "  %ld_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13029                 "  %ld_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13030                 "   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13031                 "   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13032                 "   %ld_${var}_ld_2 = OpLoad %u32 %ld_${var}_gep_2\n"
13033                 "   %ld_${var}_ld_3 = OpLoad %u32 %ld_${var}_gep_3\n"
13034                 "   %ld_${var}_ld_4 = OpLoad %u32 %ld_${var}_gep_4\n"
13035                 "   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13036                 "   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13037                 "   %ld_${var}_bc_2 = OpBitcast %v2f16 %ld_${var}_ld_2\n"
13038                 "   %ld_${var}_bc_3 = OpBitcast %v2f16 %ld_${var}_ld_3\n"
13039                 "   %ld_${var}_bc_4 = OpBitcast %v2f16 %ld_${var}_ld_4\n"
13040                 "   %ld_${var}_cons = OpCompositeConstruct %v2f16arr5 %ld_${var}_bc_0 %ld_${var}_bc_1 %ld_${var}_bc_2 %ld_${var}_bc_3 %ld_${var}_bc_4\n"
13041                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13042                 "                     OpReturnValue %ld_${var}_st_test\n"
13043                 "                     OpFunctionEnd\n"
13044         );
13045
13046         const string loadV3F16Arr5 // SPIR-V text template: function %ld_${var} returning %st_test whose single member is a %v3f16arr5.
13047         ( // Data is read as ten u32 words from member 0 of %${var}, two words per array element; ${var} is a template placeholder expanded outside this chunk.
13048                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13049                 "  %ld_${var}_param = OpFunctionParameter %i32\n" // declared but never referenced in this function body
13050                 "  %ld_${var}_entry = OpLabel\n"
13051                 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n" // pointers to words 0..9; id suffix is _<element>_<word within element>
13052                 "%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13053                 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13054                 "%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13055                 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13056                 "%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13057                 "%ld_${var}_gep_3_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13058                 "%ld_${var}_gep_3_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13059                 "%ld_${var}_gep_4_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13060                 "%ld_${var}_gep_4_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13061                 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n" // load every word as %u32
13062                 " %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13063                 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13064                 " %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13065                 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13066                 " %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13067                 " %ld_${var}_ld_3_0 = OpLoad %u32 %ld_${var}_gep_3_0\n"
13068                 " %ld_${var}_ld_3_1 = OpLoad %u32 %ld_${var}_gep_3_1\n"
13069                 " %ld_${var}_ld_4_0 = OpLoad %u32 %ld_${var}_gep_4_0\n"
13070                 " %ld_${var}_ld_4_1 = OpLoad %u32 %ld_${var}_gep_4_1\n"
13071                 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n" // reinterpret each word as a %v2f16
13072                 " %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13073                 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13074                 " %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
13075                 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13076                 " %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
13077                 " %ld_${var}_bc_3_0 = OpBitcast %v2f16 %ld_${var}_ld_3_0\n"
13078                 " %ld_${var}_bc_3_1 = OpBitcast %v2f16 %ld_${var}_ld_3_1\n"
13079                 " %ld_${var}_bc_4_0 = OpBitcast %v2f16 %ld_${var}_ld_4_0\n"
13080                 " %ld_${var}_bc_4_1 = OpBitcast %v2f16 %ld_${var}_ld_4_1\n"
13081                 "  %ld_${var}_vec_0 = OpVectorShuffle %v3f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2\n" // x,y from the first word, z = first half of the second word; its other half is dropped
13082                 "  %ld_${var}_vec_1 = OpVectorShuffle %v3f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2\n"
13083                 "  %ld_${var}_vec_2 = OpVectorShuffle %v3f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2\n"
13084                 "  %ld_${var}_vec_3 = OpVectorShuffle %v3f16 %ld_${var}_bc_3_0 %ld_${var}_bc_3_1 0 1 2\n"
13085                 "  %ld_${var}_vec_4 = OpVectorShuffle %v3f16 %ld_${var}_bc_4_0 %ld_${var}_bc_4_1 0 1 2\n"
13086                 "   %ld_${var}_cons = OpCompositeConstruct %v3f16arr5 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2 %ld_${var}_vec_3 %ld_${var}_vec_4\n" // gather the five vectors into the array
13087                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n" // wrap the array as the sole member of %st_test
13088                 "                     OpReturnValue %ld_${var}_st_test\n"
13089                 "                     OpFunctionEnd\n"
13090         );
13091
13092         const string loadV4F16Arr3 // SPIR-V text template: function %ld_${var} returning %st_test whose single member is a %v4f16arr3.
13093         ( // Data is read as six u32 words from member 0 of %${var}, two words per array element; ${var} is a template placeholder expanded outside this chunk.
13094                 "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13095                 "  %ld_${var}_param = OpFunctionParameter %i32\n" // declared but never referenced in this function body
13096                 "  %ld_${var}_entry = OpLabel\n"
13097                 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n" // pointers to words 0..5; id suffix is _<element>_<word within element>
13098                 "%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13099                 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13100                 "%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13101                 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13102                 "%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13103                 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n" // load every word as %u32
13104                 " %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13105                 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13106                 " %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13107                 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13108                 " %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13109                 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n" // reinterpret each word as a %v2f16
13110                 " %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13111                 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13112                 " %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
13113                 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13114                 " %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
13115                 "  %ld_${var}_vec_0 = OpVectorShuffle %v4f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2 3\n" // concatenate the two halves: x,y from the first word, z,w from the second
13116                 "  %ld_${var}_vec_1 = OpVectorShuffle %v4f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2 3\n"
13117                 "  %ld_${var}_vec_2 = OpVectorShuffle %v4f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2 3\n"
13118                 "   %ld_${var}_cons = OpCompositeConstruct %v4f16arr3 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2\n" // gather the three vectors into the array
13119                 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n" // wrap the array as the sole member of %st_test
13120                 "                     OpReturnValue %ld_${var}_st_test\n"
13121                 "                     OpFunctionEnd\n"
13122         );
13123
13124         const string loadStruct16Arr3 // SPIR-V text template: function %ld_${var} returning %st_test whose single member is a %struct16arr3.
13125         ( // Data is read as twelve u32 words from member 0 of %${var}, four words per struct: one for the scalar, three for the %v2f16arr3 member.
13126                 "          %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13127                 "    %ld_${var}_param = OpFunctionParameter %i32\n" // declared but never referenced in this function body
13128                 "    %ld_${var}_entry = OpLabel\n"
13129                 "%ld_${var}_gep_0_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n" // _<s>_0 is the scalar word of struct s; _<s>_1_<k> is word k of its array member
13130                 "%ld_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13131                 "%ld_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13132                 "%ld_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13133                 "%ld_${var}_gep_1_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13134                 "%ld_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13135                 "%ld_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13136                 "%ld_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13137                 "%ld_${var}_gep_2_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13138                 "%ld_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13139                 "%ld_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
13140                 "%ld_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
13141                 " %ld_${var}_ld_0_0   = OpLoad %u32 %ld_${var}_gep_0_0\n" // load every word as %u32
13142                 " %ld_${var}_ld_0_1_0 = OpLoad %u32 %ld_${var}_gep_0_1_0\n"
13143                 " %ld_${var}_ld_0_1_1 = OpLoad %u32 %ld_${var}_gep_0_1_1\n"
13144                 " %ld_${var}_ld_0_1_2 = OpLoad %u32 %ld_${var}_gep_0_1_2\n"
13145                 " %ld_${var}_ld_1_0   = OpLoad %u32 %ld_${var}_gep_1_0\n"
13146                 " %ld_${var}_ld_1_1_0 = OpLoad %u32 %ld_${var}_gep_1_1_0\n"
13147                 " %ld_${var}_ld_1_1_1 = OpLoad %u32 %ld_${var}_gep_1_1_1\n"
13148                 " %ld_${var}_ld_1_1_2 = OpLoad %u32 %ld_${var}_gep_1_1_2\n"
13149                 " %ld_${var}_ld_2_0   = OpLoad %u32 %ld_${var}_gep_2_0\n"
13150                 " %ld_${var}_ld_2_1_0 = OpLoad %u32 %ld_${var}_gep_2_1_0\n"
13151                 " %ld_${var}_ld_2_1_1 = OpLoad %u32 %ld_${var}_gep_2_1_1\n"
13152                 " %ld_${var}_ld_2_1_2 = OpLoad %u32 %ld_${var}_gep_2_1_2\n"
13153                 " %ld_${var}_bc_0_0   = OpBitcast %v2f16 %ld_${var}_ld_0_0\n" // reinterpret each word as a %v2f16
13154                 " %ld_${var}_bc_0_1_0 = OpBitcast %v2f16 %ld_${var}_ld_0_1_0\n"
13155                 " %ld_${var}_bc_0_1_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1_1\n"
13156                 " %ld_${var}_bc_0_1_2 = OpBitcast %v2f16 %ld_${var}_ld_0_1_2\n"
13157                 " %ld_${var}_bc_1_0   = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13158                 " %ld_${var}_bc_1_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_1_0\n"
13159                 " %ld_${var}_bc_1_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1_1\n"
13160                 " %ld_${var}_bc_1_1_2 = OpBitcast %v2f16 %ld_${var}_ld_1_1_2\n"
13161                 " %ld_${var}_bc_2_0   = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13162                 " %ld_${var}_bc_2_1_0 = OpBitcast %v2f16 %ld_${var}_ld_2_1_0\n"
13163                 " %ld_${var}_bc_2_1_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1_1\n"
13164                 " %ld_${var}_bc_2_1_2 = OpBitcast %v2f16 %ld_${var}_ld_2_1_2\n"
13165                 "    %ld_${var}_arr_0 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_0_1_0 %ld_${var}_bc_0_1_1 %ld_${var}_bc_0_1_2\n" // rebuild each struct's %v2f16arr3 member from its three words
13166                 "    %ld_${var}_arr_1 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_1_1_0 %ld_${var}_bc_1_1_1 %ld_${var}_bc_1_1_2\n"
13167                 "    %ld_${var}_arr_2 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_2_1_0 %ld_${var}_bc_2_1_1 %ld_${var}_bc_2_1_2\n"
13168                 "     %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0_0 0\n" // scalar member = component 0 of the scalar word; component 1 is ignored
13169                 "     %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_1_0 0\n"
13170                 "     %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_2_0 0\n"
13171                 "     %ld_${var}_st_0 = OpCompositeConstruct %struct16 %ld_${var}_ex_0 %ld_${var}_arr_0\n" // %struct16 is built from (scalar, array)
13172                 "     %ld_${var}_st_1 = OpCompositeConstruct %struct16 %ld_${var}_ex_1 %ld_${var}_arr_1\n"
13173                 "     %ld_${var}_st_2 = OpCompositeConstruct %struct16 %ld_${var}_ex_2 %ld_${var}_arr_2\n"
13174                 "     %ld_${var}_cons = OpCompositeConstruct %struct16arr3 %ld_${var}_st_0 %ld_${var}_st_1 %ld_${var}_st_2\n" // gather the three structs into the array
13175                 "  %ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n" // wrap the array as the sole member of %st_test
13176                 "                       OpReturnValue %ld_${var}_st_test\n"
13177                 "                      OpFunctionEnd\n"
13178         );
13179
13180         const string storeF16 // SPIR-V text template: function %st_${var} taking (%st_test, %i32) and returning void.
13181         ( // It pulls the %f16 out of the struct and delegates the actual write to %st_fn_${var}.
13182                 "       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13183                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13184                 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13185                 " %st_${var}_entry = OpLabel\n"
13186                 "    %st_${var}_ex = OpCompositeExtract %f16 %st_${var}_param1 0\n" // member 0 of %st_test is the scalar itself
13187                 "  %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n" // forwards the value and the %i32 argument unchanged
13188                 "                    OpReturn\n"
13189                 "                    OpFunctionEnd\n" +
13190                 storeScalarF16AsUint // defined outside this chunk; presumably supplies the %st_fn_${var} body called above - TODO confirm
13191         );
13192
13193         const string storeV2F16 // SPIR-V text template: function %st_${var} taking (%st_test, %i32) and returning void.
13194         ( // It pulls the %v2f16 out of the struct and delegates the actual write to %st_fn_${var}.
13195                 "       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13196                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13197                 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13198                 " %st_${var}_entry = OpLabel\n"
13199                 "    %st_${var}_ex = OpCompositeExtract %v2f16 %st_${var}_param1 0\n" // member 0 of %st_test is the vector itself
13200                 "  %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n" // forwards the value and the %i32 argument unchanged
13201                 "                    OpReturn\n"
13202                 "                    OpFunctionEnd\n" +
13203                 storeV2F16AsUint // defined outside this chunk; presumably supplies the %st_fn_${var} body called above - TODO confirm
13204         );
13205
13206         const string storeV3F16 // SPIR-V text template: function %st_${var} that writes the %v3f16 held in %st_test as two u32 words.
13207         ( // Words go to indices 0 and 1 of member 0 of %${var}; ${var} is a template placeholder expanded outside this chunk.
13208                 "       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13209                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13210                 "%st_${var}_param2 = OpFunctionParameter %i32\n" // declared but never referenced in this function body
13211                 " %st_${var}_entry = OpLabel\n"
13212                 "    %st_${var}_ex = OpCompositeExtract %v3f16 %st_${var}_param1 0\n"
13213                 " %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n" // first word: x,y of the vector
13214                 " %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n" // second word: z plus component 0 of %c_v2f16_n1 (index 3 falls in the second operand) as filler
13215                 "  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n" // reinterpret each f16 pair as one %u32
13216                 "  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13217                 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13218                 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13219                 "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13220                 "                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13221                 "                    OpReturn\n"
13222                 "                    OpFunctionEnd\n"
13223         );
13224
13225         const string storeV4F16 // SPIR-V text template: function %st_${var} that writes the %v4f16 held in %st_test as two u32 words.
13226         ( // Words go to indices 0 and 1 of member 0 of %${var}; ${var} is a template placeholder expanded outside this chunk.
13227                 "       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13228                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13229                 "%st_${var}_param2 = OpFunctionParameter %i32\n" // declared but never referenced in this function body
13230                 " %st_${var}_entry = OpLabel\n"
13231                 "    %st_${var}_ex = OpCompositeExtract %v4f16 %st_${var}_param1 0\n"
13232                 " %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n" // first word: x,y
13233                 " %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n" // second word: z,w; all indices select from the first operand, so %c_v2f16_n1 contributes nothing
13234                 "  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n" // reinterpret each f16 pair as one %u32
13235                 "  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13236                 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13237                 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13238                 "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13239                 "                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13240                 "                    OpReturn\n"
13241                 "                    OpFunctionEnd\n"
13242         );
13243
13244         const string storeF16Arr3 // SPIR-V text template: function %st_${var} that writes the three-element f16 array held in %st_test as two u32 words.
13245         ( // Words go to indices 0 and 1 of member 0 of %${var}; ${var} is a template placeholder expanded outside this chunk.
13246                 "       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13247                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13248                 "%st_${var}_param2 = OpFunctionParameter %i32\n" // declared but never referenced in this function body
13249                 " %st_${var}_entry = OpLabel\n"
13250                 "  %st_${var}_ex_0 = OpCompositeExtract %f16 %st_${var}_param1 0 0\n" // index path "0 k": member 0 of the struct, then array element k
13251                 "  %st_${var}_ex_1 = OpCompositeExtract %f16 %st_${var}_param1 0 1\n"
13252                 "  %st_${var}_ex_2 = OpCompositeExtract %f16 %st_${var}_param1 0 2\n"
13253                 " %st_${var}_vec_0 = OpCompositeConstruct %v2f16 %st_${var}_ex_0 %st_${var}_ex_1\n" // first word: elements 0 and 1
13254                 " %st_${var}_vec_1 = OpCompositeConstruct %v2f16 %st_${var}_ex_2 %c_f16_na\n" // second word: element 2 paired with the constant %c_f16_na as filler
13255                 "  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n" // reinterpret each f16 pair as one %u32
13256                 "  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13257                 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13258                 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13259                 "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13260                 "                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13261                 "                    OpReturn\n"
13262                 "                    OpFunctionEnd\n"
13263         );
13264
13265         const string storeV2F16Arr5 // SPIR-V text template: function %st_${var} that writes the five-element v2f16 array held in %st_test as five u32 words.
13266         ( // One word per element, at indices 0..4 of member 0 of %${var}; ${var} is a template placeholder expanded outside this chunk.
13267                 "       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13268                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13269                 "%st_${var}_param2 = OpFunctionParameter %i32\n" // declared but never referenced in this function body
13270                 " %st_${var}_entry = OpLabel\n"
13271                 "  %st_${var}_ex_0 = OpCompositeExtract %v2f16 %st_${var}_param1 0 0\n" // index path "0 k": member 0 of the struct, then array element k
13272                 "  %st_${var}_ex_1 = OpCompositeExtract %v2f16 %st_${var}_param1 0 1\n"
13273                 "  %st_${var}_ex_2 = OpCompositeExtract %v2f16 %st_${var}_param1 0 2\n"
13274                 "  %st_${var}_ex_3 = OpCompositeExtract %v2f16 %st_${var}_param1 0 3\n"
13275                 "  %st_${var}_ex_4 = OpCompositeExtract %v2f16 %st_${var}_param1 0 4\n"
13276                 "  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_ex_0\n" // each %v2f16 is reinterpreted directly as one %u32
13277                 "  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_ex_1\n"
13278                 "  %st_${var}_bc_2 = OpBitcast %u32 %st_${var}_ex_2\n"
13279                 "  %st_${var}_bc_3 = OpBitcast %u32 %st_${var}_ex_3\n"
13280                 "  %st_${var}_bc_4 = OpBitcast %u32 %st_${var}_ex_4\n"
13281                 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n" // destination pointers for words 0..4
13282                 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13283                 " %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13284                 " %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13285                 " %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13286                 "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13287                 "                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13288                 "                    OpStore %st_${var}_gep_2 %st_${var}_bc_2\n"
13289                 "                    OpStore %st_${var}_gep_3 %st_${var}_bc_3\n"
13290                 "                    OpStore %st_${var}_gep_4 %st_${var}_bc_4\n"
13291                 "                    OpReturn\n"
13292                 "                    OpFunctionEnd\n"
13293         );
13294
13295         const string storeV3F16Arr5 // SPIR-V text template: function %st_${var} that writes the five-element v3f16 array held in %st_test as ten u32 words.
13296         ( // Two words per element, at indices 0..9 of member 0 of %${var}; ${var} is a template placeholder expanded outside this chunk.
13297                 "       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13298                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13299                 "%st_${var}_param2 = OpFunctionParameter %i32\n" // declared but never referenced in this function body
13300                 " %st_${var}_entry = OpLabel\n"
13301                 "  %st_${var}_ex_0 = OpCompositeExtract %v3f16 %st_${var}_param1 0 0\n" // index path "0 k": member 0 of the struct, then array element k
13302                 "  %st_${var}_ex_1 = OpCompositeExtract %v3f16 %st_${var}_param1 0 1\n"
13303                 "  %st_${var}_ex_2 = OpCompositeExtract %v3f16 %st_${var}_param1 0 2\n"
13304                 "  %st_${var}_ex_3 = OpCompositeExtract %v3f16 %st_${var}_param1 0 3\n"
13305                 "  %st_${var}_ex_4 = OpCompositeExtract %v3f16 %st_${var}_param1 0 4\n"
13306                 "%st_${var}_v2_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 0 1\n" // _<k>_0: x,y of element k
13307                 "%st_${var}_v2_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 2 3\n" // _<k>_1: z of element k plus component 0 of %c_v2f16_n1 (index 3) as filler
13308                 "%st_${var}_v2_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 0 1\n"
13309                 "%st_${var}_v2_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 2 3\n"
13310                 "%st_${var}_v2_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 0 1\n"
13311                 "%st_${var}_v2_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 2 3\n"
13312                 "%st_${var}_v2_3_0 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 0 1\n"
13313                 "%st_${var}_v2_3_1 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 2 3\n"
13314                 "%st_${var}_v2_4_0 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 0 1\n"
13315                 "%st_${var}_v2_4_1 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 2 3\n"
13316                 "%st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n" // reinterpret each f16 pair as one %u32
13317                 "%st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
13318                 "%st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
13319                 "%st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
13320                 "%st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
13321                 "%st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
13322                 "%st_${var}_bc_3_0 = OpBitcast %u32 %st_${var}_v2_3_0\n"
13323                 "%st_${var}_bc_3_1 = OpBitcast %u32 %st_${var}_v2_3_1\n"
13324                 "%st_${var}_bc_4_0 = OpBitcast %u32 %st_${var}_v2_4_0\n"
13325                 "%st_${var}_bc_4_1 = OpBitcast %u32 %st_${var}_v2_4_1\n"
13326                 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n" // destination pointers for words 0..9 (flat numbering)
13327                 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13328                 " %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13329                 " %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13330                 " %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13331                 " %st_${var}_gep_5 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13332                 " %st_${var}_gep_6 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13333                 " %st_${var}_gep_7 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13334                 " %st_${var}_gep_8 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13335                 " %st_${var}_gep_9 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13336                 "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0_0\n" // element k lands in words 2k and 2k+1
13337                 "                    OpStore %st_${var}_gep_1 %st_${var}_bc_0_1\n"
13338                 "                    OpStore %st_${var}_gep_2 %st_${var}_bc_1_0\n"
13339                 "                    OpStore %st_${var}_gep_3 %st_${var}_bc_1_1\n"
13340                 "                    OpStore %st_${var}_gep_4 %st_${var}_bc_2_0\n"
13341                 "                    OpStore %st_${var}_gep_5 %st_${var}_bc_2_1\n"
13342                 "                    OpStore %st_${var}_gep_6 %st_${var}_bc_3_0\n"
13343                 "                    OpStore %st_${var}_gep_7 %st_${var}_bc_3_1\n"
13344                 "                    OpStore %st_${var}_gep_8 %st_${var}_bc_4_0\n"
13345                 "                    OpStore %st_${var}_gep_9 %st_${var}_bc_4_1\n"
13346                 "                    OpReturn\n"
13347                 "                    OpFunctionEnd\n"
13348         );
13349
13350         const string storeV4F16Arr3 // SPIR-V text template: function %st_${var} that writes the three-element v4f16 array held in %st_test as six u32 words.
13351         ( // Two words per element, at indices 0..5 of member 0 of %${var}; ${var} is a template placeholder expanded outside this chunk.
13352                 "        %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13353                 " %st_${var}_param1 = OpFunctionParameter %st_test\n"
13354                 " %st_${var}_param2 = OpFunctionParameter %i32\n" // declared but never referenced in this function body
13355                 "  %st_${var}_entry = OpLabel\n"
13356                 "   %st_${var}_ex_0 = OpCompositeExtract %v4f16 %st_${var}_param1 0 0\n" // index path "0 k": member 0 of the struct, then array element k
13357                 "   %st_${var}_ex_1 = OpCompositeExtract %v4f16 %st_${var}_param1 0 1\n"
13358                 "   %st_${var}_ex_2 = OpCompositeExtract %v4f16 %st_${var}_param1 0 2\n"
13359                 "%st_${var}_vec_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 0 1\n" // the same vector is passed as both operands; _<k>_0 picks x,y
13360                 "%st_${var}_vec_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 2 3\n" // _<k>_1 picks z,w
13361                 "%st_${var}_vec_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 0 1\n"
13362                 "%st_${var}_vec_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 2 3\n"
13363                 "%st_${var}_vec_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 0 1\n"
13364                 "%st_${var}_vec_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 2 3\n"
13365                 " %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_vec_0_0\n" // reinterpret each f16 pair as one %u32
13366                 " %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_vec_0_1\n"
13367                 " %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_vec_1_0\n"
13368                 " %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_vec_1_1\n"
13369                 " %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_vec_2_0\n"
13370                 " %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_vec_2_1\n"
13371                 "%st_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n" // destination pointers; element k uses words 2k and 2k+1
13372                 "%st_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13373                 "%st_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13374                 "%st_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13375                 "%st_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13376                 "%st_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13377                 "                     OpStore %st_${var}_gep_0_0 %st_${var}_bc_0_0\n"
13378                 "                     OpStore %st_${var}_gep_0_1 %st_${var}_bc_0_1\n"
13379                 "                     OpStore %st_${var}_gep_1_0 %st_${var}_bc_1_0\n"
13380                 "                     OpStore %st_${var}_gep_1_1 %st_${var}_bc_1_1\n"
13381                 "                     OpStore %st_${var}_gep_2_0 %st_${var}_bc_2_0\n"
13382                 "                     OpStore %st_${var}_gep_2_1 %st_${var}_bc_2_1\n"
13383                 "                     OpReturn\n"
13384                 "                     OpFunctionEnd\n"
13385         );
13386
13387         const string storeStruct16Arr3 // SPIR-V text template: function %st_${var} that writes the three-element %struct16 array held in %st_test as twelve u32 words.
13388         ( // Four words per struct (scalar word, then three array words), at indices 0..11 of member 0 of %${var}.
13389                 "          %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13390                 "   %st_${var}_param1 = OpFunctionParameter %st_test\n"
13391                 "   %st_${var}_param2 = OpFunctionParameter %i32\n" // declared but never referenced in this function body
13392                 "    %st_${var}_entry = OpLabel\n"
13393                 "     %st_${var}_st_0 = OpCompositeExtract %struct16 %st_${var}_param1 0 0\n" // pull the three structs out of the wrapped array
13394                 "     %st_${var}_st_1 = OpCompositeExtract %struct16 %st_${var}_param1 0 1\n"
13395                 "     %st_${var}_st_2 = OpCompositeExtract %struct16 %st_${var}_param1 0 2\n"
13396                 "   %st_${var}_el_0   = OpCompositeExtract   %f16 %st_${var}_st_0 0\n" // el_<s>: scalar member 0 of struct s
13397                 "   %st_${var}_v2_0_0 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 0\n" // v2_<s>_<k>: element k of struct s's array member 1
13398                 "   %st_${var}_v2_0_1 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 1\n"
13399                 "   %st_${var}_v2_0_2 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 2\n"
13400                 "   %st_${var}_el_1   = OpCompositeExtract   %f16 %st_${var}_st_1 0\n"
13401                 "   %st_${var}_v2_1_0 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 0\n"
13402                 "   %st_${var}_v2_1_1 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 1\n"
13403                 "   %st_${var}_v2_1_2 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 2\n"
13404                 "   %st_${var}_el_2   = OpCompositeExtract   %f16 %st_${var}_st_2 0\n"
13405                 "   %st_${var}_v2_2_0 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 0\n"
13406                 "   %st_${var}_v2_2_1 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 1\n"
13407                 "   %st_${var}_v2_2_2 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 2\n"
13408                 "     %st_${var}_v2_0 = OpCompositeConstruct %v2f16 %st_${var}_el_0 %c_f16_na\n" // pair each scalar with the constant %c_f16_na so it fills a whole word
13409                 "     %st_${var}_v2_1 = OpCompositeConstruct %v2f16 %st_${var}_el_1 %c_f16_na\n"
13410                 "     %st_${var}_v2_2 = OpCompositeConstruct %v2f16 %st_${var}_el_2 %c_f16_na\n"
13411                 "   %st_${var}_bc_0   = OpBitcast %u32 %st_${var}_v2_0\n" // bc_<s> is the scalar word, bc_<s>_<k> the array words; each is a %v2f16 reinterpreted as %u32
13412                 "   %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
13413                 "   %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
13414                 "   %st_${var}_bc_0_2 = OpBitcast %u32 %st_${var}_v2_0_2\n"
13415                 "   %st_${var}_bc_1   = OpBitcast %u32 %st_${var}_v2_1\n"
13416                 "   %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
13417                 "   %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
13418                 "   %st_${var}_bc_1_2 = OpBitcast %u32 %st_${var}_v2_1_2\n"
13419                 "   %st_${var}_bc_2   = OpBitcast %u32 %st_${var}_v2_2\n"
13420                 "   %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
13421                 "   %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
13422                 "   %st_${var}_bc_2_2 = OpBitcast %u32 %st_${var}_v2_2_2\n"
13423                 "%st_${var}_gep_0_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n" // gep_<s>_0_0 addresses the scalar word of struct s, gep_<s>_1_<k> its array words
13424                 "%st_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13425                 "%st_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13426                 "%st_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13427                 "%st_${var}_gep_1_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13428                 "%st_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13429                 "%st_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13430                 "%st_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13431                 "%st_${var}_gep_2_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13432                 "%st_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13433                 "%st_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
13434                 "%st_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
13435                 "                       OpStore %st_${var}_gep_0_0_0 %st_${var}_bc_0\n" // struct s occupies words 4s..4s+3
13436                 "                       OpStore %st_${var}_gep_0_1_0 %st_${var}_bc_0_0\n"
13437                 "                       OpStore %st_${var}_gep_0_1_1 %st_${var}_bc_0_1\n"
13438                 "                       OpStore %st_${var}_gep_0_1_2 %st_${var}_bc_0_2\n"
13439                 "                       OpStore %st_${var}_gep_1_0_0 %st_${var}_bc_1\n"
13440                 "                       OpStore %st_${var}_gep_1_1_0 %st_${var}_bc_1_0\n"
13441                 "                       OpStore %st_${var}_gep_1_1_1 %st_${var}_bc_1_1\n"
13442                 "                       OpStore %st_${var}_gep_1_1_2 %st_${var}_bc_1_2\n"
13443                 "                       OpStore %st_${var}_gep_2_0_0 %st_${var}_bc_2\n"
13444                 "                       OpStore %st_${var}_gep_2_1_0 %st_${var}_bc_2_0\n"
13445                 "                       OpStore %st_${var}_gep_2_1_1 %st_${var}_bc_2_1\n"
13446                 "                       OpStore %st_${var}_gep_2_1_2 %st_${var}_bc_2_2\n"
13447                 "                       OpReturn\n"
13448                 "                       OpFunctionEnd\n"
13449         );
13450
13451         struct OpParts // Per-opcode SPIR-V text fragments; how they are spliced into the final shader is outside this chunk.
13452         {
13453                 const char*     premainDecls; // type declarations: %fun_t plus the %SSBO_src / %SSBO_dst struct types
13454                 const char*     swFunCall; // OpFunctionCall lines that end by producing %val_dst from %sw_fun
13455                 const char*     swFunHeader; // OpFunction line opening %sw_fun, plus any extra leading parameter
13456                 const char*     caseDefaultValue; // a single result id; presumably the value used for the default switch case - TODO confirm
13457                 const char*     argsPartial; // leading operands (result type and ids); presumably completed with an index path by the caller - TODO confirm
13458         };
13459
13460         OpParts                                                         opPartsArray[]                  = // one entry per tested opcode, selected by opIndex (checked by the assertion after the table)
13461         {
13462                 // OpCompositeInsert: %sw_fun takes (new %f16 value, existing %st_test, %i32) and returns %st_test
13463                 {
13464                         "       %fun_t = OpTypeFunction %st_test %f16 %st_test %i32\n"
13465                         "    %SSBO_src = OpTypeStruct %ra_f16\n"
13466                         "    %SSBO_dst = OpTypeStruct %ra_st\n",
13467
13468                         "   %val_new = OpFunctionCall %f16 %ld_arg_ssbo_src %ndx\n" // new value is loaded with index %ndx, the existing struct with %c_i32_0
13469                         "   %val_old = OpFunctionCall %st_test %ld_ssbo_dst %c_i32_0\n"
13470                         "   %val_dst = OpFunctionCall %st_test %sw_fun %val_new %val_old %ndx\n",
13471
13472                         "   %sw_fun = OpFunction %st_test None %fun_t\n"
13473                         "%sw_paramv = OpFunctionParameter %f16\n", // extra leading parameter carrying the value to insert
13474
13475                         "%sw_param",
13476
13477                         "%st_test %sw_paramv %sw_param",
13478                 },
13479                 // OpCompositeExtract: %sw_fun takes (%st_test, %i32) and returns %f16
13480                 {
13481                         "       %fun_t = OpTypeFunction %f16 %st_test %i32\n"
13482                         "    %SSBO_src = OpTypeStruct %ra_st\n" // source/destination element types are swapped relative to the insert entry
13483                         "    %SSBO_dst = OpTypeStruct %ra_f16\n",
13484
13485                         "   %val_src = OpFunctionCall %st_test %ld_ssbo_src %c_i32_0\n"
13486                         "   %val_dst = OpFunctionCall %f16 %sw_fun %val_src %ndx\n",
13487
13488                         "   %sw_fun = OpFunction %f16 None %fun_t\n",
13489
13490                         "%c_f16_na",
13491
13492                         "%f16 %sw_param",
13493                 },
13494         };
13495
13496         DE_ASSERT(opIndex >= 0 && opIndex < DE_LENGTH_OF_ARRAY(opPartsArray)); // opIndex is defined outside this chunk; it must address an entry of the table above
13497
13498         const char*     accessPathF16[] =
13499         {
13500                 "0",                    // %f16
13501                 DE_NULL,
13502         };
13503         const char*     accessPathV2F16[] =
13504         {
13505                 "0 0",                  // %v2f16
13506                 "0 1",
13507         };
13508         const char*     accessPathV3F16[] =
13509         {
13510                 "0 0",                  // %v3f16
13511                 "0 1",
13512                 "0 2",
13513                 DE_NULL,
13514         };
13515         const char*     accessPathV4F16[] =
13516         {
13517                 "0 0",                  // %v4f16"
13518                 "0 1",
13519                 "0 2",
13520                 "0 3",
13521         };
13522         const char*     accessPathF16Arr3[] =
13523         {
13524                 "0 0",                  // %f16arr3
13525                 "0 1",
13526                 "0 2",
13527                 DE_NULL,
13528         };
13529         const char*     accessPathStruct16Arr3[] =
13530         {
13531                 "0 0 0",                // %struct16arr3
13532                 DE_NULL,
13533                 "0 0 1 0 0",
13534                 "0 0 1 0 1",
13535                 "0 0 1 1 0",
13536                 "0 0 1 1 1",
13537                 "0 0 1 2 0",
13538                 "0 0 1 2 1",
13539                 "0 1 0",
13540                 DE_NULL,
13541                 "0 1 1 0 0",
13542                 "0 1 1 0 1",
13543                 "0 1 1 1 0",
13544                 "0 1 1 1 1",
13545                 "0 1 1 2 0",
13546                 "0 1 1 2 1",
13547                 "0 2 0",
13548                 DE_NULL,
13549                 "0 2 1 0 0",
13550                 "0 2 1 0 1",
13551                 "0 2 1 1 0",
13552                 "0 2 1 1 1",
13553                 "0 2 1 2 0",
13554                 "0 2 1 2 1",
13555         };
13556         const char*     accessPathV2F16Arr5[] =
13557         {
13558                 "0 0 0",                // %v2f16arr5
13559                 "0 0 1",
13560                 "0 1 0",
13561                 "0 1 1",
13562                 "0 2 0",
13563                 "0 2 1",
13564                 "0 3 0",
13565                 "0 3 1",
13566                 "0 4 0",
13567                 "0 4 1",
13568         };
13569         const char*     accessPathV3F16Arr5[] =
13570         {
13571                 "0 0 0",                // %v3f16arr5
13572                 "0 0 1",
13573                 "0 0 2",
13574                 DE_NULL,
13575                 "0 1 0",
13576                 "0 1 1",
13577                 "0 1 2",
13578                 DE_NULL,
13579                 "0 2 0",
13580                 "0 2 1",
13581                 "0 2 2",
13582                 DE_NULL,
13583                 "0 3 0",
13584                 "0 3 1",
13585                 "0 3 2",
13586                 DE_NULL,
13587                 "0 4 0",
13588                 "0 4 1",
13589                 "0 4 2",
13590                 DE_NULL,
13591         };
13592         const char*     accessPathV4F16Arr3[] =
13593         {
13594                 "0 0 0",                // %v4f16arr3
13595                 "0 0 1",
13596                 "0 0 2",
13597                 "0 0 3",
13598                 "0 1 0",
13599                 "0 1 1",
13600                 "0 1 2",
13601                 "0 1 3",
13602                 "0 2 0",
13603                 "0 2 1",
13604                 "0 2 2",
13605                 "0 2 3",
13606                 DE_NULL,
13607                 DE_NULL,
13608                 DE_NULL,
13609                 DE_NULL,
13610         };
13611
13612         struct TypeTestParameters
13613         {
13614                 const char*             name;
13615                 size_t                  accessPathLength;
13616                 const char**    accessPath;
13617                 const string    loadFunction;
13618                 const string    storeFunction;
13619         };
13620
13621         const TypeTestParameters typeTestParameters[] =
13622         {
13623                 {       "f16",                  DE_LENGTH_OF_ARRAY(accessPathF16),                      accessPathF16,                  loadF16,                        storeF16                 },
13624                 {       "v2f16",                DE_LENGTH_OF_ARRAY(accessPathV2F16),            accessPathV2F16,                loadV2F16,                      storeV2F16               },
13625                 {       "v3f16",                DE_LENGTH_OF_ARRAY(accessPathV3F16),            accessPathV3F16,                loadV3F16,                      storeV3F16               },
13626                 {       "v4f16",                DE_LENGTH_OF_ARRAY(accessPathV4F16),            accessPathV4F16,                loadV4F16,                      storeV4F16                },
13627                 {       "f16arr3",              DE_LENGTH_OF_ARRAY(accessPathF16Arr3),          accessPathF16Arr3,              loadF16Arr3,            storeF16Arr3      },
13628                 {       "v2f16arr5",    DE_LENGTH_OF_ARRAY(accessPathV2F16Arr5),        accessPathV2F16Arr5,    loadV2F16Arr5,          storeV2F16Arr5    },
13629                 {       "v3f16arr5",    DE_LENGTH_OF_ARRAY(accessPathV3F16Arr5),        accessPathV3F16Arr5,    loadV3F16Arr5,          storeV3F16Arr5    },
13630                 {       "v4f16arr3",    DE_LENGTH_OF_ARRAY(accessPathV4F16Arr3),        accessPathV4F16Arr3,    loadV4F16Arr3,          storeV4F16Arr3    },
13631                 {       "struct16arr3", DE_LENGTH_OF_ARRAY(accessPathStruct16Arr3),     accessPathStruct16Arr3, loadStruct16Arr3,       storeStruct16Arr3},
13632         };
13633
13634         for (size_t typeTestNdx = 0; typeTestNdx < DE_LENGTH_OF_ARRAY(typeTestParameters); ++typeTestNdx)
13635         {
13636                 const OpParts           opParts                         = opPartsArray[opIndex];
13637                 const string            testName                        = typeTestParameters[typeTestNdx].name;
13638                 const size_t            structItemsCount        = typeTestParameters[typeTestNdx].accessPathLength;
13639                 const char**            accessPath                      = typeTestParameters[typeTestNdx].accessPath;
13640                 SpecResource            specResource;
13641                 map<string, string>     specs;
13642                 VulkanFeatures          features;
13643                 map<string, string>     fragments;
13644                 vector<string>          extensions;
13645                 vector<deFloat16>       inputFP16;
13646                 vector<deFloat16>       dummyFP16Output;
13647
13648                 // Generate values for input
13649                 inputFP16.reserve(structItemsCount);
13650                 for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
13651                         inputFP16.push_back((accessPath[structItemNdx] == DE_NULL) ? exceptionValue : tcu::Float16(float(structItemNdx)).bits());
13652
13653                 dummyFP16Output.resize(structItemsCount);
13654
13655                 // Generate cases for OpSwitch
13656                 {
13657                         string  caseBodies;
13658                         string  caseList;
13659
13660                         for (deUint32 caseNdx = 0; caseNdx < structItemsCount; ++caseNdx)
13661                                 if (accessPath[caseNdx] != DE_NULL)
13662                                 {
13663                                         map<string, string>     specCase;
13664
13665                                         specCase["case_ndx"]            = de::toString(caseNdx);
13666                                         specCase["access_path"]         = accessPath[caseNdx];
13667                                         specCase["op_args_part"]        = opParts.argsPartial;
13668                                         specCase["op_name"]                     = opName;
13669
13670                                         caseBodies      += testCaseBody.specialize(specCase);
13671                                         caseList        += de::toString(caseNdx) + " %case_" + de::toString(caseNdx) + " ";
13672                                 }
13673
13674                         specs["case_bodies"]    = caseBodies;
13675                         specs["case_list"]              = caseList;
13676                 }
13677
13678                 specs["num_elements"]                   = de::toString(structItemsCount);
13679                 specs["field_type"]                             = typeTestParameters[typeTestNdx].name;
13680                 specs["struct_item_size"]               = de::toString(structItemsCount * sizeof(deFloat16));
13681                 specs["struct_u32s"]                    = de::toString(structItemsCount / 2);
13682                 specs["op_premain_decls"]               = opParts.premainDecls;
13683                 specs["op_sw_fun_call"]                 = opParts.swFunCall;
13684                 specs["op_sw_fun_header"]               = opParts.swFunHeader;
13685                 specs["op_case_default_value"]  = opParts.caseDefaultValue;
13686                 if (opIndex == 0) {
13687                         specs["st_call"]                        = "st_ssbo_dst";
13688                         specs["st_ndx"]                         = "c_i32_0";
13689                 } else {
13690                         specs["st_call"]                        = "st_fn_ssbo_dst";
13691                         specs["st_ndx"]                         = "ndx";
13692                 }
13693
13694                 fragments["capability"]         = "OpCapability Float16\n";
13695                 fragments["decoration"]         = decoration.specialize(specs);
13696                 fragments["pre_main"]           = preMain.specialize(specs);
13697                 fragments["testfun"]            = testFun.specialize(specs);
13698                 if (opIndex == 0) {
13699                         fragments["testfun"]            += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src"}});
13700                         fragments["testfun"]            += StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_dst"}});
13701                         fragments["testfun"]            += StringTemplate(typeTestParameters[typeTestNdx].storeFunction).specialize({{"var", "ssbo_dst"}});
13702                 } else {
13703                         fragments["testfun"]            += StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_src"}});
13704                         fragments["testfun"]            += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
13705                 }
13706
13707                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputFP16)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13708                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(dummyFP16Output)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13709                 specResource.verifyIO = compareFP16CompositeFunc;
13710
13711                 extensions.push_back("VK_KHR_shader_float16_int8");
13712
13713                 features.extFloat16Int8         = EXTFLOAT16INT8FEATURES_FLOAT16;
13714
13715                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
13716         }
13717
13718         return testGroup.release();
13719 }
13720
13721 struct fp16PerComponent
13722 {
13723         fp16PerComponent()
13724                 : flavor(0)
13725                 , floatFormat16 (-14, 15, 10, true)
13726                 , outCompCount(0)
13727                 , argCompCount(3, 0)
13728         {
13729         }
13730
13731         bool                    callOncePerComponent    ()                                                                      { return true; }
13732         deUint32                getComponentValidity    ()                                                                      { return static_cast<deUint32>(-1); }
13733
13734         virtual double  getULPs                                 (vector<const deFloat16*>&)                     { return 1.0; }
13735         virtual double  getMin                                  (double value, double ulps)                     { return value - floatFormat16.ulp(deAbs(value), ulps); }
13736         virtual double  getMax                                  (double value, double ulps)                     { return value + floatFormat16.ulp(deAbs(value), ulps); }
13737
13738         virtual size_t  getFlavorCount                  ()                                                                      { return flavorNames.empty() ? 1 : flavorNames.size(); }
13739         virtual void    setFlavor                               (size_t flavorNo)                                       { DE_ASSERT(flavorNo < getFlavorCount()); flavor = flavorNo; }
13740         virtual size_t  getFlavor                               ()                                                                      { return flavor; }
13741         virtual string  getCurrentFlavorName    ()                                                                      { return flavorNames.empty() ? string("") : flavorNames[getFlavor()]; }
13742
13743         virtual void    setOutCompCount                 (size_t compCount)                                      { outCompCount = compCount; }
13744         virtual size_t  getOutCompCount                 ()                                                                      { return outCompCount; }
13745
13746         virtual void    setArgCompCount                 (size_t argNo, size_t compCount)        { argCompCount[argNo] = compCount; }
13747         virtual size_t  getArgCompCount                 (size_t argNo)                                          { return argCompCount[argNo]; }
13748
13749 protected:
13750         size_t                          flavor;
13751         tcu::FloatFormat        floatFormat16;
13752         size_t                          outCompCount;
13753         vector<size_t>          argCompCount;
13754         vector<string>          flavorNames;
13755 };
13756
13757 struct fp16OpFNegate : public fp16PerComponent
13758 {
13759         template <class fp16type>
13760         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13761         {
13762                 const fp16type  x               (*in[0]);
13763                 const double    d               (x.asDouble());
13764                 const double    result  (0.0 - d);
13765
13766                 out[0] = fp16type(result).bits();
13767                 min[0] = getMin(result, getULPs(in));
13768                 max[0] = getMax(result, getULPs(in));
13769
13770                 return true;
13771         }
13772 };
13773
13774 struct fp16Round : public fp16PerComponent
13775 {
13776         fp16Round() : fp16PerComponent()
13777         {
13778                 flavorNames.push_back("Floor(x+0.5)");
13779                 flavorNames.push_back("Floor(x-0.5)");
13780                 flavorNames.push_back("RoundEven");
13781         }
13782
13783         template<class fp16type>
13784         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13785         {
13786                 const fp16type  x               (*in[0]);
13787                 const double    d               (x.asDouble());
13788                 double                  result  (0.0);
13789
13790                 switch (flavor)
13791                 {
13792                         case 0:         result = deRound(d);            break;
13793                         case 1:         result = deFloor(d - 0.5);      break;
13794                         case 2:         result = deRoundEven(d);        break;
13795                         default:        TCU_THROW(InternalError, "Invalid flavor specified");
13796                 }
13797
13798                 out[0] = fp16type(result).bits();
13799                 min[0] = getMin(result, getULPs(in));
13800                 max[0] = getMax(result, getULPs(in));
13801
13802                 return true;
13803         }
13804 };
13805
13806 struct fp16RoundEven : public fp16PerComponent
13807 {
13808         template<class fp16type>
13809         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13810         {
13811                 const fp16type  x               (*in[0]);
13812                 const double    d               (x.asDouble());
13813                 const double    result  (deRoundEven(d));
13814
13815                 out[0] = fp16type(result).bits();
13816                 min[0] = getMin(result, getULPs(in));
13817                 max[0] = getMax(result, getULPs(in));
13818
13819                 return true;
13820         }
13821 };
13822
13823 struct fp16Trunc : public fp16PerComponent
13824 {
13825         template<class fp16type>
13826         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13827         {
13828                 const fp16type  x               (*in[0]);
13829                 const double    d               (x.asDouble());
13830                 const double    result  (deTrunc(d));
13831
13832                 out[0] = fp16type(result).bits();
13833                 min[0] = getMin(result, getULPs(in));
13834                 max[0] = getMax(result, getULPs(in));
13835
13836                 return true;
13837         }
13838 };
13839
13840 struct fp16FAbs : public fp16PerComponent
13841 {
13842         template<class fp16type>
13843         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13844         {
13845                 const fp16type  x               (*in[0]);
13846                 const double    d               (x.asDouble());
13847                 const double    result  (deAbs(d));
13848
13849                 out[0] = fp16type(result).bits();
13850                 min[0] = getMin(result, getULPs(in));
13851                 max[0] = getMax(result, getULPs(in));
13852
13853                 return true;
13854         }
13855 };
13856
13857 struct fp16FSign : public fp16PerComponent
13858 {
13859         template<class fp16type>
13860         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13861         {
13862                 const fp16type  x               (*in[0]);
13863                 const double    d               (x.asDouble());
13864                 const double    result  (deSign(d));
13865
13866                 if (x.isNaN())
13867                         return false;
13868
13869                 out[0] = fp16type(result).bits();
13870                 min[0] = getMin(result, getULPs(in));
13871                 max[0] = getMax(result, getULPs(in));
13872
13873                 return true;
13874         }
13875 };
13876
13877 struct fp16Floor : public fp16PerComponent
13878 {
13879         template<class fp16type>
13880         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13881         {
13882                 const fp16type  x               (*in[0]);
13883                 const double    d               (x.asDouble());
13884                 const double    result  (deFloor(d));
13885
13886                 out[0] = fp16type(result).bits();
13887                 min[0] = getMin(result, getULPs(in));
13888                 max[0] = getMax(result, getULPs(in));
13889
13890                 return true;
13891         }
13892 };
13893
13894 struct fp16Ceil : public fp16PerComponent
13895 {
13896         template<class fp16type>
13897         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13898         {
13899                 const fp16type  x               (*in[0]);
13900                 const double    d               (x.asDouble());
13901                 const double    result  (deCeil(d));
13902
13903                 out[0] = fp16type(result).bits();
13904                 min[0] = getMin(result, getULPs(in));
13905                 max[0] = getMax(result, getULPs(in));
13906
13907                 return true;
13908         }
13909 };
13910
13911 struct fp16Fract : public fp16PerComponent
13912 {
13913         template<class fp16type>
13914         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13915         {
13916                 const fp16type  x               (*in[0]);
13917                 const double    d               (x.asDouble());
13918                 const double    result  (deFrac(d));
13919
13920                 out[0] = fp16type(result).bits();
13921                 min[0] = getMin(result, getULPs(in));
13922                 max[0] = getMax(result, getULPs(in));
13923
13924                 return true;
13925         }
13926 };
13927
13928 struct fp16Radians : public fp16PerComponent
13929 {
13930         virtual double getULPs (vector<const deFloat16*>& in)
13931         {
13932                 DE_UNREF(in);
13933
13934                 return 2.5;
13935         }
13936
13937         template<class fp16type>
13938         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13939         {
13940                 const fp16type  x               (*in[0]);
13941                 const float             d               (x.asFloat());
13942                 const float             result  (deFloatRadians(d));
13943
13944                 out[0] = fp16type(result).bits();
13945                 min[0] = getMin(result, getULPs(in));
13946                 max[0] = getMax(result, getULPs(in));
13947
13948                 return true;
13949         }
13950 };
13951
13952 struct fp16Degrees : public fp16PerComponent
13953 {
13954         virtual double getULPs (vector<const deFloat16*>& in)
13955         {
13956                 DE_UNREF(in);
13957
13958                 return 2.5;
13959         }
13960
13961         template<class fp16type>
13962         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13963         {
13964                 const fp16type  x               (*in[0]);
13965                 const float             d               (x.asFloat());
13966                 const float             result  (deFloatDegrees(d));
13967
13968                 out[0] = fp16type(result).bits();
13969                 min[0] = getMin(result, getULPs(in));
13970                 max[0] = getMax(result, getULPs(in));
13971
13972                 return true;
13973         }
13974 };
13975
13976 struct fp16Sin : public fp16PerComponent
13977 {
13978         template<class fp16type>
13979         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13980         {
13981                 const fp16type  x                       (*in[0]);
13982                 const double    d                       (x.asDouble());
13983                 const double    result          (deSin(d));
13984                 const double    unspecUlp       (16.0);
13985                 const double    err                     (de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
13986
13987                 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
13988                         return false;
13989
13990                 out[0] = fp16type(result).bits();
13991                 min[0] = result - err;
13992                 max[0] = result + err;
13993
13994                 return true;
13995         }
13996 };
13997
13998 struct fp16Cos : public fp16PerComponent
13999 {
14000         template<class fp16type>
14001         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14002         {
14003                 const fp16type  x                       (*in[0]);
14004                 const double    d                       (x.asDouble());
14005                 const double    result          (deCos(d));
14006                 const double    unspecUlp       (16.0);
14007                 const double    err                     (de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
14008
14009                 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14010                         return false;
14011
14012                 out[0] = fp16type(result).bits();
14013                 min[0] = result - err;
14014                 max[0] = result + err;
14015
14016                 return true;
14017         }
14018 };
14019
14020 struct fp16Tan : public fp16PerComponent
14021 {
14022         template<class fp16type>
14023         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14024         {
14025                 const fp16type  x               (*in[0]);
14026                 const double    d               (x.asDouble());
14027                 const double    result  (deTan(d));
14028
14029                 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14030                         return false;
14031
14032                 out[0] = fp16type(result).bits();
14033                 {
14034                         const double    err                     = deLdExp(1.0, -7);
14035                         const double    s1                      = deSin(d) + err;
14036                         const double    s2                      = deSin(d) - err;
14037                         const double    c1                      = deCos(d) + err;
14038                         const double    c2                      = deCos(d) - err;
14039                         const double    edgeVals[]      = {s1/c1, s1/c2, s2/c1, s2/c2};
14040                         double                  edgeLeft        = out[0];
14041                         double                  edgeRight       = out[0];
14042
14043                         if (deSign(c1 * c2) < 0.0)
14044                         {
14045                                 edgeLeft        = -std::numeric_limits<double>::infinity();
14046                                 edgeRight       = +std::numeric_limits<double>::infinity();
14047                         }
14048                         else
14049                         {
14050                                 edgeLeft        = *std::min_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14051                                 edgeRight       = *std::max_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14052                         }
14053
14054                         min[0] = edgeLeft;
14055                         max[0] = edgeRight;
14056                 }
14057
14058                 return true;
14059         }
14060 };
14061
14062 struct fp16Asin : public fp16PerComponent
14063 {
14064         template<class fp16type>
14065         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14066         {
14067                 const fp16type  x               (*in[0]);
14068                 const double    d               (x.asDouble());
14069                 const double    result  (deAsin(d));
14070                 const double    error   (deAtan2(d, sqrt(1.0 - d * d)));
14071
14072                 if (!x.isNaN() && deAbs(d) > 1.0)
14073                         return false;
14074
14075                 out[0] = fp16type(result).bits();
14076                 min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14077                 max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14078
14079                 return true;
14080         }
14081 };
14082
14083 struct fp16Acos : public fp16PerComponent
14084 {
14085         template<class fp16type>
14086         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14087         {
14088                 const fp16type  x               (*in[0]);
14089                 const double    d               (x.asDouble());
14090                 const double    result  (deAcos(d));
14091                 const double    error   (deAtan2(sqrt(1.0 - d * d), d));
14092
14093                 if (!x.isNaN() && deAbs(d) > 1.0)
14094                         return false;
14095
14096                 out[0] = fp16type(result).bits();
14097                 min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14098                 max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14099
14100                 return true;
14101         }
14102 };
14103
14104 struct fp16Atan : public fp16PerComponent
14105 {
14106         virtual double getULPs(vector<const deFloat16*>& in)
14107         {
14108                 DE_UNREF(in);
14109
14110                 return 2 * 5.0; // This is not a precision test. Value is not from spec
14111         }
14112
14113         template<class fp16type>
14114         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14115         {
14116                 const fp16type  x               (*in[0]);
14117                 const double    d               (x.asDouble());
14118                 const double    result  (deAtanOver(d));
14119
14120                 out[0] = fp16type(result).bits();
14121                 min[0] = getMin(result, getULPs(in));
14122                 max[0] = getMax(result, getULPs(in));
14123
14124                 return true;
14125         }
14126 };
14127
14128 struct fp16Sinh : public fp16PerComponent
14129 {
14130         fp16Sinh() : fp16PerComponent()
14131         {
14132                 flavorNames.push_back("Double");
14133                 flavorNames.push_back("ExpFP16");
14134         }
14135
14136         template<class fp16type>
14137         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14138         {
14139                 const fp16type  x               (*in[0]);
14140                 const double    d               (x.asDouble());
14141                 const double    ulps    (64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
14142                 double                  result  (0.0);
14143                 double                  error   (0.0);
14144
14145                 if (getFlavor() == 0)
14146                 {
14147                         result  = deSinh(d);
14148                         error   = floatFormat16.ulp(deAbs(result), ulps);
14149                 }
14150                 else if (getFlavor() == 1)
14151                 {
14152                         const fp16type  epx     (deExp(d));
14153                         const fp16type  enx     (deExp(-d));
14154                         const fp16type  esx     (epx.asDouble() - enx.asDouble());
14155                         const fp16type  sx2     (esx.asDouble() / 2.0);
14156
14157                         result  = sx2.asDouble();
14158                         error   = deAbs(floatFormat16.ulp(epx.asDouble(), ulps)) + deAbs(floatFormat16.ulp(enx.asDouble(), ulps));
14159                 }
14160                 else
14161                 {
14162                         TCU_THROW(InternalError, "Unknown flavor");
14163                 }
14164
14165                 out[0] = fp16type(result).bits();
14166                 min[0] = result - error;
14167                 max[0] = result + error;
14168
14169                 return true;
14170         }
14171 };
14172
14173 struct fp16Cosh : public fp16PerComponent
14174 {
14175         fp16Cosh() : fp16PerComponent()
14176         {
14177                 flavorNames.push_back("Double");
14178                 flavorNames.push_back("ExpFP16");
14179         }
14180
14181         template<class fp16type>
14182         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14183         {
14184                 const fp16type  x               (*in[0]);
14185                 const double    d               (x.asDouble());
14186                 const double    ulps    (64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
14187                 double                  result  (0.0);
14188
14189                 if (getFlavor() == 0)
14190                 {
14191                         result = deCosh(d);
14192                 }
14193                 else if (getFlavor() == 1)
14194                 {
14195                         const fp16type  epx     (deExp(d));
14196                         const fp16type  enx     (deExp(-d));
14197                         const fp16type  esx     (epx.asDouble() + enx.asDouble());
14198                         const fp16type  sx2     (esx.asDouble() / 2.0);
14199
14200                         result = sx2.asDouble();
14201                 }
14202                 else
14203                 {
14204                         TCU_THROW(InternalError, "Unknown flavor");
14205                 }
14206
14207                 out[0] = fp16type(result).bits();
14208                 min[0] = result - floatFormat16.ulp(deAbs(result), ulps);
14209                 max[0] = result + floatFormat16.ulp(deAbs(result), ulps);
14210
14211                 return true;
14212         }
14213 };
14214
14215 struct fp16Tanh : public fp16PerComponent
14216 {
14217         fp16Tanh() : fp16PerComponent()
14218         {
14219                 flavorNames.push_back("Tanh");
14220                 flavorNames.push_back("SinhCosh");
14221                 flavorNames.push_back("SinhCoshFP16");
14222                 flavorNames.push_back("PolyFP16");
14223         }
14224
14225         virtual double getULPs (vector<const deFloat16*>& in)
14226         {
14227                 const tcu::Float16      x       (*in[0]);
14228                 const double            d       (x.asDouble());
14229
14230                 return 2 * (1.0 + 2 * deAbs(d)); // This is not a precision test. Value is not from spec
14231         }
14232
14233         template<class fp16type>
14234         inline double calcPoly (const fp16type& espx, const fp16type& esnx, const fp16type& ecpx, const fp16type& ecnx)
14235         {
14236                 const fp16type  esx     (espx.asDouble() - esnx.asDouble());
14237                 const fp16type  sx2     (esx.asDouble() / 2.0);
14238                 const fp16type  ecx     (ecpx.asDouble() + ecnx.asDouble());
14239                 const fp16type  cx2     (ecx.asDouble() / 2.0);
14240                 const fp16type  tg      (sx2.asDouble() / cx2.asDouble());
14241                 const double    rez     (tg.asDouble());
14242
14243                 return rez;
14244         }
14245
14246         template<class fp16type>
14247         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14248         {
14249                 const fp16type  x               (*in[0]);
14250                 const double    d               (x.asDouble());
14251                 double                  result  (0.0);
14252
14253                 if (getFlavor() == 0)
14254                 {
14255                         result  = deTanh(d);
14256                         min[0]  = getMin(result, getULPs(in));
14257                         max[0]  = getMax(result, getULPs(in));
14258                 }
14259                 else if (getFlavor() == 1)
14260                 {
14261                         result  = deSinh(d) / deCosh(d);
14262                         min[0]  = getMin(result, getULPs(in));
14263                         max[0]  = getMax(result, getULPs(in));
14264                 }
14265                 else if (getFlavor() == 2)
14266                 {
14267                         const fp16type  s       (deSinh(d));
14268                         const fp16type  c       (deCosh(d));
14269
14270                         result  = s.asDouble() / c.asDouble();
14271                         min[0]  = getMin(result, getULPs(in));
14272                         max[0]  = getMax(result, getULPs(in));
14273                 }
14274                 else if (getFlavor() == 3)
14275                 {
14276                         const double    ulps    (getULPs(in));
14277                         const double    epxm    (deExp( d));
14278                         const double    enxm    (deExp(-d));
14279                         const double    epxmerr = floatFormat16.ulp(epxm, ulps);
14280                         const double    enxmerr = floatFormat16.ulp(enxm, ulps);
14281                         const fp16type  epx[]   = { fp16type(epxm - epxmerr), fp16type(epxm + epxmerr) };
14282                         const fp16type  enx[]   = { fp16type(enxm - enxmerr), fp16type(enxm + enxmerr) };
14283                         const fp16type  epxm16  (epxm);
14284                         const fp16type  enxm16  (enxm);
14285                         vector<double>  tgs;
14286
14287                         for (size_t spNdx = 0; spNdx < DE_LENGTH_OF_ARRAY(epx); ++spNdx)
14288                         for (size_t snNdx = 0; snNdx < DE_LENGTH_OF_ARRAY(enx); ++snNdx)
14289                         for (size_t cpNdx = 0; cpNdx < DE_LENGTH_OF_ARRAY(epx); ++cpNdx)
14290                         for (size_t cnNdx = 0; cnNdx < DE_LENGTH_OF_ARRAY(enx); ++cnNdx)
14291                         {
14292                                 const double tgh = calcPoly(epx[spNdx], enx[snNdx], epx[cpNdx], enx[cnNdx]);
14293
14294                                 tgs.push_back(tgh);
14295                         }
14296
14297                         result = calcPoly(epxm16, enxm16, epxm16, enxm16);
14298                         min[0] = *std::min_element(tgs.begin(), tgs.end());
14299                         max[0] = *std::max_element(tgs.begin(), tgs.end());
14300                 }
14301                 else
14302                 {
14303                         TCU_THROW(InternalError, "Unknown flavor");
14304                 }
14305
14306                 out[0] = fp16type(result).bits();
14307
14308                 return true;
14309         }
14310 };
14311
14312 struct fp16Asinh : public fp16PerComponent
14313 {
14314         fp16Asinh() : fp16PerComponent()
14315         {
14316                 flavorNames.push_back("Double");
14317                 flavorNames.push_back("PolyFP16Wiki");
14318                 flavorNames.push_back("PolyFP16Abs");
14319         }
14320
14321         virtual double getULPs (vector<const deFloat16*>& in)
14322         {
14323                 DE_UNREF(in);
14324
14325                 return 256.0; // This is not a precision test. Value is not from spec
14326         }
14327
14328         template<class fp16type>
14329         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14330         {
14331                 const fp16type  x               (*in[0]);
14332                 const double    d               (x.asDouble());
14333                 double                  result  (0.0);
14334
14335                 if (getFlavor() == 0)
14336                 {
14337                         result = deAsinh(d);
14338                 }
14339                 else if (getFlavor() == 1)
14340                 {
14341                         const fp16type  x2              (d * d);
14342                         const fp16type  x2p1    (x2.asDouble() + 1.0);
14343                         const fp16type  sq              (deSqrt(x2p1.asDouble()));
14344                         const fp16type  sxsq    (d + sq.asDouble());
14345                         const fp16type  lsxsq   (deLog(sxsq.asDouble()));
14346
14347                         if (lsxsq.isInf())
14348                                 return false;
14349
14350                         result = lsxsq.asDouble();
14351                 }
14352                 else if (getFlavor() == 2)
14353                 {
14354                         const fp16type  x2              (d * d);
14355                         const fp16type  x2p1    (x2.asDouble() + 1.0);
14356                         const fp16type  sq              (deSqrt(x2p1.asDouble()));
14357                         const fp16type  sxsq    (deAbs(d) + sq.asDouble());
14358                         const fp16type  lsxsq   (deLog(sxsq.asDouble()));
14359
14360                         result = deSign(d) * lsxsq.asDouble();
14361                 }
14362                 else
14363                 {
14364                         TCU_THROW(InternalError, "Unknown flavor");
14365                 }
14366
14367                 out[0] = fp16type(result).bits();
14368                 min[0] = getMin(result, getULPs(in));
14369                 max[0] = getMax(result, getULPs(in));
14370
14371                 return true;
14372         }
14373 };
14374
14375 struct fp16Acosh : public fp16PerComponent
14376 {
14377         fp16Acosh() : fp16PerComponent()
14378         {
14379                 flavorNames.push_back("Double");
14380                 flavorNames.push_back("PolyFP16");
14381         }
14382
14383         virtual double getULPs (vector<const deFloat16*>& in)
14384         {
14385                 DE_UNREF(in);
14386
14387                 return 16.0; // This is not a precision test. Value is not from spec
14388         }
14389
14390         template<class fp16type>
14391         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14392         {
14393                 const fp16type  x               (*in[0]);
14394                 const double    d               (x.asDouble());
14395                 double                  result  (0.0);
14396
14397                 if (!x.isNaN() && d < 1.0)
14398                         return false;
14399
14400                 if (getFlavor() == 0)
14401                 {
14402                         result = deAcosh(d);
14403                 }
14404                 else if (getFlavor() == 1)
14405                 {
14406                         const fp16type  x2              (d * d);
14407                         const fp16type  x2m1    (x2.asDouble() - 1.0);
14408                         const fp16type  sq              (deSqrt(x2m1.asDouble()));
14409                         const fp16type  sxsq    (d + sq.asDouble());
14410                         const fp16type  lsxsq   (deLog(sxsq.asDouble()));
14411
14412                         result = lsxsq.asDouble();
14413                 }
14414                 else
14415                 {
14416                         TCU_THROW(InternalError, "Unknown flavor");
14417                 }
14418
14419                 out[0] = fp16type(result).bits();
14420                 min[0] = getMin(result, getULPs(in));
14421                 max[0] = getMax(result, getULPs(in));
14422
14423                 return true;
14424         }
14425 };
14426
14427 struct fp16Atanh : public fp16PerComponent
14428 {
14429         fp16Atanh() : fp16PerComponent()
14430         {
14431                 flavorNames.push_back("Double");
14432                 flavorNames.push_back("PolyFP16");
14433         }
14434
14435         template<class fp16type>
14436         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14437         {
14438                 const fp16type  x               (*in[0]);
14439                 const double    d               (x.asDouble());
14440                 double                  result  (0.0);
14441
14442                 if (deAbs(d) >= 1.0)
14443                         return false;
14444
14445                 if (getFlavor() == 0)
14446                 {
14447                         const double    ulps    (16.0); // This is not a precision test. Value is not from spec
14448
14449                         result = deAtanh(d);
14450                         min[0] = getMin(result, ulps);
14451                         max[0] = getMax(result, ulps);
14452                 }
14453                 else if (getFlavor() == 1)
14454                 {
14455                         const fp16type  x1a             (1.0 + d);
14456                         const fp16type  x1b             (1.0 - d);
14457                         const fp16type  x1d             (x1a.asDouble() / x1b.asDouble());
14458                         const fp16type  lx1d    (deLog(x1d.asDouble()));
14459                         const fp16type  lx1d2   (0.5 * lx1d.asDouble());
14460                         const double    error   (2 * (de::inRange(deAbs(x1d.asDouble()), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(x1d.asDouble()), 3.0)));
14461
14462                         result = lx1d2.asDouble();
14463                         min[0] = result - error;
14464                         max[0] = result + error;
14465                 }
14466                 else
14467                 {
14468                         TCU_THROW(InternalError, "Unknown flavor");
14469                 }
14470
14471                 out[0] = fp16type(result).bits();
14472
14473                 return true;
14474         }
14475 };
14476
14477 struct fp16Exp : public fp16PerComponent
14478 {
14479         template<class fp16type>
14480         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14481         {
14482                 const fp16type  x               (*in[0]);
14483                 const double    d               (x.asDouble());
14484                 const double    ulps    (10.0 * (1.0 + 2.0 * deAbs(d)));
14485                 const double    result  (deExp(d));
14486
14487                 out[0] = fp16type(result).bits();
14488                 min[0] = getMin(result, ulps);
14489                 max[0] = getMax(result, ulps);
14490
14491                 return true;
14492         }
14493 };
14494
14495 struct fp16Log : public fp16PerComponent
14496 {
14497         template<class fp16type>
14498         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14499         {
14500                 const fp16type  x               (*in[0]);
14501                 const double    d               (x.asDouble());
14502                 const double    result  (deLog(d));
14503                 const double    error   (de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
14504
14505                 if (d <= 0.0)
14506                         return false;
14507
14508                 out[0] = fp16type(result).bits();
14509                 min[0] = result - error;
14510                 max[0] = result + error;
14511
14512                 return true;
14513         }
14514 };
14515
14516 struct fp16Exp2 : public fp16PerComponent
14517 {
14518         template<class fp16type>
14519         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14520         {
14521                 const fp16type  x               (*in[0]);
14522                 const double    d               (x.asDouble());
14523                 const double    result  (deExp2(d));
14524                 const double    ulps    (1.0 + 2.0 * deAbs(fp16type(in[0][0]).asDouble()));
14525
14526                 out[0] = fp16type(result).bits();
14527                 min[0] = getMin(result, ulps);
14528                 max[0] = getMax(result, ulps);
14529
14530                 return true;
14531         }
14532 };
14533
14534 struct fp16Log2 : public fp16PerComponent
14535 {
14536         template<class fp16type>
14537         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14538         {
14539                 const fp16type  x               (*in[0]);
14540                 const double    d               (x.asDouble());
14541                 const double    result  (deLog2(d));
14542                 const double    error   (de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
14543
14544                 if (d <= 0.0)
14545                         return false;
14546
14547                 out[0] = fp16type(result).bits();
14548                 min[0] = result - error;
14549                 max[0] = result + error;
14550
14551                 return true;
14552         }
14553 };
14554
14555 struct fp16Sqrt : public fp16PerComponent
14556 {
14557         virtual double getULPs (vector<const deFloat16*>& in)
14558         {
14559                 DE_UNREF(in);
14560
14561                 return 6.0;
14562         }
14563
14564         template<class fp16type>
14565         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14566         {
14567                 const fp16type  x               (*in[0]);
14568                 const double    d               (x.asDouble());
14569                 const double    result  (deSqrt(d));
14570
14571                 if (!x.isNaN() && d < 0.0)
14572                         return false;
14573
14574                 out[0] = fp16type(result).bits();
14575                 min[0] = getMin(result, getULPs(in));
14576                 max[0] = getMax(result, getULPs(in));
14577
14578                 return true;
14579         }
14580 };
14581
14582 struct fp16InverseSqrt : public fp16PerComponent
14583 {
14584         virtual double getULPs (vector<const deFloat16*>& in)
14585         {
14586                 DE_UNREF(in);
14587
14588                 return 2.0;
14589         }
14590
14591         template<class fp16type>
14592         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14593         {
14594                 const fp16type  x               (*in[0]);
14595                 const double    d               (x.asDouble());
14596                 const double    result  (1.0/deSqrt(d));
14597
14598                 if (!x.isNaN() && d <= 0.0)
14599                         return false;
14600
14601                 out[0] = fp16type(result).bits();
14602                 min[0] = getMin(result, getULPs(in));
14603                 max[0] = getMax(result, getULPs(in));
14604
14605                 return true;
14606         }
14607 };
14608
14609 struct fp16ModfFrac : public fp16PerComponent
14610 {
14611         template<class fp16type>
14612         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14613         {
14614                 const fp16type  x               (*in[0]);
14615                 const double    d               (x.asDouble());
14616                 double                  i               (0.0);
14617                 const double    result  (deModf(d, &i));
14618
14619                 if (x.isInf() || x.isNaN())
14620                         return false;
14621
14622                 out[0] = fp16type(result).bits();
14623                 min[0] = getMin(result, getULPs(in));
14624                 max[0] = getMax(result, getULPs(in));
14625
14626                 return true;
14627         }
14628 };
14629
14630 struct fp16ModfInt : public fp16PerComponent
14631 {
14632         template<class fp16type>
14633         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14634         {
14635                 const fp16type  x               (*in[0]);
14636                 const double    d               (x.asDouble());
14637                 double                  i               (0.0);
14638                 const double    dummy   (deModf(d, &i));
14639                 const double    result  (i);
14640
14641                 DE_UNREF(dummy);
14642
14643                 if (x.isInf() || x.isNaN())
14644                         return false;
14645
14646                 out[0] = fp16type(result).bits();
14647                 min[0] = getMin(result, getULPs(in));
14648                 max[0] = getMax(result, getULPs(in));
14649
14650                 return true;
14651         }
14652 };
14653
14654 struct fp16FrexpS : public fp16PerComponent
14655 {
14656         template<class fp16type>
14657         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14658         {
14659                 const fp16type  x               (*in[0]);
14660                 const double    d               (x.asDouble());
14661                 int                             e               (0);
14662                 const double    result  (deFrExp(d, &e));
14663
14664                 if (x.isNaN() || x.isInf())
14665                         return false;
14666
14667                 out[0] = fp16type(result).bits();
14668                 min[0] = getMin(result, getULPs(in));
14669                 max[0] = getMax(result, getULPs(in));
14670
14671                 return true;
14672         }
14673 };
14674
14675 struct fp16FrexpE : public fp16PerComponent
14676 {
14677         template<class fp16type>
14678         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14679         {
14680                 const fp16type  x               (*in[0]);
14681                 const double    d               (x.asDouble());
14682                 int                             e               (0);
14683                 const double    dummy   (deFrExp(d, &e));
14684                 const double    result  (static_cast<double>(e));
14685
14686                 DE_UNREF(dummy);
14687
14688                 if (x.isNaN() || x.isInf())
14689                         return false;
14690
14691                 out[0] = fp16type(result).bits();
14692                 min[0] = getMin(result, getULPs(in));
14693                 max[0] = getMax(result, getULPs(in));
14694
14695                 return true;
14696         }
14697 };
14698
14699 struct fp16OpFAdd : public fp16PerComponent
14700 {
14701         template<class fp16type>
14702         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14703         {
14704                 const fp16type  x               (*in[0]);
14705                 const fp16type  y               (*in[1]);
14706                 const double    xd              (x.asDouble());
14707                 const double    yd              (y.asDouble());
14708                 const double    result  (xd + yd);
14709
14710                 out[0] = fp16type(result).bits();
14711                 min[0] = getMin(result, getULPs(in));
14712                 max[0] = getMax(result, getULPs(in));
14713
14714                 return true;
14715         }
14716 };
14717
14718 struct fp16OpFSub : public fp16PerComponent
14719 {
14720         template<class fp16type>
14721         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14722         {
14723                 const fp16type  x               (*in[0]);
14724                 const fp16type  y               (*in[1]);
14725                 const double    xd              (x.asDouble());
14726                 const double    yd              (y.asDouble());
14727                 const double    result  (xd - yd);
14728
14729                 out[0] = fp16type(result).bits();
14730                 min[0] = getMin(result, getULPs(in));
14731                 max[0] = getMax(result, getULPs(in));
14732
14733                 return true;
14734         }
14735 };
14736
14737 struct fp16OpFMul : public fp16PerComponent
14738 {
14739         template<class fp16type>
14740         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14741         {
14742                 const fp16type  x               (*in[0]);
14743                 const fp16type  y               (*in[1]);
14744                 const double    xd              (x.asDouble());
14745                 const double    yd              (y.asDouble());
14746                 const double    result  (xd * yd);
14747
14748                 out[0] = fp16type(result).bits();
14749                 min[0] = getMin(result, getULPs(in));
14750                 max[0] = getMax(result, getULPs(in));
14751
14752                 return true;
14753         }
14754 };
14755
14756 struct fp16OpFDiv : public fp16PerComponent
14757 {
14758         fp16OpFDiv() : fp16PerComponent()
14759         {
14760                 flavorNames.push_back("DirectDiv");
14761                 flavorNames.push_back("InverseDiv");
14762         }
14763
14764         template<class fp16type>
14765         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14766         {
14767                 const fp16type  x                       (*in[0]);
14768                 const fp16type  y                       (*in[1]);
14769                 const double    xd                      (x.asDouble());
14770                 const double    yd                      (y.asDouble());
14771                 const double    unspecUlp       (16.0);
14772                 const double    ulpCnt          (de::inRange(deAbs(yd), deLdExp(1, -14), deLdExp(1, 14)) ? 2.5 : unspecUlp);
14773                 double                  result          (0.0);
14774
14775                 if (y.isZero())
14776                         return false;
14777
14778                 if (getFlavor() == 0)
14779                 {
14780                         result = (xd / yd);
14781                 }
14782                 else if (getFlavor() == 1)
14783                 {
14784                         const double    invyd   (1.0 / yd);
14785                         const fp16type  invy    (invyd);
14786
14787                         result = (xd * invy.asDouble());
14788                 }
14789                 else
14790                 {
14791                         TCU_THROW(InternalError, "Unknown flavor");
14792                 }
14793
14794                 out[0] = fp16type(result).bits();
14795                 min[0] = getMin(result, ulpCnt);
14796                 max[0] = getMax(result, ulpCnt);
14797
14798                 return true;
14799         }
14800 };
14801
14802 struct fp16Atan2 : public fp16PerComponent
14803 {
14804         fp16Atan2() : fp16PerComponent()
14805         {
14806                 flavorNames.push_back("DoubleCalc");
14807                 flavorNames.push_back("DoubleCalc_PI");
14808         }
14809
14810         virtual double getULPs(vector<const deFloat16*>& in)
14811         {
14812                 DE_UNREF(in);
14813
14814                 return 2 * 5.0; // This is not a precision test. Value is not from spec
14815         }
14816
14817         template<class fp16type>
14818         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14819         {
14820                 const fp16type  x               (*in[0]);
14821                 const fp16type  y               (*in[1]);
14822                 const double    xd              (x.asDouble());
14823                 const double    yd              (y.asDouble());
14824                 double                  result  (0.0);
14825
14826                 if (x.isZero() && y.isZero())
14827                         return false;
14828
14829                 if (getFlavor() == 0)
14830                 {
14831                         result  = deAtan2(xd, yd);
14832                 }
14833                 else if (getFlavor() == 1)
14834                 {
14835                         const double    ulps    (2.0 * 5.0); // This is not a precision test. Value is not from spec
14836                         const double    eps             (floatFormat16.ulp(DE_PI_DOUBLE, ulps));
14837
14838                         result  = deAtan2(xd, yd);
14839
14840                         if (de::inRange(deAbs(result), DE_PI_DOUBLE - eps, DE_PI_DOUBLE + eps))
14841                                 result  = -result;
14842                 }
14843                 else
14844                 {
14845                         TCU_THROW(InternalError, "Unknown flavor");
14846                 }
14847
14848                 out[0] = fp16type(result).bits();
14849                 min[0] = getMin(result, getULPs(in));
14850                 max[0] = getMax(result, getULPs(in));
14851
14852                 return true;
14853         }
14854 };
14855
14856 struct fp16Pow : public fp16PerComponent
14857 {
14858         fp16Pow() : fp16PerComponent()
14859         {
14860                 flavorNames.push_back("Pow");
14861                 flavorNames.push_back("PowLog2");
14862                 flavorNames.push_back("PowLog2FP16");
14863         }
14864
14865         template<class fp16type>
14866         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14867         {
14868                 const fp16type  x               (*in[0]);
14869                 const fp16type  y               (*in[1]);
14870                 const double    xd              (x.asDouble());
14871                 const double    yd              (y.asDouble());
14872                 const double    logxeps (de::inRange(deAbs(xd), 0.5, 2.0) ? deLdExp(1.0, -7) : floatFormat16.ulp(deLog2(xd), 3.0));
14873                 const double    ulps1   (1.0 + 4.0 * deAbs(yd * (deLog2(xd) - logxeps)));
14874                 const double    ulps2   (1.0 + 4.0 * deAbs(yd * (deLog2(xd) + logxeps)));
14875                 const double    ulps    (deMax(deAbs(ulps1), deAbs(ulps2)));
14876                 double                  result  (0.0);
14877
14878                 if (xd < 0.0)
14879                         return false;
14880
14881                 if (x.isZero() && yd <= 0.0)
14882                         return false;
14883
14884                 if (getFlavor() == 0)
14885                 {
14886                         result = dePow(xd, yd);
14887                 }
14888                 else if (getFlavor() == 1)
14889                 {
14890                         const double    l2d     (deLog2(xd));
14891                         const double    e2d     (deExp2(yd * l2d));
14892
14893                         result = e2d;
14894                 }
14895                 else if (getFlavor() == 2)
14896                 {
14897                         const double    l2d     (deLog2(xd));
14898                         const fp16type  l2      (l2d);
14899                         const double    e2d     (deExp2(yd * l2.asDouble()));
14900                         const fp16type  e2      (e2d);
14901
14902                         result = e2.asDouble();
14903                 }
14904                 else
14905                 {
14906                         TCU_THROW(InternalError, "Unknown flavor");
14907                 }
14908
14909                 out[0] = fp16type(result).bits();
14910                 min[0] = getMin(result, ulps);
14911                 max[0] = getMax(result, ulps);
14912
14913                 return true;
14914         }
14915 };
14916
14917 struct fp16FMin : public fp16PerComponent
14918 {
14919         template<class fp16type>
14920         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14921         {
14922                 const fp16type  x               (*in[0]);
14923                 const fp16type  y               (*in[1]);
14924                 const double    xd              (x.asDouble());
14925                 const double    yd              (y.asDouble());
14926                 const double    result  (deMin(xd, yd));
14927
14928                 if (x.isNaN() || y.isNaN())
14929                         return false;
14930
14931                 out[0] = fp16type(result).bits();
14932                 min[0] = getMin(result, getULPs(in));
14933                 max[0] = getMax(result, getULPs(in));
14934
14935                 return true;
14936         }
14937 };
14938
14939 struct fp16FMax : public fp16PerComponent
14940 {
14941         template<class fp16type>
14942         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14943         {
14944                 const fp16type  x               (*in[0]);
14945                 const fp16type  y               (*in[1]);
14946                 const double    xd              (x.asDouble());
14947                 const double    yd              (y.asDouble());
14948                 const double    result  (deMax(xd, yd));
14949
14950                 if (x.isNaN() || y.isNaN())
14951                         return false;
14952
14953                 out[0] = fp16type(result).bits();
14954                 min[0] = getMin(result, getULPs(in));
14955                 max[0] = getMax(result, getULPs(in));
14956
14957                 return true;
14958         }
14959 };
14960
14961 struct fp16Step : public fp16PerComponent
14962 {
14963         template<class fp16type>
14964         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14965         {
14966                 const fp16type  edge    (*in[0]);
14967                 const fp16type  x               (*in[1]);
14968                 const double    edged   (edge.asDouble());
14969                 const double    xd              (x.asDouble());
14970                 const double    result  (deStep(edged, xd));
14971
14972                 out[0] = fp16type(result).bits();
14973                 min[0] = getMin(result, getULPs(in));
14974                 max[0] = getMax(result, getULPs(in));
14975
14976                 return true;
14977         }
14978 };
14979
14980 struct fp16Ldexp : public fp16PerComponent
14981 {
14982         template<class fp16type>
14983         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14984         {
14985                 const fp16type  x               (*in[0]);
14986                 const fp16type  y               (*in[1]);
14987                 const double    xd              (x.asDouble());
14988                 const int               yd              (static_cast<int>(deTrunc(y.asDouble())));
14989                 const double    result  (deLdExp(xd, yd));
14990
14991                 if (y.isNaN() || y.isInf() || y.isDenorm() || yd < -14 || yd > 15)
14992                         return false;
14993
14994                 // Spec: "If this product is too large to be represented in the floating-point type, the result is undefined."
14995                 if (fp16type(result).isInf())
14996                         return false;
14997
14998                 out[0] = fp16type(result).bits();
14999                 min[0] = getMin(result, getULPs(in));
15000                 max[0] = getMax(result, getULPs(in));
15001
15002                 return true;
15003         }
15004 };
15005
15006 struct fp16FClamp : public fp16PerComponent
15007 {
15008         template<class fp16type>
15009         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15010         {
15011                 const fp16type  x               (*in[0]);
15012                 const fp16type  minVal  (*in[1]);
15013                 const fp16type  maxVal  (*in[2]);
15014                 const double    xd              (x.asDouble());
15015                 const double    minVald (minVal.asDouble());
15016                 const double    maxVald (maxVal.asDouble());
15017                 const double    result  (deClamp(xd, minVald, maxVald));
15018
15019                 if (minVal.isNaN() || maxVal.isNaN() || minVald > maxVald)
15020                         return false;
15021
15022                 out[0] = fp16type(result).bits();
15023                 min[0] = getMin(result, getULPs(in));
15024                 max[0] = getMax(result, getULPs(in));
15025
15026                 return true;
15027         }
15028 };
15029
15030 struct fp16FMix : public fp16PerComponent
15031 {
15032         fp16FMix() : fp16PerComponent()
15033         {
15034                 flavorNames.push_back("DoubleCalc");
15035                 flavorNames.push_back("EmulatingFP16");
15036                 flavorNames.push_back("EmulatingFP16YminusX");
15037         }
15038
15039         template<class fp16type>
15040         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15041         {
15042                 const fp16type  x               (*in[0]);
15043                 const fp16type  y               (*in[1]);
15044                 const fp16type  a               (*in[2]);
15045                 const double    ulps    (8.0); // This is not a precision test. Value is not from spec
15046                 double                  result  (0.0);
15047
15048                 if (getFlavor() == 0)
15049                 {
15050                         const double    xd              (x.asDouble());
15051                         const double    yd              (y.asDouble());
15052                         const double    ad              (a.asDouble());
15053                         const double    xeps    (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15054                         const double    yeps    (floatFormat16.ulp(deAbs(yd * ad), ulps));
15055                         const double    eps             (xeps + yeps);
15056
15057                         result = deMix(xd, yd, ad);
15058                         min[0] = result - eps;
15059                         max[0] = result + eps;
15060                 }
15061                 else if (getFlavor() == 1)
15062                 {
15063                         const double    xd              (x.asDouble());
15064                         const double    yd              (y.asDouble());
15065                         const double    ad              (a.asDouble());
15066                         const fp16type  am              (1.0 - ad);
15067                         const double    amd             (am.asDouble());
15068                         const fp16type  xam             (xd * amd);
15069                         const double    xamd    (xam.asDouble());
15070                         const fp16type  ya              (yd * ad);
15071                         const double    yad             (ya.asDouble());
15072                         const double    xeps    (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15073                         const double    yeps    (floatFormat16.ulp(deAbs(yd * ad), ulps));
15074                         const double    eps             (xeps + yeps);
15075
15076                         result = xamd + yad;
15077                         min[0] = result - eps;
15078                         max[0] = result + eps;
15079                 }
15080                 else if (getFlavor() == 2)
15081                 {
15082                         const double    xd              (x.asDouble());
15083                         const double    yd              (y.asDouble());
15084                         const double    ad              (a.asDouble());
15085                         const fp16type  ymx             (yd - xd);
15086                         const double    ymxd    (ymx.asDouble());
15087                         const fp16type  ymxa    (ymxd * ad);
15088                         const double    ymxad   (ymxa.asDouble());
15089                         const double    xeps    (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15090                         const double    yeps    (floatFormat16.ulp(deAbs(yd * ad), ulps));
15091                         const double    eps             (xeps + yeps);
15092
15093                         result = xd + ymxad;
15094                         min[0] = result - eps;
15095                         max[0] = result + eps;
15096                 }
15097                 else
15098                 {
15099                         TCU_THROW(InternalError, "Unknown flavor");
15100                 }
15101
15102                 out[0] = fp16type(result).bits();
15103
15104                 return true;
15105         }
15106 };
15107
15108 struct fp16SmoothStep : public fp16PerComponent
15109 {
15110         fp16SmoothStep() : fp16PerComponent()
15111         {
15112                 flavorNames.push_back("FloatCalc");
15113                 flavorNames.push_back("EmulatingFP16");
15114                 flavorNames.push_back("EmulatingFP16WClamp");
15115         }
15116
15117         virtual double getULPs(vector<const deFloat16*>& in)
15118         {
15119                 DE_UNREF(in);
15120
15121                 return 4.0; // This is not a precision test. Value is not from spec
15122         }
15123
15124         template<class fp16type>
15125         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15126         {
15127                 const fp16type  edge0   (*in[0]);
15128                 const fp16type  edge1   (*in[1]);
15129                 const fp16type  x               (*in[2]);
15130                 double                  result  (0.0);
15131
15132                 if (edge0.isNaN() || edge1.isNaN() || x.isNaN() || edge0.asDouble() >= edge1.asDouble())
15133                         return false;
15134
15135                 if (edge0.isInf() || edge1.isInf() || x.isInf())
15136                         return false;
15137
15138                 if (getFlavor() == 0)
15139                 {
15140                         const float     edge0d  (edge0.asFloat());
15141                         const float     edge1d  (edge1.asFloat());
15142                         const float     xd              (x.asFloat());
15143                         const float     sstep   (deFloatSmoothStep(edge0d, edge1d, xd));
15144
15145                         result = sstep;
15146                 }
15147                 else if (getFlavor() == 1)
15148                 {
15149                         const double    edge0d  (edge0.asDouble());
15150                         const double    edge1d  (edge1.asDouble());
15151                         const double    xd              (x.asDouble());
15152
15153                         if (xd <= edge0d)
15154                                 result = 0.0;
15155                         else if (xd >= edge1d)
15156                                 result = 1.0;
15157                         else
15158                         {
15159                                 const fp16type  a       (xd - edge0d);
15160                                 const fp16type  b       (edge1d - edge0d);
15161                                 const fp16type  t       (a.asDouble() / b.asDouble());
15162                                 const fp16type  t2      (2.0 * t.asDouble());
15163                                 const fp16type  t3      (3.0 - t2.asDouble());
15164                                 const fp16type  t4      (t.asDouble() * t3.asDouble());
15165                                 const fp16type  t5      (t.asDouble() * t4.asDouble());
15166
15167                                 result = t5.asDouble();
15168                         }
15169                 }
15170                 else if (getFlavor() == 2)
15171                 {
15172                         const double    edge0d  (edge0.asDouble());
15173                         const double    edge1d  (edge1.asDouble());
15174                         const double    xd              (x.asDouble());
15175                         const fp16type  a       (xd - edge0d);
15176                         const fp16type  b       (edge1d - edge0d);
15177                         const fp16type  bi      (1.0 / b.asDouble());
15178                         const fp16type  t0      (a.asDouble() * bi.asDouble());
15179                         const double    tc      (deClamp(t0.asDouble(), 0.0, 1.0));
15180                         const fp16type  t       (tc);
15181                         const fp16type  t2      (2.0 * t.asDouble());
15182                         const fp16type  t3      (3.0 - t2.asDouble());
15183                         const fp16type  t4      (t.asDouble() * t3.asDouble());
15184                         const fp16type  t5      (t.asDouble() * t4.asDouble());
15185
15186                         result = t5.asDouble();
15187                 }
15188                 else
15189                 {
15190                         TCU_THROW(InternalError, "Unknown flavor");
15191                 }
15192
15193                 out[0] = fp16type(result).bits();
15194                 min[0] = getMin(result, getULPs(in));
15195                 max[0] = getMax(result, getULPs(in));
15196
15197                 return true;
15198         }
15199 };
15200
15201 struct fp16Fma : public fp16PerComponent
15202 {
15203         fp16Fma()
15204         {
15205                 flavorNames.push_back("DoubleCalc");
15206                 flavorNames.push_back("EmulatingFP16");
15207         }
15208
15209         virtual double getULPs(vector<const deFloat16*>& in)
15210         {
15211                 DE_UNREF(in);
15212
15213                 return 16.0;
15214         }
15215
15216         template<class fp16type>
15217         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15218         {
15219                 DE_ASSERT(in.size() == 3);
15220                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
15221                 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
15222                 DE_ASSERT(getArgCompCount(2) == getOutCompCount());
15223                 DE_ASSERT(getOutCompCount() > 0);
15224
15225                 const fp16type  a               (*in[0]);
15226                 const fp16type  b               (*in[1]);
15227                 const fp16type  c               (*in[2]);
15228                 double                  result  (0.0);
15229
15230                 if (getFlavor() == 0)
15231                 {
15232                         const double    ad      (a.asDouble());
15233                         const double    bd      (b.asDouble());
15234                         const double    cd      (c.asDouble());
15235
15236                         result  = deMadd(ad, bd, cd);
15237                 }
15238                 else if (getFlavor() == 1)
15239                 {
15240                         const double    ad      (a.asDouble());
15241                         const double    bd      (b.asDouble());
15242                         const double    cd      (c.asDouble());
15243                         const fp16type  ab      (ad * bd);
15244                         const fp16type  r       (ab.asDouble() + cd);
15245
15246                         result  = r.asDouble();
15247                 }
15248                 else
15249                 {
15250                         TCU_THROW(InternalError, "Unknown flavor");
15251                 }
15252
15253                 out[0] = fp16type(result).bits();
15254                 min[0] = getMin(result, getULPs(in));
15255                 max[0] = getMax(result, getULPs(in));
15256
15257                 return true;
15258         }
15259 };
15260
15261
15262 struct fp16AllComponents : public fp16PerComponent
15263 {
15264         bool            callOncePerComponent    ()      { return false; }
15265 };
15266
15267 struct fp16Length : public fp16AllComponents
15268 {
15269         fp16Length() : fp16AllComponents()
15270         {
15271                 flavorNames.push_back("EmulatingFP16");
15272                 flavorNames.push_back("DoubleCalc");
15273         }
15274
15275         virtual double getULPs(vector<const deFloat16*>& in)
15276         {
15277                 DE_UNREF(in);
15278
15279                 return 4.0;
15280         }
15281
15282         template<class fp16type>
15283         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15284         {
15285                 DE_ASSERT(getOutCompCount() == 1);
15286                 DE_ASSERT(in.size() == 1);
15287
15288                 double  result  (0.0);
15289
15290                 if (getFlavor() == 0)
15291                 {
15292                         fp16type        r       (0.0);
15293
15294                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15295                         {
15296                                 const fp16type  x       (in[0][componentNdx]);
15297                                 const fp16type  q       (x.asDouble() * x.asDouble());
15298
15299                                 r = fp16type(r.asDouble() + q.asDouble());
15300                         }
15301
15302                         result = deSqrt(r.asDouble());
15303
15304                         out[0] = fp16type(result).bits();
15305                 }
15306                 else if (getFlavor() == 1)
15307                 {
15308                         double  r       (0.0);
15309
15310                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15311                         {
15312                                 const fp16type  x       (in[0][componentNdx]);
15313                                 const double    q       (x.asDouble() * x.asDouble());
15314
15315                                 r += q;
15316                         }
15317
15318                         result = deSqrt(r);
15319
15320                         out[0] = fp16type(result).bits();
15321                 }
15322                 else
15323                 {
15324                         TCU_THROW(InternalError, "Unknown flavor");
15325                 }
15326
15327                 min[0] = getMin(result, getULPs(in));
15328                 max[0] = getMax(result, getULPs(in));
15329
15330                 return true;
15331         }
15332 };
15333
15334 struct fp16Distance : public fp16AllComponents
15335 {
15336         fp16Distance() : fp16AllComponents()
15337         {
15338                 flavorNames.push_back("EmulatingFP16");
15339                 flavorNames.push_back("DoubleCalc");
15340         }
15341
15342         virtual double getULPs(vector<const deFloat16*>& in)
15343         {
15344                 DE_UNREF(in);
15345
15346                 return 4.0;
15347         }
15348
15349         template<class fp16type>
15350         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15351         {
15352                 DE_ASSERT(getOutCompCount() == 1);
15353                 DE_ASSERT(in.size() == 2);
15354                 DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
15355
15356                 double  result  (0.0);
15357
15358                 if (getFlavor() == 0)
15359                 {
15360                         fp16type        r       (0.0);
15361
15362                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15363                         {
15364                                 const fp16type  x       (in[0][componentNdx]);
15365                                 const fp16type  y       (in[1][componentNdx]);
15366                                 const fp16type  d       (x.asDouble() - y.asDouble());
15367                                 const fp16type  q       (d.asDouble() * d.asDouble());
15368
15369                                 r = fp16type(r.asDouble() + q.asDouble());
15370                         }
15371
15372                         result = deSqrt(r.asDouble());
15373                 }
15374                 else if (getFlavor() == 1)
15375                 {
15376                         double  r       (0.0);
15377
15378                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15379                         {
15380                                 const fp16type  x       (in[0][componentNdx]);
15381                                 const fp16type  y       (in[1][componentNdx]);
15382                                 const double    d       (x.asDouble() - y.asDouble());
15383                                 const double    q       (d * d);
15384
15385                                 r += q;
15386                         }
15387
15388                         result = deSqrt(r);
15389                 }
15390                 else
15391                 {
15392                         TCU_THROW(InternalError, "Unknown flavor");
15393                 }
15394
15395                 out[0] = fp16type(result).bits();
15396                 min[0] = getMin(result, getULPs(in));
15397                 max[0] = getMax(result, getULPs(in));
15398
15399                 return true;
15400         }
15401 };
15402
15403 struct fp16Cross : public fp16AllComponents
15404 {
15405         fp16Cross() : fp16AllComponents()
15406         {
15407                 flavorNames.push_back("EmulatingFP16");
15408                 flavorNames.push_back("DoubleCalc");
15409         }
15410
15411         virtual double getULPs(vector<const deFloat16*>& in)
15412         {
15413                 DE_UNREF(in);
15414
15415                 return 4.0;
15416         }
15417
15418         template<class fp16type>
15419         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15420         {
15421                 DE_ASSERT(getOutCompCount() == 3);
15422                 DE_ASSERT(in.size() == 2);
15423                 DE_ASSERT(getArgCompCount(0) == 3);
15424                 DE_ASSERT(getArgCompCount(1) == 3);
15425
15426                 if (getFlavor() == 0)
15427                 {
15428                         const fp16type  x0              (in[0][0]);
15429                         const fp16type  x1              (in[0][1]);
15430                         const fp16type  x2              (in[0][2]);
15431                         const fp16type  y0              (in[1][0]);
15432                         const fp16type  y1              (in[1][1]);
15433                         const fp16type  y2              (in[1][2]);
15434                         const fp16type  x1y2    (x1.asDouble() * y2.asDouble());
15435                         const fp16type  y1x2    (y1.asDouble() * x2.asDouble());
15436                         const fp16type  x2y0    (x2.asDouble() * y0.asDouble());
15437                         const fp16type  y2x0    (y2.asDouble() * x0.asDouble());
15438                         const fp16type  x0y1    (x0.asDouble() * y1.asDouble());
15439                         const fp16type  y0x1    (y0.asDouble() * x1.asDouble());
15440
15441                         out[0] = fp16type(x1y2.asDouble() - y1x2.asDouble()).bits();
15442                         out[1] = fp16type(x2y0.asDouble() - y2x0.asDouble()).bits();
15443                         out[2] = fp16type(x0y1.asDouble() - y0x1.asDouble()).bits();
15444                 }
15445                 else if (getFlavor() == 1)
15446                 {
15447                         const fp16type  x0              (in[0][0]);
15448                         const fp16type  x1              (in[0][1]);
15449                         const fp16type  x2              (in[0][2]);
15450                         const fp16type  y0              (in[1][0]);
15451                         const fp16type  y1              (in[1][1]);
15452                         const fp16type  y2              (in[1][2]);
15453                         const double    x1y2    (x1.asDouble() * y2.asDouble());
15454                         const double    y1x2    (y1.asDouble() * x2.asDouble());
15455                         const double    x2y0    (x2.asDouble() * y0.asDouble());
15456                         const double    y2x0    (y2.asDouble() * x0.asDouble());
15457                         const double    x0y1    (x0.asDouble() * y1.asDouble());
15458                         const double    y0x1    (y0.asDouble() * x1.asDouble());
15459
15460                         out[0] = fp16type(x1y2 - y1x2).bits();
15461                         out[1] = fp16type(x2y0 - y2x0).bits();
15462                         out[2] = fp16type(x0y1 - y0x1).bits();
15463                 }
15464                 else
15465                 {
15466                         TCU_THROW(InternalError, "Unknown flavor");
15467                 }
15468
15469                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15470                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
15471                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15472                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
15473
15474                 return true;
15475         }
15476 };
15477
15478 struct fp16Normalize : public fp16AllComponents
15479 {
15480         fp16Normalize() : fp16AllComponents()
15481         {
15482                 flavorNames.push_back("EmulatingFP16");
15483                 flavorNames.push_back("DoubleCalc");
15484
15485                 // flavorNames will be extended later
15486         }
15487
15488         virtual void    setArgCompCount                 (size_t argNo, size_t compCount)
15489         {
15490                 DE_ASSERT(argCompCount[argNo] == 0); // Once only
15491
15492                 if (argNo == 0 && argCompCount[argNo] == 0)
15493                 {
15494                         const size_t            maxPermutationsCount    = 24u; // Equal to 4!
15495                         std::vector<int>        indices;
15496
15497                         for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
15498                                 indices.push_back(static_cast<int>(componentNdx));
15499
15500                         m_permutations.reserve(maxPermutationsCount);
15501
15502                         permutationsFlavorStart = flavorNames.size();
15503
15504                         do
15505                         {
15506                                 tcu::UVec4      permutation;
15507                                 std::string     name            = "Permutted_";
15508
15509                                 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
15510                                 {
15511                                         permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
15512                                         name += de::toString(indices[componentNdx]);
15513                                 }
15514
15515                                 m_permutations.push_back(permutation);
15516                                 flavorNames.push_back(name);
15517
15518                         } while(std::next_permutation(indices.begin(), indices.end()));
15519
15520                         permutationsFlavorEnd = flavorNames.size();
15521                 }
15522
15523                 fp16AllComponents::setArgCompCount(argNo, compCount);
15524         }
15525         virtual double getULPs(vector<const deFloat16*>& in)
15526         {
15527                 DE_UNREF(in);
15528
15529                 return 8.0;
15530         }
15531
15532         template<class fp16type>
15533         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15534         {
15535                 DE_ASSERT(in.size() == 1);
15536                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
15537
15538                 if (getFlavor() == 0)
15539                 {
15540                         fp16type        r(0.0);
15541
15542                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15543                         {
15544                                 const fp16type  x       (in[0][componentNdx]);
15545                                 const fp16type  q       (x.asDouble() * x.asDouble());
15546
15547                                 r = fp16type(r.asDouble() + q.asDouble());
15548                         }
15549
15550                         r = fp16type(deSqrt(r.asDouble()));
15551
15552                         if (r.isZero())
15553                                 return false;
15554
15555                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15556                         {
15557                                 const fp16type  x       (in[0][componentNdx]);
15558
15559                                 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
15560                         }
15561                 }
15562                 else if (getFlavor() == 1)
15563                 {
15564                         double  r(0.0);
15565
15566                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15567                         {
15568                                 const fp16type  x       (in[0][componentNdx]);
15569                                 const double    q       (x.asDouble() * x.asDouble());
15570
15571                                 r += q;
15572                         }
15573
15574                         r = deSqrt(r);
15575
15576                         if (r == 0)
15577                                 return false;
15578
15579                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15580                         {
15581                                 const fp16type  x       (in[0][componentNdx]);
15582
15583                                 out[componentNdx] = fp16type(x.asDouble() / r).bits();
15584                         }
15585                 }
15586                 else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
15587                 {
15588                         const int                       compCount               (static_cast<int>(getArgCompCount(0)));
15589                         const size_t            permutationNdx  (getFlavor() - permutationsFlavorStart);
15590                         const tcu::UVec4&       permutation             (m_permutations[permutationNdx]);
15591                         fp16type                        r                               (0.0);
15592
15593                         for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
15594                         {
15595                                 const size_t    componentNdx    (permutation[permComponentNdx]);
15596                                 const fp16type  x                               (in[0][componentNdx]);
15597                                 const fp16type  q                               (x.asDouble() * x.asDouble());
15598
15599                                 r = fp16type(r.asDouble() + q.asDouble());
15600                         }
15601
15602                         r = fp16type(deSqrt(r.asDouble()));
15603
15604                         if (r.isZero())
15605                                 return false;
15606
15607                         for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
15608                         {
15609                                 const size_t    componentNdx    (permutation[permComponentNdx]);
15610                                 const fp16type  x                               (in[0][componentNdx]);
15611
15612                                 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
15613                         }
15614                 }
15615                 else
15616                 {
15617                         TCU_THROW(InternalError, "Unknown flavor");
15618                 }
15619
15620                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15621                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
15622                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15623                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
15624
15625                 return true;
15626         }
15627
15628 private:
15629         std::vector<tcu::UVec4> m_permutations;
15630         size_t                                  permutationsFlavorStart;
15631         size_t                                  permutationsFlavorEnd;
15632 };
15633
15634 struct fp16FaceForward : public fp16AllComponents
15635 {
15636         virtual double getULPs(vector<const deFloat16*>& in)
15637         {
15638                 DE_UNREF(in);
15639
15640                 return 4.0;
15641         }
15642
15643         template<class fp16type>
15644         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15645         {
15646                 DE_ASSERT(in.size() == 3);
15647                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
15648                 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
15649                 DE_ASSERT(getArgCompCount(2) == getOutCompCount());
15650
15651                 fp16type        dp(0.0);
15652
15653                 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
15654                 {
15655                         const fp16type  x       (in[1][componentNdx]);
15656                         const fp16type  y       (in[2][componentNdx]);
15657                         const double    xd      (x.asDouble());
15658                         const double    yd      (y.asDouble());
15659                         const fp16type  q       (xd * yd);
15660
15661                         dp = fp16type(dp.asDouble() + q.asDouble());
15662                 }
15663
15664                 if (dp.isNaN() || dp.isZero())
15665                         return false;
15666
15667                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
15668                 {
15669                         const fp16type  n       (in[0][componentNdx]);
15670
15671                         out[componentNdx] = (dp.signBit() == 1) ? n.bits() : fp16type(-n.asDouble()).bits();
15672                 }
15673
15674                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15675                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
15676                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15677                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
15678
15679                 return true;
15680         }
15681 };
15682
15683 struct fp16Reflect : public fp16AllComponents
15684 {
15685         fp16Reflect() : fp16AllComponents()
15686         {
15687                 flavorNames.push_back("EmulatingFP16");
15688                 flavorNames.push_back("EmulatingFP16+KeepZeroSign");
15689                 flavorNames.push_back("FloatCalc");
15690                 flavorNames.push_back("FloatCalc+KeepZeroSign");
15691                 flavorNames.push_back("EmulatingFP16+2Nfirst");
15692                 flavorNames.push_back("EmulatingFP16+2Ifirst");
15693         }
15694
15695         virtual double getULPs(vector<const deFloat16*>& in)
15696         {
15697                 DE_UNREF(in);
15698
15699                 return 256.0; // This is not a precision test. Value is not from spec
15700         }
15701
15702         template<class fp16type>
15703         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15704         {
15705                 DE_ASSERT(in.size() == 2);
15706                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
15707                 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
15708
15709                 if (getFlavor() < 4)
15710                 {
15711                         const bool      keepZeroSign    ((flavor & 1) != 0 ? true : false);
15712                         const bool      floatCalc               ((flavor & 2) != 0 ? true : false);
15713
15714                         if (floatCalc)
15715                         {
15716                                 float   dp(0.0f);
15717
15718                                 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
15719                                 {
15720                                         const fp16type  i       (in[0][componentNdx]);
15721                                         const fp16type  n       (in[1][componentNdx]);
15722                                         const float             id      (i.asFloat());
15723                                         const float             nd      (n.asFloat());
15724                                         const float             qd      (id * nd);
15725
15726                                         if (keepZeroSign)
15727                                                 dp = (componentNdx == 0) ? qd : dp + qd;
15728                                         else
15729                                                 dp = dp + qd;
15730                                 }
15731
15732                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
15733                                 {
15734                                         const fp16type  i               (in[0][componentNdx]);
15735                                         const fp16type  n               (in[1][componentNdx]);
15736                                         const float             dpnd    (dp * n.asFloat());
15737                                         const float             dpn2d   (2.0f * dpnd);
15738                                         const float             idpn2d  (i.asFloat() - dpn2d);
15739                                         const fp16type  result  (idpn2d);
15740
15741                                         out[componentNdx] = result.bits();
15742                                 }
15743                         }
15744                         else
15745                         {
15746                                 fp16type        dp(0.0);
15747
15748                                 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
15749                                 {
15750                                         const fp16type  i       (in[0][componentNdx]);
15751                                         const fp16type  n       (in[1][componentNdx]);
15752                                         const double    id      (i.asDouble());
15753                                         const double    nd      (n.asDouble());
15754                                         const fp16type  q       (id * nd);
15755
15756                                         if (keepZeroSign)
15757                                                 dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
15758                                         else
15759                                                 dp = fp16type(dp.asDouble() + q.asDouble());
15760                                 }
15761
15762                                 if (dp.isNaN())
15763                                         return false;
15764
15765                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
15766                                 {
15767                                         const fp16type  i               (in[0][componentNdx]);
15768                                         const fp16type  n               (in[1][componentNdx]);
15769                                         const fp16type  dpn             (dp.asDouble() * n.asDouble());
15770                                         const fp16type  dpn2    (2 * dpn.asDouble());
15771                                         const fp16type  idpn2   (i.asDouble() - dpn2.asDouble());
15772
15773                                         out[componentNdx] = idpn2.bits();
15774                                 }
15775                         }
15776                 }
15777                 else if (getFlavor() == 4)
15778                 {
15779                         fp16type        dp(0.0);
15780
15781                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
15782                         {
15783                                 const fp16type  i       (in[0][componentNdx]);
15784                                 const fp16type  n       (in[1][componentNdx]);
15785                                 const double    id      (i.asDouble());
15786                                 const double    nd      (n.asDouble());
15787                                 const fp16type  q       (id * nd);
15788
15789                                 dp = fp16type(dp.asDouble() + q.asDouble());
15790                         }
15791
15792                         if (dp.isNaN())
15793                                 return false;
15794
15795                         for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
15796                         {
15797                                 const fp16type  i               (in[0][componentNdx]);
15798                                 const fp16type  n               (in[1][componentNdx]);
15799                                 const fp16type  n2              (2 * n.asDouble());
15800                                 const fp16type  dpn2    (dp.asDouble() * n2.asDouble());
15801                                 const fp16type  idpn2   (i.asDouble() - dpn2.asDouble());
15802
15803                                 out[componentNdx] = idpn2.bits();
15804                         }
15805                 }
15806                 else if (getFlavor() == 5)
15807                 {
15808                         fp16type        dp2(0.0);
15809
15810                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
15811                         {
15812                                 const fp16type  i       (in[0][componentNdx]);
15813                                 const fp16type  n       (in[1][componentNdx]);
15814                                 const fp16type  i2      (2.0 * i.asDouble());
15815                                 const double    i2d     (i2.asDouble());
15816                                 const double    nd      (n.asDouble());
15817                                 const fp16type  q       (i2d * nd);
15818
15819                                 dp2 = fp16type(dp2.asDouble() + q.asDouble());
15820                         }
15821
15822                         if (dp2.isNaN())
15823                                 return false;
15824
15825                         for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
15826                         {
15827                                 const fp16type  i               (in[0][componentNdx]);
15828                                 const fp16type  n               (in[1][componentNdx]);
15829                                 const fp16type  dpn2    (dp2.asDouble() * n.asDouble());
15830                                 const fp16type  idpn2   (i.asDouble() - dpn2.asDouble());
15831
15832                                 out[componentNdx] = idpn2.bits();
15833                         }
15834                 }
15835                 else
15836                 {
15837                         TCU_THROW(InternalError, "Unknown flavor");
15838                 }
15839
15840                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15841                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
15842                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15843                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
15844
15845                 return true;
15846         }
15847 };
15848
15849 struct fp16Refract : public fp16AllComponents
15850 {
15851         fp16Refract() : fp16AllComponents()
15852         {
15853                 flavorNames.push_back("EmulatingFP16");
15854                 flavorNames.push_back("EmulatingFP16+KeepZeroSign");
15855                 flavorNames.push_back("FloatCalc");
15856                 flavorNames.push_back("FloatCalc+KeepZeroSign");
15857         }
15858
15859         virtual double getULPs(vector<const deFloat16*>& in)
15860         {
15861                 DE_UNREF(in);
15862
15863                 return 8192.0; // This is not a precision test. Value is not from spec
15864         }
15865
15866         template<class fp16type>
15867         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15868         {
15869                 DE_ASSERT(in.size() == 3);
15870                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
15871                 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
15872                 DE_ASSERT(getArgCompCount(2) == 1);
15873
15874                 const bool              keepZeroSign    ((flavor & 1) != 0 ? true : false);
15875                 const bool              doubleCalc              ((flavor & 2) != 0 ? true : false);
15876                 const fp16type  eta                             (*in[2]);
15877
15878                 if (doubleCalc)
15879                 {
15880                         double  dp      (0.0);
15881
15882                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
15883                         {
15884                                 const fp16type  i       (in[0][componentNdx]);
15885                                 const fp16type  n       (in[1][componentNdx]);
15886                                 const double    id      (i.asDouble());
15887                                 const double    nd      (n.asDouble());
15888                                 const double    qd      (id * nd);
15889
15890                                 if (keepZeroSign)
15891                                         dp = (componentNdx == 0) ? qd : dp + qd;
15892                                 else
15893                                         dp = dp + qd;
15894                         }
15895
15896                         const double    eta2    (eta.asDouble() * eta.asDouble());
15897                         const double    dp2             (dp * dp);
15898                         const double    dp1             (1.0 - dp2);
15899                         const double    dpe             (eta2 * dp1);
15900                         const double    k               (1.0 - dpe);
15901
15902                         if (k < 0.0)
15903                         {
15904                                 const fp16type  zero    (0.0);
15905
15906                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
15907                                         out[componentNdx] = zero.bits();
15908                         }
15909                         else
15910                         {
15911                                 const double    sk      (deSqrt(k));
15912
15913                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
15914                                 {
15915                                         const fp16type  i               (in[0][componentNdx]);
15916                                         const fp16type  n               (in[1][componentNdx]);
15917                                         const double    etai    (i.asDouble() * eta.asDouble());
15918                                         const double    etadp   (eta.asDouble() * dp);
15919                                         const double    etadpk  (etadp + sk);
15920                                         const double    etadpkn (etadpk * n.asDouble());
15921                                         const double    full    (etai - etadpkn);
15922                                         const fp16type  result  (full);
15923
15924                                         if (result.isInf())
15925                                                 return false;
15926
15927                                         out[componentNdx] = result.bits();
15928                                 }
15929                         }
15930                 }
15931                 else
15932                 {
15933                         fp16type        dp      (0.0);
15934
15935                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
15936                         {
15937                                 const fp16type  i       (in[0][componentNdx]);
15938                                 const fp16type  n       (in[1][componentNdx]);
15939                                 const double    id      (i.asDouble());
15940                                 const double    nd      (n.asDouble());
15941                                 const fp16type  q       (id * nd);
15942
15943                                 if (keepZeroSign)
15944                                         dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
15945                                 else
15946                                         dp = fp16type(dp.asDouble() + q.asDouble());
15947                         }
15948
15949                         if (dp.isNaN())
15950                                 return false;
15951
15952                         const fp16type  eta2(eta.asDouble() * eta.asDouble());
15953                         const fp16type  dp2     (dp.asDouble() * dp.asDouble());
15954                         const fp16type  dp1     (1.0 - dp2.asDouble());
15955                         const fp16type  dpe     (eta2.asDouble() * dp1.asDouble());
15956                         const fp16type  k       (1.0 - dpe.asDouble());
15957
15958                         if (k.asDouble() < 0.0)
15959                         {
15960                                 const fp16type  zero    (0.0);
15961
15962                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
15963                                         out[componentNdx] = zero.bits();
15964                         }
15965                         else
15966                         {
15967                                 const fp16type  sk      (deSqrt(k.asDouble()));
15968
15969                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
15970                                 {
15971                                         const fp16type  i               (in[0][componentNdx]);
15972                                         const fp16type  n               (in[1][componentNdx]);
15973                                         const fp16type  etai    (i.asDouble() * eta.asDouble());
15974                                         const fp16type  etadp   (eta.asDouble() * dp.asDouble());
15975                                         const fp16type  etadpk  (etadp.asDouble() + sk.asDouble());
15976                                         const fp16type  etadpkn (etadpk.asDouble() * n.asDouble());
15977                                         const fp16type  full    (etai.asDouble() - etadpkn.asDouble());
15978
15979                                         if (full.isNaN() || full.isInf())
15980                                                 return false;
15981
15982                                         out[componentNdx] = full.bits();
15983                                 }
15984                         }
15985                 }
15986
15987                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15988                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
15989                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15990                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
15991
15992                 return true;
15993         }
15994 };
15995
15996 struct fp16Dot : public fp16AllComponents
15997 {
15998         fp16Dot() : fp16AllComponents()
15999         {
16000                 flavorNames.push_back("EmulatingFP16");
16001                 flavorNames.push_back("FloatCalc");
16002                 flavorNames.push_back("DoubleCalc");
16003
16004                 // flavorNames will be extended later
16005         }
16006
16007         virtual void    setArgCompCount                 (size_t argNo, size_t compCount)
16008         {
16009                 DE_ASSERT(argCompCount[argNo] == 0); // Once only
16010
16011                 if (argNo == 0 && argCompCount[argNo] == 0)
16012                 {
16013                         const size_t            maxPermutationsCount    = 24u; // Equal to 4!
16014                         std::vector<int>        indices;
16015
16016                         for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16017                                 indices.push_back(static_cast<int>(componentNdx));
16018
16019                         m_permutations.reserve(maxPermutationsCount);
16020
16021                         permutationsFlavorStart = flavorNames.size();
16022
16023                         do
16024                         {
16025                                 tcu::UVec4      permutation;
16026                                 std::string     name            = "Permutted_";
16027
16028                                 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16029                                 {
16030                                         permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16031                                         name += de::toString(indices[componentNdx]);
16032                                 }
16033
16034                                 m_permutations.push_back(permutation);
16035                                 flavorNames.push_back(name);
16036
16037                         } while(std::next_permutation(indices.begin(), indices.end()));
16038
16039                         permutationsFlavorEnd = flavorNames.size();
16040                 }
16041
16042                 fp16AllComponents::setArgCompCount(argNo, compCount);
16043         }
16044
16045         virtual double  getULPs(vector<const deFloat16*>& in)
16046         {
16047                 DE_UNREF(in);
16048
16049                 return 16.0; // This is not a precision test. Value is not from spec
16050         }
16051
16052         template<class fp16type>
16053         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16054         {
16055                 DE_ASSERT(in.size() == 2);
16056                 DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
16057                 DE_ASSERT(getOutCompCount() == 1);
16058
16059                 double  result  (0.0);
16060                 double  eps             (0.0);
16061
16062                 if (getFlavor() == 0)
16063                 {
16064                         fp16type        dp      (0.0);
16065
16066                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16067                         {
16068                                 const fp16type  x       (in[0][componentNdx]);
16069                                 const fp16type  y       (in[1][componentNdx]);
16070                                 const fp16type  q       (x.asDouble() * y.asDouble());
16071
16072                                 dp = fp16type(dp.asDouble() + q.asDouble());
16073                                 eps += floatFormat16.ulp(q.asDouble(), 2.0);
16074                         }
16075
16076                         result = dp.asDouble();
16077                 }
16078                 else if (getFlavor() == 1)
16079                 {
16080                         float   dp      (0.0);
16081
16082                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16083                         {
16084                                 const fp16type  x       (in[0][componentNdx]);
16085                                 const fp16type  y       (in[1][componentNdx]);
16086                                 const float             q       (x.asFloat() * y.asFloat());
16087
16088                                 dp += q;
16089                                 eps += floatFormat16.ulp(static_cast<double>(q), 2.0);
16090                         }
16091
16092                         result = dp;
16093                 }
16094                 else if (getFlavor() == 2)
16095                 {
16096                         double  dp      (0.0);
16097
16098                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16099                         {
16100                                 const fp16type  x       (in[0][componentNdx]);
16101                                 const fp16type  y       (in[1][componentNdx]);
16102                                 const double    q       (x.asDouble() * y.asDouble());
16103
16104                                 dp += q;
16105                                 eps += floatFormat16.ulp(q, 2.0);
16106                         }
16107
16108                         result = dp;
16109                 }
16110                 else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16111                 {
16112                         const int                       compCount               (static_cast<int>(getArgCompCount(1)));
16113                         const size_t            permutationNdx  (getFlavor() - permutationsFlavorStart);
16114                         const tcu::UVec4&       permutation             (m_permutations[permutationNdx]);
16115                         fp16type                        dp                              (0.0);
16116
16117                         for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16118                         {
16119                                 const size_t            componentNdx    (permutation[permComponentNdx]);
16120                                 const fp16type          x                               (in[0][componentNdx]);
16121                                 const fp16type          y                               (in[1][componentNdx]);
16122                                 const fp16type          q                               (x.asDouble() * y.asDouble());
16123
16124                                 dp = fp16type(dp.asDouble() + q.asDouble());
16125                                 eps += floatFormat16.ulp(q.asDouble(), 2.0);
16126                         }
16127
16128                         result = dp.asDouble();
16129                 }
16130                 else
16131                 {
16132                         TCU_THROW(InternalError, "Unknown flavor");
16133                 }
16134
16135                 out[0] = fp16type(result).bits();
16136                 min[0] = result - eps;
16137                 max[0] = result + eps;
16138
16139                 return true;
16140         }
16141
16142 private:
16143         std::vector<tcu::UVec4> m_permutations;
16144         size_t                                  permutationsFlavorStart;
16145         size_t                                  permutationsFlavorEnd;
16146 };
16147
16148 struct fp16VectorTimesScalar : public fp16AllComponents
16149 {
16150         virtual double getULPs(vector<const deFloat16*>& in)
16151         {
16152                 DE_UNREF(in);
16153
16154                 return 2.0;
16155         }
16156
16157         template<class fp16type>
16158         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16159         {
16160                 DE_ASSERT(in.size() == 2);
16161                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16162                 DE_ASSERT(getArgCompCount(1) == 1);
16163
16164                 fp16type        s       (*in[1]);
16165
16166                 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16167                 {
16168                         const fp16type  x          (in[0][componentNdx]);
16169                         const double    result (s.asDouble() * x.asDouble());
16170                         const fp16type  m          (result);
16171
16172                         out[componentNdx] = m.bits();
16173                         min[componentNdx] = getMin(result, getULPs(in));
16174                         max[componentNdx] = getMax(result, getULPs(in));
16175                 }
16176
16177                 return true;
16178         }
16179 };
16180
16181 struct fp16MatrixBase : public fp16AllComponents
16182 {
16183         deUint32                getComponentValidity                    ()
16184         {
16185                 return static_cast<deUint32>(-1);
16186         }
16187
16188         inline size_t   getNdx                                                  (const size_t rowCount, const size_t col, const size_t row)
16189         {
16190                 const size_t minComponentCount  = 0;
16191                 const size_t maxComponentCount  = 3;
16192                 const size_t alignedRowsCount   = (rowCount == 3) ? 4 : rowCount;
16193
16194                 DE_ASSERT(de::inRange(rowCount, minComponentCount + 1, maxComponentCount + 1));
16195                 DE_ASSERT(de::inRange(col, minComponentCount, maxComponentCount));
16196                 DE_ASSERT(de::inBounds(row, minComponentCount, rowCount));
16197                 DE_UNREF(minComponentCount);
16198                 DE_UNREF(maxComponentCount);
16199
16200                 return col * alignedRowsCount + row;
16201         }
16202
16203         deUint32                getComponentMatrixValidityMask  (size_t cols, size_t rows)
16204         {
16205                 deUint32        result  = 0u;
16206
16207                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16208                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16209                         {
16210                                 const size_t bitNdx = getNdx(rows, colNdx, rowNdx);
16211
16212                                 DE_ASSERT(bitNdx < sizeof(result) * 8);
16213
16214                                 result |= (1<<bitNdx);
16215                         }
16216
16217                 return result;
16218         }
16219 };
16220
16221 template<size_t cols, size_t rows>
16222 struct fp16Transpose : public fp16MatrixBase
16223 {
16224         virtual double getULPs(vector<const deFloat16*>& in)
16225         {
16226                 DE_UNREF(in);
16227
16228                 return 1.0;
16229         }
16230
16231         deUint32        getComponentValidity    ()
16232         {
16233                 return getComponentMatrixValidityMask(rows, cols);
16234         }
16235
16236         template<class fp16type>
16237         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16238         {
16239                 DE_ASSERT(in.size() == 1);
16240
16241                 const size_t            alignedCols     = (cols == 3) ? 4 : cols;
16242                 const size_t            alignedRows     = (rows == 3) ? 4 : rows;
16243                 vector<deFloat16>       output          (alignedCols * alignedRows, 0);
16244
16245                 DE_ASSERT(output.size() == alignedCols * alignedRows);
16246
16247                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16248                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16249                                 output[rowNdx * alignedCols + colNdx] = in[0][colNdx * alignedRows + rowNdx];
16250
16251                 deMemcpy(out, &output[0], sizeof(deFloat16) * output.size());
16252                 deMemcpy(min, &output[0], sizeof(deFloat16) * output.size());
16253                 deMemcpy(max, &output[0], sizeof(deFloat16) * output.size());
16254
16255                 return true;
16256         }
16257 };
16258
16259 template<size_t cols, size_t rows>
16260 struct fp16MatrixTimesScalar : public fp16MatrixBase
16261 {
16262         virtual double getULPs(vector<const deFloat16*>& in)
16263         {
16264                 DE_UNREF(in);
16265
16266                 return 4.0;
16267         }
16268
16269         deUint32        getComponentValidity    ()
16270         {
16271                 return getComponentMatrixValidityMask(cols, rows);
16272         }
16273
16274         template<class fp16type>
16275         bool calc(vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16276         {
16277                 DE_ASSERT(in.size() == 2);
16278                 DE_ASSERT(getArgCompCount(1) == 1);
16279
16280                 const fp16type  y                       (in[1][0]);
16281                 const float             scalar          (y.asFloat());
16282                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
16283                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
16284
16285                 DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
16286                 DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
16287                 DE_UNREF(alignedCols);
16288
16289                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16290                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16291                         {
16292                                 const size_t    ndx     (colNdx * alignedRows + rowNdx);
16293                                 const fp16type  x       (in[0][ndx]);
16294                                 const double    result  (scalar * x.asFloat());
16295
16296                                 out[ndx] = fp16type(result).bits();
16297                                 min[ndx] = getMin(result, getULPs(in));
16298                                 max[ndx] = getMax(result, getULPs(in));
16299                         }
16300
16301                 return true;
16302         }
16303 };
16304
16305 template<size_t cols, size_t rows>
16306 struct fp16VectorTimesMatrix : public fp16MatrixBase
16307 {
16308         fp16VectorTimesMatrix() : fp16MatrixBase()
16309         {
16310                 flavorNames.push_back("EmulatingFP16");
16311                 flavorNames.push_back("FloatCalc");
16312         }
16313
16314         virtual double getULPs (vector<const deFloat16*>& in)
16315         {
16316                 DE_UNREF(in);
16317
16318                 return (8.0 * cols);
16319         }
16320
16321         deUint32 getComponentValidity ()
16322         {
16323                 return getComponentMatrixValidityMask(cols, 1);
16324         }
16325
16326         template<class fp16type>
16327         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16328         {
16329                 DE_ASSERT(in.size() == 2);
16330
16331                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
16332                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
16333
16334                 DE_ASSERT(getOutCompCount() == cols);
16335                 DE_ASSERT(getArgCompCount(0) == rows);
16336                 DE_ASSERT(getArgCompCount(1) == alignedCols * alignedRows);
16337                 DE_UNREF(alignedCols);
16338
16339                 if (getFlavor() == 0)
16340                 {
16341                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16342                         {
16343                                 fp16type        s       (fp16type::zero(1));
16344
16345                                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16346                                 {
16347                                         const fp16type  v       (in[0][rowNdx]);
16348                                         const float             vf      (v.asFloat());
16349                                         const size_t    ndx     (colNdx * alignedRows + rowNdx);
16350                                         const fp16type  x       (in[1][ndx]);
16351                                         const float             xf      (x.asFloat());
16352                                         const fp16type  m       (vf * xf);
16353
16354                                         s = fp16type(s.asFloat() + m.asFloat());
16355                                 }
16356
16357                                 out[colNdx] = s.bits();
16358                                 min[colNdx] = getMin(s.asDouble(), getULPs(in));
16359                                 max[colNdx] = getMax(s.asDouble(), getULPs(in));
16360                         }
16361                 }
16362                 else if (getFlavor() == 1)
16363                 {
16364                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16365                         {
16366                                 float   s       (0.0f);
16367
16368                                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16369                                 {
16370                                         const fp16type  v       (in[0][rowNdx]);
16371                                         const float             vf      (v.asFloat());
16372                                         const size_t    ndx     (colNdx * alignedRows + rowNdx);
16373                                         const fp16type  x       (in[1][ndx]);
16374                                         const float             xf      (x.asFloat());
16375                                         const float             m       (vf * xf);
16376
16377                                         s += m;
16378                                 }
16379
16380                                 out[colNdx] = fp16type(s).bits();
16381                                 min[colNdx] = getMin(static_cast<double>(s), getULPs(in));
16382                                 max[colNdx] = getMax(static_cast<double>(s), getULPs(in));
16383                         }
16384                 }
16385                 else
16386                 {
16387                         TCU_THROW(InternalError, "Unknown flavor");
16388                 }
16389
16390                 return true;
16391         }
16392 };
16393
16394 template<size_t cols, size_t rows>
16395 struct fp16MatrixTimesVector : public fp16MatrixBase
16396 {
16397         fp16MatrixTimesVector() : fp16MatrixBase()
16398         {
16399                 flavorNames.push_back("EmulatingFP16");
16400                 flavorNames.push_back("FloatCalc");
16401         }
16402
16403         virtual double getULPs (vector<const deFloat16*>& in)
16404         {
16405                 DE_UNREF(in);
16406
16407                 return (8.0 * rows);
16408         }
16409
16410         deUint32 getComponentValidity ()
16411         {
16412                 return getComponentMatrixValidityMask(rows, 1);
16413         }
16414
16415         template<class fp16type>
16416         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16417         {
16418                 DE_ASSERT(in.size() == 2);
16419
16420                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
16421                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
16422
16423                 DE_ASSERT(getOutCompCount() == rows);
16424                 DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
16425                 DE_ASSERT(getArgCompCount(1) == cols);
16426                 DE_UNREF(alignedCols);
16427
16428                 if (getFlavor() == 0)
16429                 {
16430                         for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16431                         {
16432                                 fp16type        s       (fp16type::zero(1));
16433
16434                                 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16435                                 {
16436                                         const size_t    ndx     (colNdx * alignedRows + rowNdx);
16437                                         const fp16type  x       (in[0][ndx]);
16438                                         const float             xf      (x.asFloat());
16439                                         const fp16type  v       (in[1][colNdx]);
16440                                         const float             vf      (v.asFloat());
16441                                         const fp16type  m       (vf * xf);
16442
16443                                         s = fp16type(s.asFloat() + m.asFloat());
16444                                 }
16445
16446                                 out[rowNdx] = s.bits();
16447                                 min[rowNdx] = getMin(s.asDouble(), getULPs(in));
16448                                 max[rowNdx] = getMax(s.asDouble(), getULPs(in));
16449                         }
16450                 }
16451                 else if (getFlavor() == 1)
16452                 {
16453                         for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16454                         {
16455                                 float   s       (0.0f);
16456
16457                                 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16458                                 {
16459                                         const size_t    ndx     (colNdx * alignedRows + rowNdx);
16460                                         const fp16type  x       (in[0][ndx]);
16461                                         const float             xf      (x.asFloat());
16462                                         const fp16type  v       (in[1][colNdx]);
16463                                         const float             vf      (v.asFloat());
16464                                         const float             m       (vf * xf);
16465
16466                                         s += m;
16467                                 }
16468
16469                                 out[rowNdx] = fp16type(s).bits();
16470                                 min[rowNdx] = getMin(static_cast<double>(s), getULPs(in));
16471                                 max[rowNdx] = getMax(static_cast<double>(s), getULPs(in));
16472                         }
16473                 }
16474                 else
16475                 {
16476                         TCU_THROW(InternalError, "Unknown flavor");
16477                 }
16478
16479                 return true;
16480         }
16481 };
16482
16483 template<size_t colsL, size_t rowsL, size_t colsR, size_t rowsR>
16484 struct fp16MatrixTimesMatrix : public fp16MatrixBase
16485 {
16486         fp16MatrixTimesMatrix() : fp16MatrixBase()
16487         {
16488                 flavorNames.push_back("EmulatingFP16");
16489                 flavorNames.push_back("FloatCalc");
16490         }
16491
16492         virtual double getULPs (vector<const deFloat16*>& in)
16493         {
16494                 DE_UNREF(in);
16495
16496                 return 32.0;
16497         }
16498
16499         deUint32 getComponentValidity ()
16500         {
16501                 return getComponentMatrixValidityMask(colsR, rowsL);
16502         }
16503
16504         template<class fp16type>
16505         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16506         {
16507                 DE_STATIC_ASSERT(colsL == rowsR);
16508
16509                 DE_ASSERT(in.size() == 2);
16510
16511                 const size_t    alignedColsL    = (colsL == 3) ? 4 : colsL;
16512                 const size_t    alignedRowsL    = (rowsL == 3) ? 4 : rowsL;
16513                 const size_t    alignedColsR    = (colsR == 3) ? 4 : colsR;
16514                 const size_t    alignedRowsR    = (rowsR == 3) ? 4 : rowsR;
16515
16516                 DE_ASSERT(getOutCompCount() == alignedColsR * alignedRowsL);
16517                 DE_ASSERT(getArgCompCount(0) == alignedColsL * alignedRowsL);
16518                 DE_ASSERT(getArgCompCount(1) == alignedColsR * alignedRowsR);
16519                 DE_UNREF(alignedColsL);
16520                 DE_UNREF(alignedColsR);
16521
16522                 if (getFlavor() == 0)
16523                 {
16524                         for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
16525                         {
16526                                 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
16527                                 {
16528                                         const size_t    ndx     (colNdx * alignedRowsL + rowNdx);
16529                                         fp16type                s       (fp16type::zero(1));
16530
16531                                         for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
16532                                         {
16533                                                 const size_t    ndxl    (commonNdx * alignedRowsL + rowNdx);
16534                                                 const fp16type  l               (in[0][ndxl]);
16535                                                 const float             lf              (l.asFloat());
16536                                                 const size_t    ndxr    (colNdx * alignedRowsR + commonNdx);
16537                                                 const fp16type  r               (in[1][ndxr]);
16538                                                 const float             rf              (r.asFloat());
16539                                                 const fp16type  m               (lf * rf);
16540
16541                                                 s = fp16type(s.asFloat() + m.asFloat());
16542                                         }
16543
16544                                         out[ndx] = s.bits();
16545                                         min[ndx] = getMin(s.asDouble(), getULPs(in));
16546                                         max[ndx] = getMax(s.asDouble(), getULPs(in));
16547                                 }
16548                         }
16549                 }
16550                 else if (getFlavor() == 1)
16551                 {
16552                         for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
16553                         {
16554                                 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
16555                                 {
16556                                         const size_t    ndx     (colNdx * alignedRowsL + rowNdx);
16557                                         float                   s       (0.0f);
16558
16559                                         for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
16560                                         {
16561                                                 const size_t    ndxl    (commonNdx * alignedRowsL + rowNdx);
16562                                                 const fp16type  l               (in[0][ndxl]);
16563                                                 const float             lf              (l.asFloat());
16564                                                 const size_t    ndxr    (colNdx * alignedRowsR + commonNdx);
16565                                                 const fp16type  r               (in[1][ndxr]);
16566                                                 const float             rf              (r.asFloat());
16567                                                 const float             m               (lf * rf);
16568
16569                                                 s += m;
16570                                         }
16571
16572                                         out[ndx] = fp16type(s).bits();
16573                                         min[ndx] = getMin(static_cast<double>(s), getULPs(in));
16574                                         max[ndx] = getMax(static_cast<double>(s), getULPs(in));
16575                                 }
16576                         }
16577                 }
16578                 else
16579                 {
16580                         TCU_THROW(InternalError, "Unknown flavor");
16581                 }
16582
16583                 return true;
16584         }
16585 };
16586
16587 template<size_t cols, size_t rows>
16588 struct fp16OuterProduct : public fp16MatrixBase
16589 {
16590         virtual double getULPs (vector<const deFloat16*>& in)
16591         {
16592                 DE_UNREF(in);
16593
16594                 return 2.0;
16595         }
16596
16597         deUint32 getComponentValidity ()
16598         {
16599                 return getComponentMatrixValidityMask(cols, rows);
16600         }
16601
16602         template<class fp16type>
16603         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16604         {
16605                 DE_ASSERT(in.size() == 2);
16606
16607                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
16608                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
16609
16610                 DE_ASSERT(getArgCompCount(0) == rows);
16611                 DE_ASSERT(getArgCompCount(1) == cols);
16612                 DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
16613                 DE_UNREF(alignedCols);
16614
16615                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16616                 {
16617                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16618                         {
16619                                 const size_t    ndx     (colNdx * alignedRows + rowNdx);
16620                                 const fp16type  x       (in[0][rowNdx]);
16621                                 const float             xf      (x.asFloat());
16622                                 const fp16type  y       (in[1][colNdx]);
16623                                 const float             yf      (y.asFloat());
16624                                 const fp16type  m       (xf * yf);
16625
16626                                 out[ndx] = m.bits();
16627                                 min[ndx] = getMin(m.asDouble(), getULPs(in));
16628                                 max[ndx] = getMax(m.asDouble(), getULPs(in));
16629                         }
16630                 }
16631
16632                 return true;
16633         }
16634 };
16635
16636 template<size_t size>
16637 struct fp16Determinant;
16638
16639 template<>
16640 struct fp16Determinant<2> : public fp16MatrixBase
16641 {
16642         virtual double getULPs (vector<const deFloat16*>& in)
16643         {
16644                 DE_UNREF(in);
16645
16646                 return 128.0; // This is not a precision test. Value is not from spec
16647         }
16648
16649         deUint32 getComponentValidity ()
16650         {
16651                 return 1;
16652         }
16653
16654         template<class fp16type>
16655         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16656         {
16657                 const size_t    cols            = 2;
16658                 const size_t    rows            = 2;
16659                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
16660                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
16661
16662                 DE_ASSERT(in.size() == 1);
16663                 DE_ASSERT(getOutCompCount() == 1);
16664                 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
16665                 DE_UNREF(alignedCols);
16666                 DE_UNREF(alignedRows);
16667
16668                 // [ a b ]
16669                 // [ c d ]
16670                 const float             a               (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
16671                 const float             b               (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
16672                 const float             c               (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
16673                 const float             d               (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
16674                 const float             ad              (a * d);
16675                 const fp16type  adf16   (ad);
16676                 const float             bc              (b * c);
16677                 const fp16type  bcf16   (bc);
16678                 const float             r               (adf16.asFloat() - bcf16.asFloat());
16679                 const fp16type  rf16    (r);
16680
16681                 out[0] = rf16.bits();
16682                 min[0] = getMin(r, getULPs(in));
16683                 max[0] = getMax(r, getULPs(in));
16684
16685                 return true;
16686         }
16687 };
16688
16689 template<>
16690 struct fp16Determinant<3> : public fp16MatrixBase
16691 {
16692         virtual double getULPs (vector<const deFloat16*>& in)
16693         {
16694                 DE_UNREF(in);
16695
16696                 return 128.0; // This is not a precision test. Value is not from spec
16697         }
16698
16699         deUint32 getComponentValidity ()
16700         {
16701                 return 1;
16702         }
16703
16704         template<class fp16type>
16705         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16706         {
16707                 const size_t    cols            = 3;
16708                 const size_t    rows            = 3;
16709                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
16710                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
16711
16712                 DE_ASSERT(in.size() == 1);
16713                 DE_ASSERT(getOutCompCount() == 1);
16714                 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
16715                 DE_UNREF(alignedCols);
16716                 DE_UNREF(alignedRows);
16717
16718                 // [ a b c ]
16719                 // [ d e f ]
16720                 // [ g h i ]
16721                 const float             a               (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
16722                 const float             b               (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
16723                 const float             c               (fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
16724                 const float             d               (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
16725                 const float             e               (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
16726                 const float             f               (fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
16727                 const float             g               (fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
16728                 const float             h               (fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
16729                 const float             i               (fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
16730                 const fp16type  aei             (a * e * i);
16731                 const fp16type  bfg             (b * f * g);
16732                 const fp16type  cdh             (c * d * h);
16733                 const fp16type  ceg             (c * e * g);
16734                 const fp16type  bdi             (b * d * i);
16735                 const fp16type  afh             (a * f * h);
16736                 const float             r               (aei.asFloat() + bfg.asFloat() + cdh.asFloat() - ceg.asFloat() - bdi.asFloat() - afh.asFloat());
16737                 const fp16type  rf16    (r);
16738
16739                 out[0] = rf16.bits();
16740                 min[0] = getMin(r, getULPs(in));
16741                 max[0] = getMax(r, getULPs(in));
16742
16743                 return true;
16744         }
16745 };
16746
16747 template<>
16748 struct fp16Determinant<4> : public fp16MatrixBase
16749 {
16750         virtual double getULPs (vector<const deFloat16*>& in)
16751         {
16752                 DE_UNREF(in);
16753
16754                 return 128.0; // This is not a precision test. Value is not from spec
16755         }
16756
16757         deUint32 getComponentValidity ()
16758         {
16759                 return 1;
16760         }
16761
16762         template<class fp16type>
16763         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16764         {
16765                 const size_t    rows            = 4;
16766                 const size_t    cols            = 4;
16767                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
16768                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
16769
16770                 DE_ASSERT(in.size() == 1);
16771                 DE_ASSERT(getOutCompCount() == 1);
16772                 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
16773                 DE_UNREF(alignedCols);
16774                 DE_UNREF(alignedRows);
16775
16776                 // [ a b c d ]
16777                 // [ e f g h ]
16778                 // [ i j k l ]
16779                 // [ m n o p ]
16780                 const float             a               (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
16781                 const float             b               (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
16782                 const float             c               (fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
16783                 const float             d               (fp16type(in[0][getNdx(rows, 3, 0)]).asFloat());
16784                 const float             e               (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
16785                 const float             f               (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
16786                 const float             g               (fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
16787                 const float             h               (fp16type(in[0][getNdx(rows, 3, 1)]).asFloat());
16788                 const float             i               (fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
16789                 const float             j               (fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
16790                 const float             k               (fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
16791                 const float             l               (fp16type(in[0][getNdx(rows, 3, 2)]).asFloat());
16792                 const float             m               (fp16type(in[0][getNdx(rows, 0, 3)]).asFloat());
16793                 const float             n               (fp16type(in[0][getNdx(rows, 1, 3)]).asFloat());
16794                 const float             o               (fp16type(in[0][getNdx(rows, 2, 3)]).asFloat());
16795                 const float             p               (fp16type(in[0][getNdx(rows, 3, 3)]).asFloat());
16796
16797                 // [ f g h ]
16798                 // [ j k l ]
16799                 // [ n o p ]
16800                 const fp16type  fkp             (f * k * p);
16801                 const fp16type  gln             (g * l * n);
16802                 const fp16type  hjo             (h * j * o);
16803                 const fp16type  hkn             (h * k * n);
16804                 const fp16type  gjp             (g * j * p);
16805                 const fp16type  flo             (f * l * o);
16806                 const fp16type  detA    (a * (fkp.asFloat() + gln.asFloat() + hjo.asFloat() - hkn.asFloat() - gjp.asFloat() - flo.asFloat()));
16807
16808                 // [ e g h ]
16809                 // [ i k l ]
16810                 // [ m o p ]
16811                 const fp16type  ekp             (e * k * p);
16812                 const fp16type  glm             (g * l * m);
16813                 const fp16type  hio             (h * i * o);
16814                 const fp16type  hkm             (h * k * m);
16815                 const fp16type  gip             (g * i * p);
16816                 const fp16type  elo             (e * l * o);
16817                 const fp16type  detB    (b * (ekp.asFloat() + glm.asFloat() + hio.asFloat() - hkm.asFloat() - gip.asFloat() - elo.asFloat()));
16818
16819                 // [ e f h ]
16820                 // [ i j l ]
16821                 // [ m n p ]
16822                 const fp16type  ejp             (e * j * p);
16823                 const fp16type  flm             (f * l * m);
16824                 const fp16type  hin             (h * i * n);
16825                 const fp16type  hjm             (h * j * m);
16826                 const fp16type  fip             (f * i * p);
16827                 const fp16type  eln             (e * l * n);
16828                 const fp16type  detC    (c * (ejp.asFloat() + flm.asFloat() + hin.asFloat() - hjm.asFloat() - fip.asFloat() - eln.asFloat()));
16829
16830                 // [ e f g ]
16831                 // [ i j k ]
16832                 // [ m n o ]
16833                 const fp16type  ejo             (e * j * o);
16834                 const fp16type  fkm             (f * k * m);
16835                 const fp16type  gin             (g * i * n);
16836                 const fp16type  gjm             (g * j * m);
16837                 const fp16type  fio             (f * i * o);
16838                 const fp16type  ekn             (e * k * n);
16839                 const fp16type  detD    (d * (ejo.asFloat() + fkm.asFloat() + gin.asFloat() - gjm.asFloat() - fio.asFloat() - ekn.asFloat()));
16840
16841                 const float             r               (detA.asFloat() - detB.asFloat() + detC.asFloat() - detD.asFloat());
16842                 const fp16type  rf16    (r);
16843
16844                 out[0] = rf16.bits();
16845                 min[0] = getMin(r, getULPs(in));
16846                 max[0] = getMax(r, getULPs(in));
16847
16848                 return true;
16849         }
16850 };
16851
16852 template<size_t size>
16853 struct fp16Inverse;
16854
16855 template<>
16856 struct fp16Inverse<2> : public fp16MatrixBase
16857 {
16858         virtual double getULPs (vector<const deFloat16*>& in)
16859         {
16860                 DE_UNREF(in);
16861
16862                 return 128.0; // This is not a precision test. Value is not from spec
16863         }
16864
16865         deUint32 getComponentValidity ()
16866         {
16867                 return getComponentMatrixValidityMask(2, 2);
16868         }
16869
16870         template<class fp16type>
16871         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16872         {
16873                 const size_t    cols            = 2;
16874                 const size_t    rows            = 2;
16875                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
16876                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
16877
16878                 DE_ASSERT(in.size() == 1);
16879                 DE_ASSERT(getOutCompCount() == alignedRows * alignedCols);
16880                 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
16881                 DE_UNREF(alignedCols);
16882
16883                 // [ a b ]
16884                 // [ c d ]
16885                 const float             a               (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
16886                 const float             b               (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
16887                 const float             c               (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
16888                 const float             d               (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
16889                 const float             ad              (a * d);
16890                 const fp16type  adf16   (ad);
16891                 const float             bc              (b * c);
16892                 const fp16type  bcf16   (bc);
16893                 const float             det             (adf16.asFloat() - bcf16.asFloat());
16894                 const fp16type  det16   (det);
16895
16896                 out[0] = fp16type( d / det16.asFloat()).bits();
16897                 out[1] = fp16type(-c / det16.asFloat()).bits();
16898                 out[2] = fp16type(-b / det16.asFloat()).bits();
16899                 out[3] = fp16type( a / det16.asFloat()).bits();
16900
16901                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16902                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16903                         {
16904                                 const size_t    ndx     (colNdx * alignedRows + rowNdx);
16905                                 const fp16type  s       (out[ndx]);
16906
16907                                 min[ndx] = getMin(s.asDouble(), getULPs(in));
16908                                 max[ndx] = getMax(s.asDouble(), getULPs(in));
16909                         }
16910
16911                 return true;
16912         }
16913 };
16914
16915 inline std::string fp16ToString(deFloat16 val)
16916 {
16917         return tcu::toHex<4>(val).toString() + " (" + de::floatToString(tcu::Float16(val).asFloat(), 10) + ")";
16918 }
16919
16920 template <size_t RES_COMPONENTS, size_t ARG0_COMPONENTS, size_t ARG1_COMPONENTS, size_t ARG2_COMPONENTS, class TestedArithmeticFunction>
16921 bool compareFP16ArithmeticFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
16922 {
16923         if (inputs.size() < 1 || inputs.size() > 3 || outputAllocs.size() != 1 || expectedOutputs.size() != 1)
16924                 return false;
16925
16926         const size_t    resultStep                      = (RES_COMPONENTS == 3) ? 4 : RES_COMPONENTS;
16927         const size_t    iterationsCount         = expectedOutputs[0].getByteSize() / (sizeof(deFloat16) * resultStep);
16928         const size_t    inputsSteps[3]          =
16929         {
16930                 (ARG0_COMPONENTS == 3) ? 4 : ARG0_COMPONENTS,
16931                 (ARG1_COMPONENTS == 3) ? 4 : ARG1_COMPONENTS,
16932                 (ARG2_COMPONENTS == 3) ? 4 : ARG2_COMPONENTS,
16933         };
16934
16935         DE_ASSERT(expectedOutputs[0].getByteSize() > 0);
16936         DE_ASSERT(expectedOutputs[0].getByteSize() == sizeof(deFloat16) * iterationsCount * resultStep);
16937
16938         for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
16939         {
16940                 DE_ASSERT(inputs[inputNdx].getByteSize() > 0);
16941                 DE_ASSERT(inputs[inputNdx].getByteSize() == sizeof(deFloat16) * iterationsCount * inputsSteps[inputNdx]);
16942         }
16943
16944         const deFloat16* const          outputAsFP16                                    = (const deFloat16*)outputAllocs[0]->getHostPtr();
16945         TestedArithmeticFunction        func;
16946
16947         func.setOutCompCount(RES_COMPONENTS);
16948         func.setArgCompCount(0, ARG0_COMPONENTS);
16949         func.setArgCompCount(1, ARG1_COMPONENTS);
16950         func.setArgCompCount(2, ARG2_COMPONENTS);
16951
16952         const bool                                      callOncePerComponent                    = func.callOncePerComponent();
16953         const deUint32                          componentValidityMask                   = func.getComponentValidity();
16954         const size_t                            denormModesCount                                = 2;
16955         const char*                                     denormModes[denormModesCount]   = { "keep denormal numbers", "flush to zero" };
16956         const size_t                            successfulRunsPerComponent              = denormModesCount * func.getFlavorCount();
16957         bool                                            success                                                 = true;
16958         size_t                                          validatedCount                                  = 0;
16959
16960         vector<deUint8> inputBytes[3];
16961
16962         for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
16963                 inputs[inputNdx].getBytes(inputBytes[inputNdx]);
16964
16965         const deFloat16* const                  inputsAsFP16[3]                 =
16966         {
16967                 inputs.size() >= 1 ? (const deFloat16*)&inputBytes[0][0] : DE_NULL,
16968                 inputs.size() >= 2 ? (const deFloat16*)&inputBytes[1][0] : DE_NULL,
16969                 inputs.size() >= 3 ? (const deFloat16*)&inputBytes[2][0] : DE_NULL,
16970         };
16971
16972         for (size_t idx = 0; idx < iterationsCount; ++idx)
16973         {
16974                 std::vector<size_t>                     successfulRuns          (RES_COMPONENTS, successfulRunsPerComponent);
16975                 std::vector<std::string>        errors                          (RES_COMPONENTS);
16976                 bool                                            iterationValidated      (true);
16977
16978                 for (size_t denormNdx = 0; denormNdx < 2; ++denormNdx)
16979                 {
16980                         for (size_t flavorNdx = 0; flavorNdx < func.getFlavorCount(); ++flavorNdx)
16981                         {
16982                                 func.setFlavor(flavorNdx);
16983
16984                                 const deFloat16*                        iterationOutputFP16             = &outputAsFP16[idx * resultStep];
16985                                 vector<deFloat16>                       iterationCalculatedFP16 (resultStep, 0);
16986                                 vector<double>                          iterationEdgeMin                (resultStep, 0.0);
16987                                 vector<double>                          iterationEdgeMax                (resultStep, 0.0);
16988                                 vector<const deFloat16*>        arguments;
16989
16990                                 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
16991                                 {
16992                                         std::string     error;
16993                                         bool            reportError = false;
16994
16995                                         if (callOncePerComponent || componentNdx == 0)
16996                                         {
16997                                                 bool funcCallResult;
16998
16999                                                 arguments.clear();
17000
17001                                                 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17002                                                         arguments.push_back(&inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + componentNdx]);
17003
17004                                                 if (denormNdx == 0)
17005                                                         funcCallResult = func.template calc<tcu::Float16>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
17006                                                 else
17007                                                         funcCallResult = func.template calc<tcu::Float16Denormless>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
17008
17009                                                 if (!funcCallResult)
17010                                                 {
17011                                                         iterationValidated = false;
17012
17013                                                         if (callOncePerComponent)
17014                                                                 continue;
17015                                                         else
17016                                                                 break;
17017                                                 }
17018                                         }
17019
17020                                         if ((componentValidityMask != 0) && (componentValidityMask & (1<<componentNdx)) == 0)
17021                                                 continue;
17022
17023                                         reportError = !compare16BitFloat(iterationCalculatedFP16[componentNdx], iterationOutputFP16[componentNdx], error);
17024
17025                                         if (reportError)
17026                                         {
17027                                                 tcu::Float16 expected   (iterationCalculatedFP16[componentNdx]);
17028                                                 tcu::Float16 outputted  (iterationOutputFP16[componentNdx]);
17029
17030                                                 if (reportError && expected.isNaN())
17031                                                         reportError = false;
17032
17033                                                 if (reportError && !expected.isNaN() && !outputted.isNaN())
17034                                                 {
17035                                                         if (reportError && !expected.isInf() && !outputted.isInf())
17036                                                         {
17037                                                                 // Ignore rounding
17038                                                                 if (expected.bits() == outputted.bits() + 1 || expected.bits() + 1 == outputted.bits())
17039                                                                         reportError = false;
17040                                                         }
17041
17042                                                         if (reportError && expected.isInf())
17043                                                         {
17044                                                                 // RTZ rounding mode returns +/-65504 instead of Inf on overflow
17045                                                                 if (expected.sign() == 1 && outputted.bits() == 0x7bff && iterationEdgeMin[componentNdx] <= std::numeric_limits<double>::max())
17046                                                                         reportError = false;
17047                                                                 else if (expected.sign() == -1 && outputted.bits() == 0xfbff && iterationEdgeMax[componentNdx] >= -std::numeric_limits<double>::max())
17048                                                                         reportError = false;
17049                                                         }
17050
17051                                                         if (reportError)
17052                                                         {
17053                                                                 const double    outputtedDouble = outputted.asDouble();
17054
17055                                                                 DE_ASSERT(iterationEdgeMin[componentNdx] <= iterationEdgeMax[componentNdx]);
17056
17057                                                                 if (de::inRange(outputtedDouble, iterationEdgeMin[componentNdx], iterationEdgeMax[componentNdx]))
17058                                                                         reportError = false;
17059                                                         }
17060                                                 }
17061
17062                                                 if (reportError)
17063                                                 {
17064                                                         const size_t            inputsComps[3]  =
17065                                                         {
17066                                                                 ARG0_COMPONENTS,
17067                                                                 ARG1_COMPONENTS,
17068                                                                 ARG2_COMPONENTS,
17069                                                         };
17070                                                         string                          inputsValues    ("Inputs:");
17071                                                         string                          flavorName              (func.getFlavorCount() == 1 ? "" : string(" flavor ") + de::toString(flavorNdx) + " (" + func.getCurrentFlavorName() + ")");
17072                                                         std::stringstream       errStream;
17073
17074                                                         for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17075                                                         {
17076                                                                 const size_t    inputCompsCount = inputsComps[inputNdx];
17077
17078                                                                 inputsValues += " [" + de::toString(inputNdx) + "]=(";
17079
17080                                                                 for (size_t compNdx = 0; compNdx < inputCompsCount; ++compNdx)
17081                                                                 {
17082                                                                         const deFloat16 inputComponentValue = inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + compNdx];
17083
17084                                                                         inputsValues += fp16ToString(inputComponentValue) + ((compNdx + 1 == inputCompsCount) ? ")": ", ");
17085                                                                 }
17086                                                         }
17087
17088                                                         errStream       << "At"
17089                                                                                 << " iteration " << de::toString(idx)
17090                                                                                 << " component " << de::toString(componentNdx)
17091                                                                                 << " denormMode " << de::toString(denormNdx)
17092                                                                                 << " (" << denormModes[denormNdx] << ")"
17093                                                                                 << " " << flavorName
17094                                                                                 << " " << inputsValues
17095                                                                                 << " outputted:" + fp16ToString(iterationOutputFP16[componentNdx])
17096                                                                                 << " expected:" + fp16ToString(iterationCalculatedFP16[componentNdx])
17097                                                                                 << " or in range: [" << iterationEdgeMin[componentNdx] << ", " << iterationEdgeMax[componentNdx] << "]."
17098                                                                                 << " " << error << "."
17099                                                                                 << std::endl;
17100
17101                                                         errors[componentNdx] += errStream.str();
17102
17103                                                         successfulRuns[componentNdx]--;
17104                                                 }
17105                                         }
17106                                 }
17107                         }
17108                 }
17109
17110                 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17111                 {
17112                         // Check if any component has total failure
17113                         if (successfulRuns[componentNdx] == 0)
17114                         {
17115                                 // Test failed in all denorm modes and all flavors for certain component: dump errors
17116                                 log << TestLog::Message << errors[componentNdx] << TestLog::EndMessage;
17117
17118                                 success = false;
17119                         }
17120                 }
17121
17122                 if (iterationValidated)
17123                         validatedCount++;
17124         }
17125
17126         if (validatedCount < 16)
17127                 TCU_THROW(InternalError, "Too few samples has been validated.");
17128
17129         return success;
17130 }
17131
17132 // IEEE-754 floating point numbers:
17133 // +--------+------+----------+-------------+
17134 // | binary | sign | exponent | significand |
17135 // +--------+------+----------+-------------+
17136 // | 16-bit |  1   |    5     |     10      |
17137 // +--------+------+----------+-------------+
17138 // | 32-bit |  1   |    8     |     23      |
17139 // +--------+------+----------+-------------+
17140 //
17141 // 16-bit floats:
17142 //
// 0   000 00   00 0000 0001 (0x0001: 2^-24:         minimum positive denormalized)
// 0   000 00   11 1111 1111 (0x03ff: 2^-14 - 2^-24: maximum positive denormalized)
// 0   000 01   00 0000 0000 (0x0400: 2^-14:         minimum positive normalized)
17146 // 0   111 10   11 1111 1111 (0x7bff: 65504:         maximum positive normalized)
17147 //
17148 // 0   000 00   00 0000 0000 (0x0000: +0)
17149 // 0   111 11   00 0000 0000 (0x7c00: +Inf)
17150 // 0   000 00   11 1111 0000 (0x03f0: +Denorm)
17151 // 0   000 01   00 0000 0001 (0x0401: +Norm)
17152 // 0   111 11   00 0000 1111 (0x7c0f: +SNaN)
17153 // 0   111 11   11 1111 0000 (0x7ff0: +QNaN)
17154 // Generate and return 16-bit floats and their corresponding 32-bit values.
17155 //
17156 // The first 14 number pairs are manually picked, while the rest are randomly generated.
17157 // Expected count to be at least 14 (numPicks).
17158 vector<deFloat16> getFloat16a (de::Random& rnd, deUint32 count)
17159 {
17160         vector<deFloat16>       float16;
17161
17162         float16.reserve(count);
17163
17164         // Zero
17165         float16.push_back(deUint16(0x0000));
17166         float16.push_back(deUint16(0x8000));
17167         // Infinity
17168         float16.push_back(deUint16(0x7c00));
17169         float16.push_back(deUint16(0xfc00));
17170         // Normalized
17171         float16.push_back(deUint16(0x0401));
17172         float16.push_back(deUint16(0x8401));
17173         // Some normal number
17174         float16.push_back(deUint16(0x14cb));
17175         float16.push_back(deUint16(0x94cb));
17176         // Min/max positive normal
17177         float16.push_back(deUint16(0x0400));
17178         float16.push_back(deUint16(0x7bff));
17179         // Min/max negative normal
17180         float16.push_back(deUint16(0x8400));
17181         float16.push_back(deUint16(0xfbff));
17182         // PI
17183         float16.push_back(deUint16(0x4248)); // 3.140625
17184         float16.push_back(deUint16(0xb248)); // -3.140625
17185         // PI/2
17186         float16.push_back(deUint16(0x3e48)); // 1.5703125
17187         float16.push_back(deUint16(0xbe48)); // -1.5703125
17188         float16.push_back(deUint16(0x3c00)); // 1.0
17189         float16.push_back(deUint16(0x3800)); // 0.5
17190         // Some useful constants
17191         float16.push_back(tcu::Float16(-2.5f).bits());
17192         float16.push_back(tcu::Float16(-1.0f).bits());
17193         float16.push_back(tcu::Float16( 0.4f).bits());
17194         float16.push_back(tcu::Float16( 2.5f).bits());
17195
17196         const deUint32          numPicks        = static_cast<deUint32>(float16.size());
17197
17198         DE_ASSERT(count >= numPicks);
17199         count -= numPicks;
17200
17201         for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17202         {
17203                 int                     sign            = (rnd.getUint16() % 2 == 0) ? +1 : -1;
17204                 int                     exponent        = (rnd.getUint16() % 29) - 14 + 1;
17205                 deUint16        mantissa        = static_cast<deUint16>(2 * (rnd.getUint16() % 512));
17206
17207                 // Exclude power of -14 to avoid denorms
17208                 DE_ASSERT(de::inRange(exponent, -13, 15));
17209
17210                 float16.push_back(tcu::Float16::constructBits(sign, exponent, mantissa).bits());
17211         }
17212
17213         return float16;
17214 }
17215
17216 static inline vector<deFloat16> getInputData1 (deUint32 seed, size_t count, size_t argNo)
17217 {
17218         DE_UNREF(argNo);
17219
17220         de::Random      rnd(seed);
17221
17222         return getFloat16a(rnd, static_cast<deUint32>(count));
17223 }
17224
17225 static inline vector<deFloat16> getInputData2 (deUint32 seed, size_t count, size_t argNo)
17226 {
17227         de::Random      rnd             (seed);
17228         size_t          newCount = static_cast<size_t>(deSqrt(double(count)));
17229
17230         DE_ASSERT(newCount * newCount == count);
17231
17232         vector<deFloat16>       float16 = getFloat16a(rnd, static_cast<deUint32>(newCount));
17233
17234         return squarize(float16, static_cast<deUint32>(argNo));
17235 }
17236
17237 static inline vector<deFloat16> getInputData3 (deUint32 seed, size_t count, size_t argNo)
17238 {
17239         if (argNo == 0 || argNo == 1)
17240                 return getInputData2(seed, count, argNo);
17241         else
17242                 return getInputData1(seed<<argNo, count, argNo);
17243 }
17244
17245 vector<deFloat16> getInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17246 {
17247         DE_UNREF(stride);
17248
17249         vector<deFloat16>       result;
17250
17251         switch (argCount)
17252         {
17253                 case 1:result = getInputData1(seed, count, argNo); break;
17254                 case 2:result = getInputData2(seed, count, argNo); break;
17255                 case 3:result = getInputData3(seed, count, argNo); break;
17256                 default: TCU_THROW(InternalError, "Invalid argument count specified");
17257         }
17258
17259         if (compCount == 3)
17260         {
17261                 const size_t            newCount = (3 * count) / 4;
17262                 vector<deFloat16>       newResult;
17263
17264                 newResult.reserve(result.size());
17265
17266                 for (size_t ndx = 0; ndx < newCount; ++ndx)
17267                 {
17268                         newResult.push_back(result[ndx]);
17269
17270                         if (ndx % 3 == 2)
17271                                 newResult.push_back(0);
17272                 }
17273
17274                 result = newResult;
17275         }
17276
17277         DE_ASSERT(result.size() == count);
17278
17279         return result;
17280 }
17281
17282 // Generator for functions requiring data in range [1, inf]
17283 vector<deFloat16> getInputDataAC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17284 {
17285         vector<deFloat16>       result;
17286
17287         result = getInputData(seed, count, compCount, stride, argCount, argNo);
17288
17289         // Filter out values below 1.0 from upper half of numbers
17290         for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17291         {
17292                 const float f = tcu::Float16(result[idx]).asFloat();
17293
17294                 if (f < 1.0f)
17295                         result[idx] = tcu::Float16(1.0f - f).bits();
17296         }
17297
17298         return result;
17299 }
17300
17301 // Generator for functions requiring data in range [-1, 1]
17302 vector<deFloat16> getInputDataA (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17303 {
17304         vector<deFloat16>       result;
17305
17306         result = getInputData(seed, count, compCount, stride, argCount, argNo);
17307
17308         for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17309         {
17310                 const float f = tcu::Float16(result[idx]).asFloat();
17311
17312                 if (!de::inRange(f, -1.0f, 1.0f))
17313                         result[idx] = tcu::Float16(deFloatFrac(f)).bits();
17314         }
17315
17316         return result;
17317 }
17318
17319 // Generator for functions requiring data in range [-pi, pi]
17320 vector<deFloat16> getInputDataPI (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17321 {
17322         vector<deFloat16>       result;
17323
17324         result = getInputData(seed, count, compCount, stride, argCount, argNo);
17325
17326         for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17327         {
17328                 const float f = tcu::Float16(result[idx]).asFloat();
17329
17330                 if (!de::inRange(f, -DE_PI, DE_PI))
17331                         result[idx] = tcu::Float16(fmodf(f, DE_PI)).bits();
17332         }
17333
17334         return result;
17335 }
17336
17337 // Generator for functions requiring data in range [0, inf]
17338 vector<deFloat16> getInputDataP (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17339 {
17340         vector<deFloat16>       result;
17341
17342         result = getInputData(seed, count, compCount, stride, argCount, argNo);
17343
17344         if (argNo == 0)
17345         {
17346                 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17347                         result[idx] &= static_cast<deFloat16>(~0x8000);
17348         }
17349
17350         return result;
17351 }
17352
17353 vector<deFloat16> getInputDataV (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17354 {
17355         DE_UNREF(stride);
17356         DE_UNREF(argCount);
17357
17358         vector<deFloat16>       result;
17359
17360         if (argNo == 0)
17361                 result = getInputData2(seed, count, argNo);
17362         else
17363         {
17364                 const size_t            alignedCount    = (compCount == 3) ? 4 : compCount;
17365                 const size_t            newCountX               = static_cast<size_t>(deSqrt(double(count * alignedCount)));
17366                 const size_t            newCountY               = count / newCountX;
17367                 de::Random                      rnd                             (seed);
17368                 vector<deFloat16>       float16                 = getFloat16a(rnd, static_cast<deUint32>(newCountX));
17369
17370                 DE_ASSERT(newCountX * newCountX == alignedCount * count);
17371
17372                 for (size_t numIdx = 0; numIdx < newCountX; ++numIdx)
17373                 {
17374                         const vector<deFloat16> tmp(newCountY, float16[numIdx]);
17375
17376                         result.insert(result.end(), tmp.begin(), tmp.end());
17377                 }
17378         }
17379
17380         DE_ASSERT(result.size() == count);
17381
17382         return result;
17383 }
17384
17385 vector<deFloat16> getInputDataM (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17386 {
17387         DE_UNREF(compCount);
17388         DE_UNREF(stride);
17389         DE_UNREF(argCount);
17390
17391         de::Random                      rnd             (seed << argNo);
17392         vector<deFloat16>       result;
17393
17394         result = getFloat16a(rnd, static_cast<deUint32>(count));
17395
17396         DE_ASSERT(result.size() == count);
17397
17398         return result;
17399 }
17400
17401 vector<deFloat16> getInputDataD (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17402 {
17403         DE_UNREF(compCount);
17404         DE_UNREF(argCount);
17405
17406         de::Random                      rnd             (seed << argNo);
17407         vector<deFloat16>       result;
17408
17409         for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17410         {
17411                 int num = (rnd.getUint16() % 16) - 8;
17412
17413                 result.push_back(tcu::Float16(float(num)).bits());
17414         }
17415
17416         result[0 * stride] = deUint16(0x7c00); // +Inf
17417         result[1 * stride] = deUint16(0xfc00); // -Inf
17418
17419         DE_ASSERT(result.size() == count);
17420
17421         return result;
17422 }
17423
17424 // Generator for smoothstep function
17425 vector<deFloat16> getInputDataSS (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17426 {
17427         vector<deFloat16>       result;
17428
17429         result = getInputDataD(seed, count, compCount, stride, argCount, argNo);
17430
17431         if (argNo == 0)
17432         {
17433                 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17434                 {
17435                         const float f = tcu::Float16(result[idx]).asFloat();
17436
17437                         if (f > 4.0f)
17438                                 result[idx] = tcu::Float16(-f).bits();
17439                 }
17440         }
17441
17442         if (argNo == 1)
17443         {
17444                 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17445                 {
17446                         const float f = tcu::Float16(result[idx]).asFloat();
17447
17448                         if (f < 4.0f)
17449                                 result[idx] = tcu::Float16(-f).bits();
17450                 }
17451         }
17452
17453         return result;
17454 }
17455
17456 // Generates normalized vectors for arguments 0 and 1
17457 vector<deFloat16> getInputDataN (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17458 {
17459         DE_UNREF(compCount);
17460         DE_UNREF(argCount);
17461
17462         de::Random                      rnd             (seed << argNo);
17463         vector<deFloat16>       result;
17464
17465         if (argNo == 0 || argNo == 1)
17466         {
17467                 // The input parameters for the incident vector I and the surface normal N must already be normalized
17468                 for (size_t numIdx = 0; numIdx < count; numIdx += stride)
17469                 {
17470                         vector <float>  unnormolized;
17471                         float                   sum                             = 0;
17472
17473                         for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
17474                                 unnormolized.push_back(float((rnd.getUint16() % 16) - 8));
17475
17476                         for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
17477                                 sum += unnormolized[compIdx] * unnormolized[compIdx];
17478
17479                         sum = deFloatSqrt(sum);
17480                         if (sum == 0.0f)
17481                                 unnormolized[0] = sum = 1.0f;
17482
17483                         for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
17484                                 result.push_back(tcu::Float16(unnormolized[compIdx] / sum).bits());
17485
17486                         for (size_t compIdx = compCount; compIdx < stride; ++compIdx)
17487                                 result.push_back(0);
17488                 }
17489         }
17490         else
17491         {
17492                 // Input parameter eta
17493                 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17494                 {
17495                         int num = (rnd.getUint16() % 16) - 8;
17496
17497                         result.push_back(tcu::Float16(float(num)).bits());
17498                 }
17499         }
17500
17501         DE_ASSERT(result.size() == count);
17502
17503         return result;
17504 }
17505
17506 // Data generator for complex matrix functions like determinant and inverse
17507 vector<deFloat16> getInputDataC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17508 {
17509         DE_UNREF(compCount);
17510         DE_UNREF(stride);
17511         DE_UNREF(argCount);
17512
17513         de::Random                      rnd             (seed << argNo);
17514         vector<deFloat16>       result;
17515
17516         for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17517         {
17518                 int num = (rnd.getUint16() % 16) - 8;
17519
17520                 result.push_back(tcu::Float16(float(num)).bits());
17521         }
17522
17523         DE_ASSERT(result.size() == count);
17524
17525         return result;
17526 }
17527
// Describes one float16 data type handled by the float16 arithmetic tests.
// createFloat16ArithmeticFuncTest keeps a table of these that is indexed by Math16DataTypes values,
// initialized positionally, so the member order must not change.
struct Math16TestType
{
        // Type-name prefix such as "", "v2" or "m2x2"; shader templates combine it with a
        // component type name (for example "%${dr}i32")
        const char*             typePrefix;
        // Number of components for scalar and vector types; the table uses 0 for the
        // placeholder entry and for all matrix types
        const size_t    typeComponents;
        // Stride of one value of this type in bytes; the table expresses it as a multiple of
        // sizeof(deFloat16) and it is divided by sizeof(deFloat16) to get the number of floats per value
        const size_t    typeArrayStride;
        // Stride in bytes of this type when used as a struct member
        // NOTE(review): presumably feeds the ${struct_stride} placeholder - confirm in createFloat16ArithmeticFuncTest
        const size_t    typeStructStride;
        // Suffix naming the uint based storage layout, e.g. "u32_ndp_2"; the values in the table
        // match the %SSBO_<suffix> types declared in the shader preamble
        const char*             storage_type;
};
17536
// Identifiers of the float16 data types covered by the arithmetic tests.
// The values double as indices into per-type tables of MATH16_TYPE_LAST entries, and for
// SCALAR and the vector types the value equals the number of components.
enum Math16DataTypes
{
        NONE    = 0,

        // Scalar and vectors: value == component count
        SCALAR  = 1,
        VEC2    = 2,
        VEC3    = 3,
        VEC4    = 4,

        // Matrices, named MAT<columns>X<rows>
        MAT2X2  = 5,
        MAT2X3  = 6,
        MAT2X4  = 7,
        MAT3X2  = 8,
        MAT3X3  = 9,
        MAT3X4  = 10,
        MAT4X2  = 11,
        MAT4X3  = 12,
        MAT4X4  = 13,

        // Number of entries, not a data type
        MATH16_TYPE_LAST
};
17555
// SPIR-V assembly text fragments that adapt the generated shader to a particular
// instruction / argument pattern. Any member may be an empty string.
// Instances are initialized positionally, so the member order must not change.
struct Math16ArgFragments
{
        // Instructions executed for every data point: load the source operands, perform the
        // tested operation and store the result
        const char*     bodies;
        // Additional module-level declarations (types, pointers, constants); appended to the
        // declarations of the SSBO variables
        const char*     variables;
        // Additional decorations (e.g. OpMemberDecorate for a temporary struct); appended to
        // the decorations of the SSBO variables
        const char*     decorations;
        // Additional function-scope OpVariable declarations
        const char*     funcVariables;
};
17563
17564 typedef vector<deFloat16> Math16GetInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo);
17565
// Describes one function (instruction) exercised by the float16 arithmetic tests.
struct Math16TestFunc
{
        // Instruction name; a name starting with "Op" is treated as a core instruction,
        // anything else as an extended instruction
        const char*                                     funcName;
        // Appended to funcName; the lower-cased concatenation is the test name and the
        // concatenation also selects special-case fragments (e.g. "ModfFrac", "FrexpStructE")
        const char*                                     funcSuffix;
        // Number of source arguments; createFloat16ArithmeticFuncTest accepts 1, 2 or 3
        // and throws InternalError otherwise
        size_t                                          funcArgsCount;
        // Data type of the result, used as an index into the per-type tables (Math16DataTypes value)
        size_t                                          typeResult;
        // Data types of the arguments, used as indices into the per-type tables (Math16DataTypes values)
        size_t                                          typeArg0;
        size_t                                          typeArg1;
        size_t                                          typeArg2;
        // Generator producing the input data for each argument
        Math16GetInputData*                     getInputDataFunc;
        // Verification callback
        // NOTE(review): VerifyIOFunc is declared elsewhere - presumably compares the outputs with reference values; confirm
        VerifyIOFunc                            verifyFunc;
};
17578
17579 template<class SpecResource>
17580 void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGroup& testGroup, const size_t testTypeIdx, const Math16TestFunc& testFunc)
17581 {
17582         const int                                       testSpecificSeed                        = deStringHash(testGroup.getName());
17583         const int                                       seed                                            = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
17584         const size_t                            numDataPointsByAxis                     = 32;
17585         const size_t                            numDataPoints                           = numDataPointsByAxis * numDataPointsByAxis;
17586         const char*                                     componentType                           = "f16";
17587         const Math16TestType            testTypes[MATH16_TYPE_LAST]     =
17588         {
17589                 { "",           0,       0,                                              0,                                             "" },
17590                 { "",           1,       1 * sizeof(deFloat16),  2 * sizeof(deFloat16), "u32_half_ndp" },
17591                 { "v2",         2,       2 * sizeof(deFloat16),  2 * sizeof(deFloat16), "u32_ndp" },
17592                 { "v3",         3,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16), "u32_ndp_2" },
17593                 { "v4",         4,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16), "u32_ndp_2" },
17594                 { "m2x2",       0,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16), "u32_ndp_2" },
17595                 { "m2x3",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_4" },
17596                 { "m2x4",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_4" },
17597                 { "m3x2",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_3" },
17598                 { "m3x3",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6" },
17599                 { "m3x4",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6" },
17600                 { "m4x2",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_4" },
17601                 { "m4x3",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8" },
17602                 { "m4x4",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8" },
17603         };
17604
17605         DE_ASSERT(testTypeIdx == testTypes[testTypeIdx].typeComponents);
17606
17607
17608         const StringTemplate preMain
17609         (
17610                 "     %c_i32_ndp  = OpConstant %i32 ${num_data_points}\n"
17611
17612                 "        %f16     = OpTypeFloat 16\n"
17613                 "        %v2f16   = OpTypeVector %f16 2\n"
17614                 "        %v3f16   = OpTypeVector %f16 3\n"
17615                 "        %v4f16   = OpTypeVector %f16 4\n"
17616                 "        %m2x2f16 = OpTypeMatrix %v2f16 2\n"
17617                 "        %m2x3f16 = OpTypeMatrix %v3f16 2\n"
17618                 "        %m2x4f16 = OpTypeMatrix %v4f16 2\n"
17619                 "        %m3x2f16 = OpTypeMatrix %v2f16 3\n"
17620                 "        %m3x3f16 = OpTypeMatrix %v3f16 3\n"
17621                 "        %m3x4f16 = OpTypeMatrix %v4f16 3\n"
17622                 "        %m4x2f16 = OpTypeMatrix %v2f16 4\n"
17623                 "        %m4x3f16 = OpTypeMatrix %v3f16 4\n"
17624                 "        %m4x4f16 = OpTypeMatrix %v4f16 4\n"
17625
17626                 "       %fp_v2i32 = OpTypePointer Function %v2i32\n"
17627                 "       %fp_v3i32 = OpTypePointer Function %v3i32\n"
17628                 "       %fp_v4i32 = OpTypePointer Function %v4i32\n"
17629
17630                 "      %c_u32_ndp = OpConstant %u32 ${num_data_points}\n"
17631                 " %c_u32_half_ndp = OpSpecConstantOp %u32 UDiv %c_i32_ndp %c_u32_2\n"
17632                 "        %c_u32_5 = OpConstant %u32 5\n"
17633                 "        %c_u32_6 = OpConstant %u32 6\n"
17634                 "        %c_u32_7 = OpConstant %u32 7\n"
17635                 "        %c_u32_8 = OpConstant %u32 8\n"
17636                 "        %c_f16_0 = OpConstant %f16 0\n"
17637                 "        %c_f16_1 = OpConstant %f16 1\n"
17638                 "      %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
17639                 "         %up_u32 = OpTypePointer Uniform %u32\n"
17640                 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
17641                 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
17642
17643                 "    %ra_u32_half_ndp = OpTypeArray %u32 %c_u32_half_ndp\n"
17644                 "  %SSBO_u32_half_ndp = OpTypeStruct %ra_u32_half_ndp\n"
17645                 "%up_SSBO_u32_half_ndp = OpTypePointer Uniform %SSBO_u32_half_ndp\n"
17646                 "         %ra_u32_ndp = OpTypeArray %u32 %c_u32_ndp\n"
17647                 "       %SSBO_u32_ndp = OpTypeStruct %ra_u32_ndp\n"
17648                 "    %up_SSBO_u32_ndp = OpTypePointer Uniform %SSBO_u32_ndp\n"
17649                 "           %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
17650                 "        %up_ra_u32_2 = OpTypePointer Uniform %ra_u32_2\n"
17651                 "      %ra_ra_u32_ndp = OpTypeArray %ra_u32_2 %c_u32_ndp\n"
17652                 "     %SSBO_u32_ndp_2 = OpTypeStruct %ra_ra_u32_ndp\n"
17653                 "  %up_SSBO_u32_ndp_2 = OpTypePointer Uniform %SSBO_u32_ndp_2\n"
17654                 "           %ra_u32_4 = OpTypeArray %u32 %c_u32_4\n"
17655                 "        %up_ra_u32_4 = OpTypePointer Uniform %ra_u32_4\n"
17656                 "        %ra_ra_u32_4 = OpTypeArray %ra_u32_4 %c_u32_ndp\n"
17657                 "     %SSBO_u32_ndp_4 = OpTypeStruct %ra_ra_u32_4\n"
17658                 "  %up_SSBO_u32_ndp_4 = OpTypePointer Uniform %SSBO_u32_ndp_4\n"
17659                 "           %ra_u32_3 = OpTypeArray %u32 %c_u32_3\n"
17660                 "        %up_ra_u32_3 = OpTypePointer Uniform %ra_u32_3\n"
17661                 "        %ra_ra_u32_3 = OpTypeArray %ra_u32_3 %c_u32_ndp\n"
17662                 "     %SSBO_u32_ndp_3 = OpTypeStruct %ra_ra_u32_3\n"
17663                 "  %up_SSBO_u32_ndp_3 = OpTypePointer Uniform %SSBO_u32_ndp_3\n"
17664                 "           %ra_u32_6 = OpTypeArray %u32 %c_u32_6\n"
17665                 "        %up_ra_u32_6 = OpTypePointer Uniform %ra_u32_6\n"
17666                 "        %ra_ra_u32_6 = OpTypeArray %ra_u32_6 %c_u32_ndp\n"
17667                 "     %SSBO_u32_ndp_6 = OpTypeStruct %ra_ra_u32_6\n"
17668                 "  %up_SSBO_u32_ndp_6 = OpTypePointer Uniform %SSBO_u32_ndp_6\n"
17669                 "           %ra_u32_8 = OpTypeArray %u32 %c_u32_8\n"
17670                 "        %up_ra_u32_8 = OpTypePointer Uniform %ra_u32_8\n"
17671                 "        %ra_ra_u32_8 = OpTypeArray %ra_u32_8 %c_u32_ndp\n"
17672                 "     %SSBO_u32_ndp_8 = OpTypeStruct %ra_ra_u32_8\n"
17673                 "  %up_SSBO_u32_ndp_8 = OpTypePointer Uniform %SSBO_u32_ndp_8\n"
17674
17675                 "         %f16_i32_fn = OpTypeFunction %f16 %i32\n"
17676                 "       %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
17677                 "       %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
17678                 "       %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
17679                 "     %m2x2f16_i32_fn = OpTypeFunction %m2x2f16 %i32\n"
17680                 "     %m2x3f16_i32_fn = OpTypeFunction %m2x3f16 %i32\n"
17681                 "     %m2x4f16_i32_fn = OpTypeFunction %m2x4f16 %i32\n"
17682                 "     %m3x2f16_i32_fn = OpTypeFunction %m3x2f16 %i32\n"
17683                 "     %m3x3f16_i32_fn = OpTypeFunction %m3x3f16 %i32\n"
17684                 "     %m3x4f16_i32_fn = OpTypeFunction %m3x4f16 %i32\n"
17685                 "     %m4x2f16_i32_fn = OpTypeFunction %m4x2f16 %i32\n"
17686                 "     %m4x3f16_i32_fn = OpTypeFunction %m4x3f16 %i32\n"
17687                 "     %m4x4f16_i32_fn = OpTypeFunction %m4x4f16 %i32\n"
17688                 "    %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
17689                 "  %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
17690                 "  %void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
17691                 "  %void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
17692                 "%void_m2x2f16_i32_fn = OpTypeFunction %void %m2x2f16 %i32\n"
17693                 "%void_m2x3f16_i32_fn = OpTypeFunction %void %m2x3f16 %i32\n"
17694                 "%void_m2x4f16_i32_fn = OpTypeFunction %void %m2x4f16 %i32\n"
17695                 "%void_m3x2f16_i32_fn = OpTypeFunction %void %m3x2f16 %i32\n"
17696                 "%void_m3x3f16_i32_fn = OpTypeFunction %void %m3x3f16 %i32\n"
17697                 "%void_m3x4f16_i32_fn = OpTypeFunction %void %m3x4f16 %i32\n"
17698                 "%void_m4x2f16_i32_fn = OpTypeFunction %void %m4x2f16 %i32\n"
17699                 "%void_m4x3f16_i32_fn = OpTypeFunction %void %m4x3f16 %i32\n"
17700                 "%void_m4x4f16_i32_fn = OpTypeFunction %void %m4x4f16 %i32\n"
17701                 "${arg_vars}"
17702         );
17703
17704         const StringTemplate decoration
17705         (
17706                 "OpDecorate %ra_u32_half_ndp ArrayStride 4\n"
17707                 "OpMemberDecorate %SSBO_u32_half_ndp 0 Offset 0\n"
17708                 "OpDecorate %SSBO_u32_half_ndp BufferBlock\n"
17709
17710                 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
17711                 "OpMemberDecorate %SSBO_u32_ndp 0 Offset 0\n"
17712                 "OpDecorate %SSBO_u32_ndp BufferBlock\n"
17713
17714                 "OpDecorate %ra_u32_2 ArrayStride 4\n"
17715                 "OpDecorate %ra_ra_u32_ndp ArrayStride 8\n"
17716                 "OpMemberDecorate %SSBO_u32_ndp_2 0 Offset 0\n"
17717                 "OpDecorate %SSBO_u32_ndp_2 BufferBlock\n"
17718
17719                 "OpDecorate %ra_u32_4 ArrayStride 4\n"
17720                 "OpDecorate %ra_ra_u32_4 ArrayStride 16\n"
17721                 "OpMemberDecorate %SSBO_u32_ndp_4 0 Offset 0\n"
17722                 "OpDecorate %SSBO_u32_ndp_4 BufferBlock\n"
17723
17724                 "OpDecorate %ra_u32_3 ArrayStride 4\n"
17725                 "OpDecorate %ra_ra_u32_3 ArrayStride 16\n"
17726                 "OpMemberDecorate %SSBO_u32_ndp_3 0 Offset 0\n"
17727                 "OpDecorate %SSBO_u32_ndp_3 BufferBlock\n"
17728
17729                 "OpDecorate %ra_u32_6 ArrayStride 4\n"
17730                 "OpDecorate %ra_ra_u32_6 ArrayStride 32\n"
17731                 "OpMemberDecorate %SSBO_u32_ndp_6 0 Offset 0\n"
17732                 "OpDecorate %SSBO_u32_ndp_6 BufferBlock\n"
17733
17734                 "OpDecorate %ra_u32_8 ArrayStride 4\n"
17735                 "OpDecorate %ra_ra_u32_8 ArrayStride 32\n"
17736                 "OpMemberDecorate %SSBO_u32_ndp_8 0 Offset 0\n"
17737                 "OpDecorate %SSBO_u32_ndp_8 BufferBlock\n"
17738
17739                 "${arg_decorations}"
17740         );
17741
17742         const StringTemplate testFun
17743         (
17744                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
17745                 "    %param = OpFunctionParameter %v4f32\n"
17746                 "    %entry = OpLabel\n"
17747
17748                 "        %i = OpVariable %fp_i32 Function\n"
17749                 "${arg_infunc_vars}"
17750                 "             OpStore %i %c_i32_0\n"
17751                 "             OpBranch %loop\n"
17752
17753                 "     %loop = OpLabel\n"
17754                 "    %i_cmp = OpLoad %i32 %i\n"
17755                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
17756                 "             OpLoopMerge %merge %next None\n"
17757                 "             OpBranchConditional %lt %write %merge\n"
17758
17759                 "    %write = OpLabel\n"
17760                 "      %ndx = OpLoad %i32 %i\n"
17761
17762                 "${arg_func_call}"
17763
17764                 "             OpBranch %next\n"
17765
17766                 "     %next = OpLabel\n"
17767                 "    %i_cur = OpLoad %i32 %i\n"
17768                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
17769                 "             OpStore %i %i_new\n"
17770                 "             OpBranch %loop\n"
17771
17772                 "    %merge = OpLabel\n"
17773                 "             OpReturnValue %param\n"
17774                 "             OpFunctionEnd\n"
17775         );
17776
17777         const Math16ArgFragments        argFragment1    =
17778         {
17779                 "     %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
17780                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0\n"
17781                 "     %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
17782                 "",
17783                 "",
17784                 "",
17785         };
17786
17787         const Math16ArgFragments        argFragment2    =
17788         {
17789                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
17790                 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
17791                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1\n"
17792                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
17793                 "",
17794                 "",
17795                 "",
17796         };
17797
17798         const Math16ArgFragments        argFragment3    =
17799         {
17800                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
17801                 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
17802                 " %val_src2 = OpFunctionCall %${t2} %ld_arg_ssbo_src2 %ndx\n"
17803                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1 %val_src2\n"
17804                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
17805                 "",
17806                 "",
17807                 "",
17808         };
17809
17810         const Math16ArgFragments        argFragmentLdExp        =
17811         {
17812                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
17813                 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
17814                 "%val_src1i = OpConvertFToS %${dr}i32 %val_src1\n"
17815                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1i\n"
17816                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
17817
17818                 "",
17819
17820                 "",
17821
17822                 "",
17823         };
17824
17825         const Math16ArgFragments        argFragmentModfFrac     =
17826         {
17827                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
17828                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
17829                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
17830
17831                 "   %fp_tmp = OpTypePointer Function %${tr}\n",
17832
17833                 "",
17834
17835                 "      %tmp = OpVariable %fp_tmp Function\n",
17836         };
17837
17838         const Math16ArgFragments        argFragmentModfInt      =
17839         {
17840                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
17841                 "%val_dummy = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
17842                 "     %tmp0 = OpAccessChain %fp_tmp %tmp\n"
17843                 "  %val_dst = OpLoad %${tr} %tmp0\n"
17844                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
17845
17846                 "   %fp_tmp = OpTypePointer Function %${tr}\n",
17847
17848                 "",
17849
17850                 "      %tmp = OpVariable %fp_tmp Function\n",
17851         };
17852
17853         const Math16ArgFragments        argFragmentModfStruct   =
17854         {
17855                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
17856                 "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
17857                 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
17858                 "             OpStore %tmp_ptr_s %val_tmp\n"
17859                 "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_${struct_member}\n"
17860                 "  %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
17861                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
17862
17863                 "  %fp_${tr} = OpTypePointer Function %${tr}\n"
17864                 "   %st_tmp = OpTypeStruct %${tr} %${tr}\n"
17865                 "   %fp_tmp = OpTypePointer Function %st_tmp\n"
17866                 "   %c_frac = OpConstant %i32 0\n"
17867                 "    %c_int = OpConstant %i32 1\n",
17868
17869                 "OpMemberDecorate %st_tmp 0 Offset 0\n"
17870                 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
17871
17872                 "      %tmp = OpVariable %fp_tmp Function\n",
17873         };
17874
17875         const Math16ArgFragments        argFragmentFrexpStructS =
17876         {
17877                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
17878                 "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
17879                 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
17880                 "             OpStore %tmp_ptr_s %val_tmp\n"
17881                 "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_i32_0\n"
17882                 "  %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
17883                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
17884
17885                 "  %fp_${tr} = OpTypePointer Function %${tr}\n"
17886                 "   %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
17887                 "   %fp_tmp = OpTypePointer Function %st_tmp\n",
17888
17889                 "OpMemberDecorate %st_tmp 0 Offset 0\n"
17890                 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
17891
17892                 "      %tmp = OpVariable %fp_tmp Function\n",
17893         };
17894
17895         const Math16ArgFragments        argFragmentFrexpStructE =
17896         {
17897                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
17898                 "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
17899                 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
17900                 "             OpStore %tmp_ptr_s %val_tmp\n"
17901                 "%tmp_ptr_l = OpAccessChain %fp_${dr}i32 %tmp %c_i32_1\n"
17902                 "%val_dst_i = OpLoad %${dr}i32 %tmp_ptr_l\n"
17903                 "  %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
17904                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
17905
17906                 "   %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
17907                 "   %fp_tmp = OpTypePointer Function %st_tmp\n",
17908
17909                 "OpMemberDecorate %st_tmp 0 Offset 0\n"
17910                 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
17911
17912                 "      %tmp = OpVariable %fp_tmp Function\n",
17913         };
17914
17915         const Math16ArgFragments        argFragmentFrexpS               =
17916         {
17917                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
17918                 "  %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
17919                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
17920                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
17921
17922                 "",
17923
17924                 "",
17925
17926                 "      %tmp = OpVariable %fp_${dr}i32 Function\n",
17927         };
17928
17929         const Math16ArgFragments        argFragmentFrexpE               =
17930         {
17931                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
17932                 "  %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
17933                 "%val_dummy = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
17934                 "%val_dst_i = OpLoad %${dr}i32 %out_exp\n"
17935                 "  %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
17936                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
17937
17938                 "",
17939
17940                 "",
17941
17942                 "      %tmp = OpVariable %fp_${dr}i32 Function\n",
17943         };
17944
17945         string load_funcs[MATH16_TYPE_LAST];
17946         load_funcs[SCALAR] = loadScalarF16FromUint;
17947         load_funcs[VEC2]   = loadV2F16FromUint;
17948         load_funcs[VEC3]   = loadV3F16FromUints;
17949         load_funcs[VEC4]   = loadV4F16FromUints;
17950         load_funcs[MAT2X2] = loadM2x2F16FromUints;
17951         load_funcs[MAT2X3] = loadM2x3F16FromUints;
17952         load_funcs[MAT2X4] = loadM2x4F16FromUints;
17953         load_funcs[MAT3X2] = loadM3x2F16FromUints;
17954         load_funcs[MAT3X3] = loadM3x3F16FromUints;
17955         load_funcs[MAT3X4] = loadM3x4F16FromUints;
17956         load_funcs[MAT4X2] = loadM4x2F16FromUints;
17957         load_funcs[MAT4X3] = loadM4x3F16FromUints;
17958         load_funcs[MAT4X4] = loadM4x4F16FromUints;
17959
17960         string store_funcs[MATH16_TYPE_LAST];
17961         store_funcs[SCALAR] = storeScalarF16AsUint;
17962         store_funcs[VEC2]   = storeV2F16AsUint;
17963         store_funcs[VEC3]   = storeV3F16AsUints;
17964         store_funcs[VEC4]   = storeV4F16AsUints;
17965         store_funcs[MAT2X2] = storeM2x2F16AsUints;
17966         store_funcs[MAT2X3] = storeM2x3F16AsUints;
17967         store_funcs[MAT2X4] = storeM2x4F16AsUints;
17968         store_funcs[MAT3X2] = storeM3x2F16AsUints;
17969         store_funcs[MAT3X3] = storeM3x3F16AsUints;
17970         store_funcs[MAT3X4] = storeM3x4F16AsUints;
17971         store_funcs[MAT4X2] = storeM4x2F16AsUints;
17972         store_funcs[MAT4X3] = storeM4x3F16AsUints;
17973         store_funcs[MAT4X4] = storeM4x4F16AsUints;
17974
17975         const Math16TestType&           testType                                = testTypes[testTypeIdx];
17976         const string                            funcNameString                  = string(testFunc.funcName) + string(testFunc.funcSuffix);
17977         const string                            testName                                = de::toLower(funcNameString);
17978         const Math16ArgFragments*       argFragments                    = DE_NULL;
17979         const size_t                            typeStructStride                = testType.typeStructStride;
17980         const bool                                      extInst                                 = !(testFunc.funcName[0] == 'O' && testFunc.funcName[1] == 'p');
17981         const size_t                            numFloatsPerArg0Type    = testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16);
17982         const size_t                            iterations                              = numDataPoints / numFloatsPerArg0Type;
17983         const size_t                            numFloatsPerResultType  = testTypes[testFunc.typeResult].typeArrayStride / sizeof(deFloat16);
17984         const vector<deFloat16>         float16DummyOutput              (iterations * numFloatsPerResultType, 0);
17985         VulkanFeatures                          features;
17986         SpecResource                            specResource;
17987         map<string, string>                     specs;
17988         map<string, string>                     fragments;
17989         vector<string>                          extensions;
17990         string                                          funcCall;
17991         string                                          funcVariables;
17992         string                                          variables;
17993         string                                          declarations;
17994         string                                          decorations;
17995         string                                          functions;
17996
17997         switch (testFunc.funcArgsCount)
17998         {
17999                 case 1:
18000                 {
18001                         argFragments = &argFragment1;
18002
18003                         if (funcNameString == "ModfFrac")               argFragments = &argFragmentModfFrac;
18004                         if (funcNameString == "ModfInt")                argFragments = &argFragmentModfInt;
18005                         if (funcNameString == "ModfStructFrac") argFragments = &argFragmentModfStruct;
18006                         if (funcNameString == "ModfStructInt")  argFragments = &argFragmentModfStruct;
18007                         if (funcNameString == "FrexpS")                 argFragments = &argFragmentFrexpS;
18008                         if (funcNameString == "FrexpE")                 argFragments = &argFragmentFrexpE;
18009                         if (funcNameString == "FrexpStructS")   argFragments = &argFragmentFrexpStructS;
18010                         if (funcNameString == "FrexpStructE")   argFragments = &argFragmentFrexpStructE;
18011
18012                         break;
18013                 }
18014                 case 2:
18015                 {
18016                         argFragments = &argFragment2;
18017
18018                         if (funcNameString == "Ldexp")                  argFragments = &argFragmentLdExp;
18019
18020                         break;
18021                 }
18022                 case 3:
18023                 {
18024                         argFragments = &argFragment3;
18025
18026                         break;
18027                 }
18028                 default:
18029                 {
18030                         TCU_THROW(InternalError, "Invalid number of arguments");
18031                 }
18032         }
18033
18034         functions = StringTemplate(store_funcs[testFunc.typeResult]).specialize({{"var", "ssbo_dst"}});
18035         if (testFunc.funcArgsCount == 1)
18036         {
18037                 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18038                 variables +=
18039                         " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18040                         "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18041
18042                 decorations +=
18043                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18044                         "OpDecorate %ssbo_src0 Binding 0\n"
18045                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
18046                         "OpDecorate %ssbo_dst Binding 1\n";
18047         }
18048         else if (testFunc.funcArgsCount == 2)
18049         {
18050                 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18051                 functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18052                 variables +=
18053                         " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18054                         " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18055                         "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18056
18057                 decorations +=
18058                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18059                         "OpDecorate %ssbo_src0 Binding 0\n"
18060                         "OpDecorate %ssbo_src1 DescriptorSet 0\n"
18061                         "OpDecorate %ssbo_src1 Binding 1\n"
18062                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
18063                         "OpDecorate %ssbo_dst Binding 2\n";
18064         }
18065         else if (testFunc.funcArgsCount == 3)
18066         {
18067                 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18068                 functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18069                 functions += StringTemplate(load_funcs[testFunc.typeArg2]).specialize({{"var", "ssbo_src2"}});
18070                 variables +=
18071                         " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18072                         " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18073                         " %ssbo_src2 = OpVariable %up_SSBO_${store_t2} Uniform\n"
18074                         "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18075
18076                 decorations +=
18077                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18078                         "OpDecorate %ssbo_src0 Binding 0\n"
18079                         "OpDecorate %ssbo_src1 DescriptorSet 0\n"
18080                         "OpDecorate %ssbo_src1 Binding 1\n"
18081                         "OpDecorate %ssbo_src2 DescriptorSet 0\n"
18082                         "OpDecorate %ssbo_src2 Binding 2\n"
18083                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
18084                         "OpDecorate %ssbo_dst Binding 3\n";
18085         }
18086         else
18087         {
18088                 TCU_THROW(InternalError, "Invalid number of function arguments");
18089         }
18090
18091         variables       += argFragments->variables;
18092         decorations     += argFragments->decorations;
18093
18094         specs["dr"]                                     = testTypes[testFunc.typeResult].typePrefix;
18095         specs["d0"]                                     = testTypes[testFunc.typeArg0].typePrefix;
18096         specs["d1"]                                     = testTypes[testFunc.typeArg1].typePrefix;
18097         specs["d2"]                                     = testTypes[testFunc.typeArg2].typePrefix;
18098         specs["tr"]                                     = string(testTypes[testFunc.typeResult].typePrefix) + componentType;
18099         specs["t0"]                                     = string(testTypes[testFunc.typeArg0].typePrefix) + componentType;
18100         specs["t1"]                                     = string(testTypes[testFunc.typeArg1].typePrefix) + componentType;
18101         specs["t2"]                                     = string(testTypes[testFunc.typeArg2].typePrefix) + componentType;
18102         specs["store_tr"]                       = string(testTypes[testFunc.typeResult].storage_type);
18103         specs["store_t0"]                       = string(testTypes[testFunc.typeArg0].storage_type);
18104         specs["store_t1"]                       = string(testTypes[testFunc.typeArg1].storage_type);
18105         specs["store_t2"]                       = string(testTypes[testFunc.typeArg2].storage_type);
18106         specs["struct_stride"]          = de::toString(typeStructStride);
18107         specs["op"]                                     = extInst ? "OpExtInst" : testFunc.funcName;
18108         specs["ext_inst"]                       = extInst ? string("%ext_import ") + testFunc.funcName : "";
18109         specs["struct_member"]          = de::toLower(testFunc.funcSuffix);
18110
18111         variables                                       = StringTemplate(variables).specialize(specs);
18112         decorations                                     = StringTemplate(decorations).specialize(specs);
18113         funcVariables                           = StringTemplate(argFragments->funcVariables).specialize(specs);
18114         funcCall                                        = StringTemplate(argFragments->bodies).specialize(specs);
18115
18116         specs["num_data_points"]        = de::toString(iterations);
18117         specs["arg_vars"]                       = variables;
18118         specs["arg_decorations"]        = decorations;
18119         specs["arg_infunc_vars"]        = funcVariables;
18120         specs["arg_func_call"]          = funcCall;
18121
18122         fragments["extension"]          = "%ext_import = OpExtInstImport \"GLSL.std.450\"";
18123         fragments["capability"]         = "OpCapability Matrix\nOpCapability Float16\n";
18124         fragments["decoration"]         = decoration.specialize(specs);
18125         fragments["pre_main"]           = preMain.specialize(specs) + functions;
18126         fragments["testfun"]            = testFun.specialize(specs);
18127
18128         for (size_t inputArgNdx = 0; inputArgNdx < testFunc.funcArgsCount; ++inputArgNdx)
18129         {
18130                 const size_t                    numFloatsPerItem        = (inputArgNdx == 0) ? testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16)
18131                                                                                                         : (inputArgNdx == 1) ? testTypes[testFunc.typeArg1].typeArrayStride / sizeof(deFloat16)
18132                                                                                                         : (inputArgNdx == 2) ? testTypes[testFunc.typeArg2].typeArrayStride / sizeof(deFloat16)
18133                                                                                                         : -1;
18134                 const vector<deFloat16> inputData                       = testFunc.getInputDataFunc(seed, numFloatsPerItem * iterations, testTypeIdx, numFloatsPerItem, testFunc.funcArgsCount, inputArgNdx);
18135
18136                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18137         }
18138
18139         specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16DummyOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18140         specResource.verifyIO = testFunc.verifyFunc;
18141
18142         extensions.push_back("VK_KHR_shader_float16_int8");
18143
18144         features.extFloat16Int8         = EXTFLOAT16INT8FEATURES_FLOAT16;
18145
18146         finalizeTestsCreation(specResource, fragments, testCtx, testGroup, testName, features, extensions, IVec3(1, 1, 1));
18147 }
18148
// Builds the "arithmetic_<C>" test group for 16-bit float arithmetic.
//
// One test is registered per row of the local testFuncs table by calling
// createFloat16ArithmeticFuncTest<SpecResource>() with C as the type index.
// Rows are visited in table order, so reordering the table reorders the tests.
//
// Template parameters:
//   C            - statically asserted to be in [1, 4]; used both in the group
//                  name and as the type index of every operand marked C in the
//                  table (presumably the vector component count - TODO confirm
//                  against testTypes).
//   SpecResource - forwarded unchanged to createFloat16ArithmeticFuncTest.
//
// Returns the group as a raw pointer released from its MovePtr
// (presumably the caller takes ownership - TODO confirm).
18149 template<size_t C, class SpecResource>
18150 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
18151 {
18152         DE_STATIC_ASSERT(C >= 1 && C <= 4);
18153
18154         const std::string                               testGroupName   (string("arithmetic_") + de::toString(C));
18155         de::MovePtr<tcu::TestCaseGroup> testGroup               (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
              // NOTE(review): Math16TestFunc is declared outside this view. Judging by the
              // values, the columns appear to be: instruction name, name suffix, argument
              // count, result type, arg0/arg1/arg2 types (0 where unused), input-data
              // generator, result verifier - confirm against the struct declaration.
              // The first four template arguments of compareFP16ArithmeticFunc repeat the
              // result/arg0/arg1/arg2 columns of the same row and must be kept in sync.
18156         const Math16TestFunc                    testFuncs[]             =
18157         {
18158                 {       "OpFNegate",                    "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16OpFNegate>                                       },
18159                 {       "Round",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Round>                                           },
18160                 {       "RoundEven",                    "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16RoundEven>                                       },
18161                 {       "Trunc",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Trunc>                                           },
18162                 {       "FAbs",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FAbs>                                            },
18163                 {       "FSign",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FSign>                                           },
18164                 {       "Floor",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Floor>                                           },
18165                 {       "Ceil",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Ceil>                                            },
18166                 {       "Fract",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Fract>                                           },
18167                 {       "Radians",                              "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Radians>                                         },
18168                 {       "Degrees",                              "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Degrees>                                         },
18169                 {       "Sin",                                  "",                     1,      C,              C,              0,              0, &getInputDataPI,     compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sin>                                                     },
18170                 {       "Cos",                                  "",                     1,      C,              C,              0,              0, &getInputDataPI,     compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Cos>                                                     },
18171                 {       "Tan",                                  "",                     1,      C,              C,              0,              0, &getInputDataPI,     compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Tan>                                                     },
18172                 {       "Asin",                                 "",                     1,      C,              C,              0,              0, &getInputDataA,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Asin>                                            },
18173                 {       "Acos",                                 "",                     1,      C,              C,              0,              0, &getInputDataA,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Acos>                                            },
18174                 {       "Atan",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Atan>                                            },
18175                 {       "Sinh",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sinh>                                            },
18176                 {       "Cosh",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Cosh>                                            },
18177                 {       "Tanh",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Tanh>                                            },
18178                 {       "Asinh",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Asinh>                                           },
18179                 {       "Acosh",                                "",                     1,      C,              C,              0,              0, &getInputDataAC,     compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Acosh>                                           },
18180                 {       "Atanh",                                "",                     1,      C,              C,              0,              0, &getInputDataA,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Atanh>                                           },
18181                 {       "Exp",                                  "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Exp>                                                     },
18182                 {       "Log",                                  "",                     1,      C,              C,              0,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Log>                                                     },
18183                 {       "Exp2",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Exp2>                                            },
18184                 {       "Log2",                                 "",                     1,      C,              C,              0,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Log2>                                            },
18185                 {       "Sqrt",                                 "",                     1,      C,              C,              0,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sqrt>                                            },
18186                 {       "InverseSqrt",                  "",                     1,      C,              C,              0,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16InverseSqrt>                                     },
18187                 {       "Modf",                                 "Frac",         1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfFrac>                                        },
18188                 {       "Modf",                                 "Int",          1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfInt>                                         },
18189                 {       "ModfStruct",                   "Frac",         1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfFrac>                                        },
18190                 {       "ModfStruct",                   "Int",          1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfInt>                                         },
18191                 {       "Frexp",                                "S",            1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpS>                                          },
18192                 {       "Frexp",                                "E",            1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpE>                                          },
18193                 {       "FrexpStruct",                  "S",            1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpS>                                          },
18194                 {       "FrexpStruct",                  "E",            1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpE>                                          },
18195                 {       "OpFAdd",                               "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFAdd>                                          },
18196                 {       "OpFSub",                               "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFSub>                                          },
18197                 {       "OpFMul",                               "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFMul>                                          },
18198                 {       "OpFDiv",                               "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFDiv>                                          },
18199                 {       "Atan2",                                "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Atan2>                                           },
18200                 {       "Pow",                                  "",                     2,      C,              C,              C,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Pow>                                                     },
18201                 {       "FMin",                                 "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16FMin>                                            },
18202                 {       "FMax",                                 "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16FMax>                                            },
18203                 {       "Step",                                 "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Step>                                            },
18204                 {       "Ldexp",                                "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Ldexp>                                           },
18205                 {       "FClamp",                               "",                     3,      C,              C,              C,              C, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FClamp>                                          },
18206                 {       "FMix",                                 "",                     3,      C,              C,              C,              C, &getInputDataD,      compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FMix>                                            },
18207                 {       "SmoothStep",                   "",                     3,      C,              C,              C,              C, &getInputDataSS,     compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16SmoothStep>                                      },
18208                 {       "Fma",                                  "",                     3,      C,              C,              C,              C, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16Fma>                                                     },
18209                 {       "Length",                               "",                     1,      1,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  1,  C,  0,  0, fp16Length>                                          },
18210                 {       "Distance",                             "",                     2,      1,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  1,  C,  C,  0, fp16Distance>                                        },
18211                 {       "Cross",                                "",                     2,      C,              C,              C,              0, &getInputDataD,      compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Cross>                                           },
18212                 {       "Normalize",                    "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Normalize>                                       },
18213                 {       "FaceForward",                  "",                     3,      C,              C,              C,              C, &getInputDataD,      compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FaceForward>                                     },
18214                 {       "Reflect",                              "",                     2,      C,              C,              C,              0, &getInputDataD,      compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Reflect>                                         },
18215                 {       "Refract",                              "",                     3,      C,              C,              C,              1, &getInputDataN,      compareFP16ArithmeticFunc<  C,  C,  C,  1, fp16Refract>                                         },
18216                 {       "OpDot",                                "",                     2,      1,              C,              C,              0, &getInputDataD,      compareFP16ArithmeticFunc<  1,  C,  C,  0, fp16Dot>                                                     },
18217                 {       "OpVectorTimesScalar",  "",                     2,      C,              C,              1,              0, &getInputDataV,      compareFP16ArithmeticFunc<  C,  C,  1,  0, fp16VectorTimesScalar>                       },
18218         };
18219
              // Register the rows in table order, dropping those that do not apply to this C.
18220         for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
18221         {
18222                 const Math16TestFunc&   testFunc                = testFuncs[testFuncIdx];
                      // Held as a string so the name can be matched with operator== below.
18223                 const string                    funcNameString  = testFunc.funcName;
18224
                      // Cross is only generated for C == 3
                      // (presumably because it is defined for 3-component vectors only - TODO confirm).
18225                 if ((C != 3) && funcNameString == "Cross")
18226                         continue;
18227
                      // OpDot is skipped for C == 1
                      // (presumably C == 1 is the scalar type and OpDot needs vector operands - TODO confirm).
18228                 if ((C < 2) && funcNameString == "OpDot")
18229                         continue;
18230
                      // OpVectorTimesScalar is skipped for C == 1, for the same presumed reason.
18231                 if ((C < 2) && funcNameString == "OpVectorTimesScalar")
18232                         continue;
18233
18234                 createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), C, testFunc);
18235         }
18236
              // Give up MovePtr ownership and hand the populated group back as a raw pointer.
18237         return testGroup.release();
18238 }
18239
18240 template<class SpecResource>
18241 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
18242 {
18243         const std::string                               testGroupName   ("arithmetic");
18244         de::MovePtr<tcu::TestCaseGroup> testGroup               (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
18245         const Math16TestFunc                    testFuncs[]             =
18246         {
18247                 {       "OpTranspose",                  "2x2",          1,      MAT2X2, MAT2X2, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  4,  4,  0,  0, fp16Transpose<2,2> >                         },
18248                 {       "OpTranspose",                  "3x2",          1,      MAT2X3, MAT3X2, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<3,2> >                         },
18249                 {       "OpTranspose",                  "4x2",          1,      MAT2X4, MAT4X2, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<4,2> >                         },
18250                 {       "OpTranspose",                  "2x3",          1,      MAT3X2, MAT2X3, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<2,3> >                         },
18251                 {       "OpTranspose",                  "3x3",          1,      MAT3X3, MAT3X3, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<3,3> >                         },
18252                 {       "OpTranspose",                  "4x3",          1,      MAT3X4, MAT4X3, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<4,3> >                         },
18253                 {       "OpTranspose",                  "2x4",          1,      MAT4X2, MAT2X4, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<2,4> >                         },
18254                 {       "OpTranspose",                  "3x4",          1,      MAT4X3, MAT3X4, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<3,4> >                         },
18255                 {       "OpTranspose",                  "4x4",          1,      MAT4X4, MAT4X4, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<4,4> >                         },
18256                 {       "OpMatrixTimesScalar",  "2x2",          2,      MAT2X2, MAT2X2, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  4,  1,  0, fp16MatrixTimesScalar<2,2> >         },
18257                 {       "OpMatrixTimesScalar",  "2x3",          2,      MAT2X3, MAT2X3, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<2,3> >         },
18258                 {       "OpMatrixTimesScalar",  "2x4",          2,      MAT2X4, MAT2X4, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<2,4> >         },
18259                 {       "OpMatrixTimesScalar",  "3x2",          2,      MAT3X2, MAT3X2, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<3,2> >         },
18260                 {       "OpMatrixTimesScalar",  "3x3",          2,      MAT3X3, MAT3X3, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<3,3> >         },
18261                 {       "OpMatrixTimesScalar",  "3x4",          2,      MAT3X4, MAT3X4, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<3,4> >         },
18262                 {       "OpMatrixTimesScalar",  "4x2",          2,      MAT4X2, MAT4X2, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<4,2> >         },
18263                 {       "OpMatrixTimesScalar",  "4x3",          2,      MAT4X3, MAT4X3, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<4,3> >         },
18264                 {       "OpMatrixTimesScalar",  "4x4",          2,      MAT4X4, MAT4X4, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<4,4> >         },
18265                 {       "OpVectorTimesMatrix",  "2x2",          2,      VEC2,   VEC2,   MAT2X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  2,  4,  0, fp16VectorTimesMatrix<2,2> >         },
18266                 {       "OpVectorTimesMatrix",  "2x3",          2,      VEC2,   VEC3,   MAT2X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  3,  8,  0, fp16VectorTimesMatrix<2,3> >         },
18267                 {       "OpVectorTimesMatrix",  "2x4",          2,      VEC2,   VEC4,   MAT2X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  4,  8,  0, fp16VectorTimesMatrix<2,4> >         },
18268                 {       "OpVectorTimesMatrix",  "3x2",          2,      VEC3,   VEC2,   MAT3X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  3,  2,  8,  0, fp16VectorTimesMatrix<3,2> >         },
18269                 {       "OpVectorTimesMatrix",  "3x3",          2,      VEC3,   VEC3,   MAT3X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  3,  3, 16,  0, fp16VectorTimesMatrix<3,3> >         },
18270                 {       "OpVectorTimesMatrix",  "3x4",          2,      VEC3,   VEC4,   MAT3X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  3,  4, 16,  0, fp16VectorTimesMatrix<3,4> >         },
18271                 {       "OpVectorTimesMatrix",  "4x2",          2,      VEC4,   VEC2,   MAT4X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  2,  8,  0, fp16VectorTimesMatrix<4,2> >         },
18272                 {       "OpVectorTimesMatrix",  "4x3",          2,      VEC4,   VEC3,   MAT4X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  3, 16,  0, fp16VectorTimesMatrix<4,3> >         },
18273                 {       "OpVectorTimesMatrix",  "4x4",          2,      VEC4,   VEC4,   MAT4X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  4, 16,  0, fp16VectorTimesMatrix<4,4> >         },
18274                 {       "OpMatrixTimesVector",  "2x2",          2,      VEC2,   MAT2X2, VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  4,  2,  0, fp16MatrixTimesVector<2,2> >         },
18275                 {       "OpMatrixTimesVector",  "2x3",          2,      VEC3,   MAT2X3, VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  3,  8,  2,  0, fp16MatrixTimesVector<2,3> >         },
18276                 {       "OpMatrixTimesVector",  "2x4",          2,      VEC4,   MAT2X4, VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  8,  2,  0, fp16MatrixTimesVector<2,4> >         },
18277                 {       "OpMatrixTimesVector",  "3x2",          2,      VEC2,   MAT3X2, VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  8,  3,  0, fp16MatrixTimesVector<3,2> >         },
18278                 {       "OpMatrixTimesVector",  "3x3",          2,      VEC3,   MAT3X3, VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc<  3, 16,  3,  0, fp16MatrixTimesVector<3,3> >         },
18279                 {       "OpMatrixTimesVector",  "3x4",          2,      VEC4,   MAT3X4, VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc<  4, 16,  3,  0, fp16MatrixTimesVector<3,4> >         },
18280                 {       "OpMatrixTimesVector",  "4x2",          2,      VEC2,   MAT4X2, VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  8,  4,  0, fp16MatrixTimesVector<4,2> >         },
18281                 {       "OpMatrixTimesVector",  "4x3",          2,      VEC3,   MAT4X3, VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc<  3, 16,  4,  0, fp16MatrixTimesVector<4,3> >         },
18282                 {       "OpMatrixTimesVector",  "4x4",          2,      VEC4,   MAT4X4, VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc<  4, 16,  4,  0, fp16MatrixTimesVector<4,4> >         },
18283                 {       "OpMatrixTimesMatrix",  "2x2_2x2",      2,      MAT2X2, MAT2X2, MAT2X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  4,  4,  0, fp16MatrixTimesMatrix<2,2,2,2> >     },
18284                 {       "OpMatrixTimesMatrix",  "2x2_3x2",      2,      MAT3X2, MAT2X2, MAT3X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  4,  8,  0, fp16MatrixTimesMatrix<2,2,3,2> >     },
18285                 {       "OpMatrixTimesMatrix",  "2x2_4x2",      2,      MAT4X2, MAT2X2, MAT4X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  4,  8,  0, fp16MatrixTimesMatrix<2,2,4,2> >     },
18286                 {       "OpMatrixTimesMatrix",  "2x3_2x2",      2,      MAT2X3, MAT2X3, MAT2X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  4,  0, fp16MatrixTimesMatrix<2,3,2,2> >     },
18287                 {       "OpMatrixTimesMatrix",  "2x3_3x2",      2,      MAT3X3, MAT2X3, MAT3X2, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,3,3,2> >     },
18288                 {       "OpMatrixTimesMatrix",  "2x3_4x2",      2,      MAT4X3, MAT2X3, MAT4X2, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,3,4,2> >     },
18289                 {       "OpMatrixTimesMatrix",  "2x4_2x2",      2,      MAT2X4, MAT2X4, MAT2X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  4,  0, fp16MatrixTimesMatrix<2,4,2,2> >     },
18290                 {       "OpMatrixTimesMatrix",  "2x4_3x2",      2,      MAT3X4, MAT2X4, MAT3X2, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,4,3,2> >     },
18291                 {       "OpMatrixTimesMatrix",  "2x4_4x2",      2,      MAT4X4, MAT2X4, MAT4X2, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,4,4,2> >     },
18292                 {       "OpMatrixTimesMatrix",  "3x2_2x3",      2,      MAT2X2, MAT3X2, MAT2X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  8,  8,  0, fp16MatrixTimesMatrix<3,2,2,3> >     },
18293                 {       "OpMatrixTimesMatrix",  "3x2_3x3",      2,      MAT3X2, MAT3X2, MAT3X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<3,2,3,3> >     },
18294                 {       "OpMatrixTimesMatrix",  "3x2_4x3",      2,      MAT4X2, MAT3X2, MAT4X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<3,2,4,3> >     },
18295                 {       "OpMatrixTimesMatrix",  "3x3_2x3",      2,      MAT2X3, MAT3X3, MAT2X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<3,3,2,3> >     },
18296                 {       "OpMatrixTimesMatrix",  "3x3_3x3",      2,      MAT3X3, MAT3X3, MAT3X3, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,3,3,3> >     },
18297                 {       "OpMatrixTimesMatrix",  "3x3_4x3",      2,      MAT4X3, MAT3X3, MAT4X3, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,3,4,3> >     },
18298                 {       "OpMatrixTimesMatrix",  "3x4_2x3",      2,      MAT2X4, MAT3X4, MAT2X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<3,4,2,3> >     },
18299                 {       "OpMatrixTimesMatrix",  "3x4_3x3",      2,      MAT3X4, MAT3X4, MAT3X3, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,4,3,3> >     },
18300                 {       "OpMatrixTimesMatrix",  "3x4_4x3",      2,      MAT4X4, MAT3X4, MAT4X3, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,4,4,3> >     },
18301                 {       "OpMatrixTimesMatrix",  "4x2_2x4",      2,      MAT2X2, MAT4X2, MAT2X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  8,  8,  0, fp16MatrixTimesMatrix<4,2,2,4> >     },
18302                 {       "OpMatrixTimesMatrix",  "4x2_3x4",      2,      MAT3X2, MAT4X2, MAT3X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<4,2,3,4> >     },
18303                 {       "OpMatrixTimesMatrix",  "4x2_4x4",      2,      MAT4X2, MAT4X2, MAT4X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<4,2,4,4> >     },
18304                 {       "OpMatrixTimesMatrix",  "4x3_2x4",      2,      MAT2X3, MAT4X3, MAT2X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<4,3,2,4> >     },
18305                 {       "OpMatrixTimesMatrix",  "4x3_3x4",      2,      MAT3X3, MAT4X3, MAT3X4, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,3,3,4> >     },
18306                 {       "OpMatrixTimesMatrix",  "4x3_4x4",      2,      MAT4X3, MAT4X3, MAT4X4, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,3,4,4> >     },
18307                 {       "OpMatrixTimesMatrix",  "4x4_2x4",      2,      MAT2X4, MAT4X4, MAT2X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<4,4,2,4> >     },
18308                 {       "OpMatrixTimesMatrix",  "4x4_3x4",      2,      MAT3X4, MAT4X4, MAT3X4, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,4,3,4> >     },
18309                 {       "OpMatrixTimesMatrix",  "4x4_4x4",      2,      MAT4X4, MAT4X4, MAT4X4, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,4,4,4> >     },
18310                 {       "OpOuterProduct",               "2x2",          2,      MAT2X2, VEC2,   VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  2,  2,  0, fp16OuterProduct<2,2> >                      },
18311                 {       "OpOuterProduct",               "2x3",          2,      MAT2X3, VEC3,   VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  3,  2,  0, fp16OuterProduct<2,3> >                      },
18312                 {       "OpOuterProduct",               "2x4",          2,      MAT2X4, VEC4,   VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  4,  2,  0, fp16OuterProduct<2,4> >                      },
18313                 {       "OpOuterProduct",               "3x2",          2,      MAT3X2, VEC2,   VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  2,  3,  0, fp16OuterProduct<3,2> >                      },
18314                 {       "OpOuterProduct",               "3x3",          2,      MAT3X3, VEC3,   VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  3,  3,  0, fp16OuterProduct<3,3> >                      },
18315                 {       "OpOuterProduct",               "3x4",          2,      MAT3X4, VEC4,   VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  4,  3,  0, fp16OuterProduct<3,4> >                      },
18316                 {       "OpOuterProduct",               "4x2",          2,      MAT4X2, VEC2,   VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  2,  4,  0, fp16OuterProduct<4,2> >                      },
18317                 {       "OpOuterProduct",               "4x3",          2,      MAT4X3, VEC3,   VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  3,  4,  0, fp16OuterProduct<4,3> >                      },
18318                 {       "OpOuterProduct",               "4x4",          2,      MAT4X4, VEC4,   VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  4,  4,  0, fp16OuterProduct<4,4> >                      },
18319                 {       "Determinant",                  "2x2",          1,      SCALAR, MAT2X2, NONE,   0, &getInputDataC,      compareFP16ArithmeticFunc<  1,  4,  0,  0, fp16Determinant<2> >                         },
18320                 {       "Determinant",                  "3x3",          1,      SCALAR, MAT3X3, NONE,   0, &getInputDataC,      compareFP16ArithmeticFunc<  1, 16,  0,  0, fp16Determinant<3> >                         },
18321                 {       "Determinant",                  "4x4",          1,      SCALAR, MAT4X4, NONE,   0, &getInputDataC,      compareFP16ArithmeticFunc<  1, 16,  0,  0, fp16Determinant<4> >                         },
18322                 {       "MatrixInverse",                "2x2",          1,      MAT2X2, MAT2X2, NONE,   0, &getInputDataC,      compareFP16ArithmeticFunc<  4,  4,  0,  0, fp16Inverse<2> >                                     },
18323         };
18324
18325         for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
18326         {
18327                 const Math16TestFunc&   testFunc        = testFuncs[testFuncIdx];
18328
18329                 createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), 0, testFunc);
18330         }
18331
18332         return testGroup.release();
18333 }
18334
18335 const string getNumberTypeName (const NumberType type)
18336 {
18337         if (type == NUMBERTYPE_INT32)
18338         {
18339                 return "int";
18340         }
18341         else if (type == NUMBERTYPE_UINT32)
18342         {
18343                 return "uint";
18344         }
18345         else if (type == NUMBERTYPE_FLOAT32)
18346         {
18347                 return "float";
18348         }
18349         else
18350         {
18351                 DE_ASSERT(false);
18352                 return "";
18353         }
18354 }
18355
18356 deInt32 getInt(de::Random& rnd)
18357 {
18358         return rnd.getInt(std::numeric_limits<int>::min(), std::numeric_limits<int>::max());
18359 }
18360
// Returns str concatenated with itself `times` times.
// A count of zero or less produces an empty string.
const string repeatString (const string& str, int times)
{
	string repeated;

	for (int remaining = times; remaining > 0; --remaining)
		repeated.append(str);

	return repeated;
}
18370
18371 const string getRandomConstantString (const NumberType type, de::Random& rnd)
18372 {
18373         if (type == NUMBERTYPE_INT32)
18374         {
18375                 return numberToString<deInt32>(getInt(rnd));
18376         }
18377         else if (type == NUMBERTYPE_UINT32)
18378         {
18379                 return numberToString<deUint32>(rnd.getUint32());
18380         }
18381         else if (type == NUMBERTYPE_FLOAT32)
18382         {
18383                 return numberToString<float>(rnd.getFloat());
18384         }
18385         else
18386         {
18387                 DE_ASSERT(false);
18388                 return "";
18389         }
18390 }
18391
18392 void createVectorCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
18393 {
18394         map<string, string> params;
18395
18396         // Vec2 to Vec4
18397         for (int width = 2; width <= 4; ++width)
18398         {
18399                 const string randomConst = numberToString(getInt(rnd));
18400                 const string widthStr = numberToString(width);
18401                 const string composite_type = "${customType}vec" + widthStr;
18402                 const int index = rnd.getInt(0, width-1);
18403
18404                 params["type"]                  = "vec";
18405                 params["name"]                  = params["type"] + "_" + widthStr;
18406                 params["compositeDecl"]         = composite_type + " = OpTypeVector ${customType} " + widthStr +"\n";
18407                 params["compositeType"]         = composite_type;
18408                 params["filler"]                = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
18409                 params["compositeConstruct"]    = "%instance  = OpCompositeConstruct " + composite_type + repeatString(" %filler", width) + "\n";
18410                 params["indexes"]               = numberToString(index);
18411                 testCases.push_back(params);
18412         }
18413 }
18414
18415 void createArrayCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
18416 {
18417         const int limit = 10;
18418         map<string, string> params;
18419
18420         for (int width = 2; width <= limit; ++width)
18421         {
18422                 string randomConst = numberToString(getInt(rnd));
18423                 string widthStr = numberToString(width);
18424                 int index = rnd.getInt(0, width-1);
18425
18426                 params["type"]                  = "array";
18427                 params["name"]                  = params["type"] + "_" + widthStr;
18428                 params["compositeDecl"]         = string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
18429                                                                                         +        "%composite = OpTypeArray ${customType} %arraywidth\n";
18430                 params["compositeType"]         = "%composite";
18431                 params["filler"]                = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
18432                 params["compositeConstruct"]    = "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
18433                 params["indexes"]               = numberToString(index);
18434                 testCases.push_back(params);
18435         }
18436 }
18437
18438 void createStructCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
18439 {
18440         const int limit = 10;
18441         map<string, string> params;
18442
18443         for (int width = 2; width <= limit; ++width)
18444         {
18445                 string randomConst = numberToString(getInt(rnd));
18446                 int index = rnd.getInt(0, width-1);
18447
18448                 params["type"]                  = "struct";
18449                 params["name"]                  = params["type"] + "_" + numberToString(width);
18450                 params["compositeDecl"]         = "%composite = OpTypeStruct" + repeatString(" ${customType}", width) + "\n";
18451                 params["compositeType"]         = "%composite";
18452                 params["filler"]                = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
18453                 params["compositeConstruct"]    = "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
18454                 params["indexes"]               = numberToString(index);
18455                 testCases.push_back(params);
18456         }
18457 }
18458
18459 void createMatrixCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
18460 {
18461         map<string, string> params;
18462
18463         // Vec2 to Vec4
18464         for (int width = 2; width <= 4; ++width)
18465         {
18466                 string widthStr = numberToString(width);
18467
18468                 for (int column = 2 ; column <= 4; ++column)
18469                 {
18470                         int index_0 = rnd.getInt(0, column-1);
18471                         int index_1 = rnd.getInt(0, width-1);
18472                         string columnStr = numberToString(column);
18473
18474                         params["type"]          = "matrix";
18475                         params["name"]          = params["type"] + "_" + widthStr + "x" + columnStr;
18476                         params["compositeDecl"] = string("%vectype   = OpTypeVector ${customType} " + widthStr + "\n")
18477                                                                                                 +        "%composite = OpTypeMatrix %vectype " + columnStr + "\n";
18478                         params["compositeType"] = "%composite";
18479
18480                         params["filler"]        = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n"
18481                                                                                                 +        "%fillerVec = OpConstantComposite %vectype" + repeatString(" %filler", width) + "\n";
18482
18483                         params["compositeConstruct"]    = "%instance  = OpCompositeConstruct %composite" + repeatString(" %fillerVec", column) + "\n";
18484                         params["indexes"]       = numberToString(index_0) + " " + numberToString(index_1);
18485                         testCases.push_back(params);
18486                 }
18487         }
18488 }
18489
18490 void createCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
18491 {
18492         createVectorCompositeCases(testCases, rnd, type);
18493         createArrayCompositeCases(testCases, rnd, type);
18494         createStructCompositeCases(testCases, rnd, type);
18495         // Matrix only supports float types
18496         if (type == NUMBERTYPE_FLOAT32)
18497         {
18498                 createMatrixCompositeCases(testCases, rnd, type);
18499         }
18500 }
18501
18502 const string getAssemblyTypeDeclaration (const NumberType type)
18503 {
18504         switch (type)
18505         {
18506                 case NUMBERTYPE_INT32:          return "OpTypeInt 32 1";
18507                 case NUMBERTYPE_UINT32:         return "OpTypeInt 32 0";
18508                 case NUMBERTYPE_FLOAT32:        return "OpTypeFloat 32";
18509                 default:                        DE_ASSERT(false); return "";
18510         }
18511 }
18512
18513 const string getAssemblyTypeName (const NumberType type)
18514 {
18515         switch (type)
18516         {
18517                 case NUMBERTYPE_INT32:          return "%i32";
18518                 case NUMBERTYPE_UINT32:         return "%u32";
18519                 case NUMBERTYPE_FLOAT32:        return "%f32";
18520                 default:                        DE_ASSERT(false); return "";
18521         }
18522 }
18523
18524 const string specializeCompositeInsertShaderTemplate (const NumberType type, const map<string, string>& params)
18525 {
18526         map<string, string>     parameters(params);
18527
18528         const string customType = getAssemblyTypeName(type);
18529         map<string, string> substCustomType;
18530         substCustomType["customType"] = customType;
18531         parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
18532         parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
18533         parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
18534         parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
18535         parameters["customType"] = customType;
18536         parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
18537
18538         if (parameters.at("compositeType") != "%u32vec3")
18539         {
18540                 parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
18541         }
18542
18543         return StringTemplate(
18544                 "OpCapability Shader\n"
18545                 "OpCapability Matrix\n"
18546                 "OpMemoryModel Logical GLSL450\n"
18547                 "OpEntryPoint GLCompute %main \"main\" %id\n"
18548                 "OpExecutionMode %main LocalSize 1 1 1\n"
18549
18550                 "OpSource GLSL 430\n"
18551                 "OpName %main           \"main\"\n"
18552                 "OpName %id             \"gl_GlobalInvocationID\"\n"
18553
18554                 // Decorators
18555                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
18556                 "OpDecorate %buf BufferBlock\n"
18557                 "OpDecorate %indata DescriptorSet 0\n"
18558                 "OpDecorate %indata Binding 0\n"
18559                 "OpDecorate %outdata DescriptorSet 0\n"
18560                 "OpDecorate %outdata Binding 1\n"
18561                 "OpDecorate %customarr ArrayStride 4\n"
18562                 "${compositeDecorator}"
18563                 "OpMemberDecorate %buf 0 Offset 0\n"
18564
18565                 // General types
18566                 "%void      = OpTypeVoid\n"
18567                 "%voidf     = OpTypeFunction %void\n"
18568                 "%u32       = OpTypeInt 32 0\n"
18569                 "%i32       = OpTypeInt 32 1\n"
18570                 "%f32       = OpTypeFloat 32\n"
18571
18572                 // Composite declaration
18573                 "${compositeDecl}"
18574
18575                 // Constants
18576                 "${filler}"
18577
18578                 "${u32vec3Decl:opt}"
18579                 "%uvec3ptr  = OpTypePointer Input %u32vec3\n"
18580
18581                 // Inherited from custom
18582                 "%customptr = OpTypePointer Uniform ${customType}\n"
18583                 "%customarr = OpTypeRuntimeArray ${customType}\n"
18584                 "%buf       = OpTypeStruct %customarr\n"
18585                 "%bufptr    = OpTypePointer Uniform %buf\n"
18586
18587                 "%indata    = OpVariable %bufptr Uniform\n"
18588                 "%outdata   = OpVariable %bufptr Uniform\n"
18589
18590                 "%id        = OpVariable %uvec3ptr Input\n"
18591                 "%zero      = OpConstant %i32 0\n"
18592
18593                 "%main      = OpFunction %void None %voidf\n"
18594                 "%label     = OpLabel\n"
18595                 "%idval     = OpLoad %u32vec3 %id\n"
18596                 "%x         = OpCompositeExtract %u32 %idval 0\n"
18597
18598                 "%inloc     = OpAccessChain %customptr %indata %zero %x\n"
18599                 "%outloc    = OpAccessChain %customptr %outdata %zero %x\n"
18600                 // Read the input value
18601                 "%inval     = OpLoad ${customType} %inloc\n"
18602                 // Create the composite and fill it
18603                 "${compositeConstruct}"
18604                 // Insert the input value to a place
18605                 "%instance2 = OpCompositeInsert ${compositeType} %inval %instance ${indexes}\n"
18606                 // Read back the value from the position
18607                 "%out_val   = OpCompositeExtract ${customType} %instance2 ${indexes}\n"
18608                 // Store it in the output position
18609                 "             OpStore %outloc %out_val\n"
18610                 "             OpReturn\n"
18611                 "             OpFunctionEnd\n"
18612         ).specialize(parameters);
18613 }
18614
18615 template<typename T>
18616 BufferSp createCompositeBuffer(T number)
18617 {
18618         return BufferSp(new Buffer<T>(vector<T>(1, number)));
18619 }
18620
18621 tcu::TestCaseGroup* createOpCompositeInsertGroup (tcu::TestContext& testCtx)
18622 {
18623         de::MovePtr<tcu::TestCaseGroup> group   (new tcu::TestCaseGroup(testCtx, "opcompositeinsert", "Test the OpCompositeInsert instruction"));
18624         de::Random                                              rnd             (deStringHash(group->getName()));
18625
18626         for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
18627         {
18628                 NumberType                                              numberType              = NumberType(type);
18629                 const string                                    typeName                = getNumberTypeName(numberType);
18630                 const string                                    description             = "Test the OpCompositeInsert instruction with " + typeName + "s";
18631                 de::MovePtr<tcu::TestCaseGroup> subGroup                (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
18632                 vector<map<string, string> >    testCases;
18633
18634                 createCompositeCases(testCases, rnd, numberType);
18635
18636                 for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
18637                 {
18638                         ComputeShaderSpec       spec;
18639
18640                         spec.assembly = specializeCompositeInsertShaderTemplate(numberType, *test);
18641
18642                         switch (numberType)
18643                         {
18644                                 case NUMBERTYPE_INT32:
18645                                 {
18646                                         deInt32 number = getInt(rnd);
18647                                         spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
18648                                         spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
18649                                         break;
18650                                 }
18651                                 case NUMBERTYPE_UINT32:
18652                                 {
18653                                         deUint32 number = rnd.getUint32();
18654                                         spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
18655                                         spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
18656                                         break;
18657                                 }
18658                                 case NUMBERTYPE_FLOAT32:
18659                                 {
18660                                         float number = rnd.getFloat();
18661                                         spec.inputs.push_back(createCompositeBuffer<float>(number));
18662                                         spec.outputs.push_back(createCompositeBuffer<float>(number));
18663                                         break;
18664                                 }
18665                                 default:
18666                                         DE_ASSERT(false);
18667                         }
18668
18669                         spec.numWorkGroups = IVec3(1, 1, 1);
18670                         subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpCompositeInsert test", spec));
18671                 }
18672                 group->addChild(subGroup.release());
18673         }
18674         return group.release();
18675 }
18676
18677 struct AssemblyStructInfo
18678 {
18679         AssemblyStructInfo (const deUint32 comp, const deUint32 idx)
18680         : components    (comp)
18681         , index                 (idx)
18682         {}
18683
18684         deUint32 components;
18685         deUint32 index;
18686 };
18687
18688 const string specializeInBoundsShaderTemplate (const NumberType type, const AssemblyStructInfo& structInfo, const map<string, string>& params)
18689 {
18690         // Create the full index string
18691         string                          fullIndex       = numberToString(structInfo.index) + " " + params.at("indexes");
18692         // Convert it to list of indexes
18693         vector<string>          indexes         = de::splitString(fullIndex, ' ');
18694
18695         map<string, string>     parameters      (params);
18696         parameters["structType"]        = repeatString(" ${compositeType}", structInfo.components);
18697         parameters["structConstruct"]   = repeatString(" %instance", structInfo.components);
18698         parameters["insertIndexes"]     = fullIndex;
18699
18700         // In matrix cases the last two index is the CompositeExtract indexes
18701         const deUint32 extractIndexes = (parameters["type"] == "matrix") ? 2 : 1;
18702
18703         // Construct the extractIndex
18704         for (vector<string>::const_iterator index = indexes.end() - extractIndexes; index != indexes.end(); ++index)
18705         {
18706                 parameters["extractIndexes"] += " " + *index;
18707         }
18708
18709         // Remove the last 1 or 2 element depends on matrix case or not
18710         indexes.erase(indexes.end() - extractIndexes, indexes.end());
18711
18712         deUint32 id = 0;
18713         // Generate AccessChain index expressions (except for the last one, because we use ptr to the composite)
18714         for (vector<string>::const_iterator index = indexes.begin(); index != indexes.end(); ++index)
18715         {
18716                 string indexId = "%index_" + numberToString(id++);
18717                 parameters["accessChainConstDeclaration"] += indexId + "   = OpConstant %u32 " + *index + "\n";
18718                 parameters["accessChainIndexes"] += " " + indexId;
18719         }
18720
18721         parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
18722
18723         const string customType = getAssemblyTypeName(type);
18724         map<string, string> substCustomType;
18725         substCustomType["customType"] = customType;
18726         parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
18727         parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
18728         parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
18729         parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
18730         parameters["customType"] = customType;
18731
18732         const string compositeType = parameters.at("compositeType");
18733         map<string, string> substCompositeType;
18734         substCompositeType["compositeType"] = compositeType;
18735         parameters["structType"] = StringTemplate(parameters.at("structType")).specialize(substCompositeType);
18736         if (compositeType != "%u32vec3")
18737         {
18738                 parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
18739         }
18740
18741         return StringTemplate(
18742                 "OpCapability Shader\n"
18743                 "OpCapability Matrix\n"
18744                 "OpMemoryModel Logical GLSL450\n"
18745                 "OpEntryPoint GLCompute %main \"main\" %id\n"
18746                 "OpExecutionMode %main LocalSize 1 1 1\n"
18747
18748                 "OpSource GLSL 430\n"
18749                 "OpName %main           \"main\"\n"
18750                 "OpName %id             \"gl_GlobalInvocationID\"\n"
18751                 // Decorators
18752                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
18753                 "OpDecorate %buf BufferBlock\n"
18754                 "OpDecorate %indata DescriptorSet 0\n"
18755                 "OpDecorate %indata Binding 0\n"
18756                 "OpDecorate %outdata DescriptorSet 0\n"
18757                 "OpDecorate %outdata Binding 1\n"
18758                 "OpDecorate %customarr ArrayStride 4\n"
18759                 "${compositeDecorator}"
18760                 "OpMemberDecorate %buf 0 Offset 0\n"
18761                 // General types
18762                 "%void      = OpTypeVoid\n"
18763                 "%voidf     = OpTypeFunction %void\n"
18764                 "%i32       = OpTypeInt 32 1\n"
18765                 "%u32       = OpTypeInt 32 0\n"
18766                 "%f32       = OpTypeFloat 32\n"
18767                 // Custom types
18768                 "${compositeDecl}"
18769                 // %u32vec3 if not already declared in ${compositeDecl}
18770                 "${u32vec3Decl:opt}"
18771                 "%uvec3ptr  = OpTypePointer Input %u32vec3\n"
18772                 // Inherited from composite
18773                 "%composite_p = OpTypePointer Function ${compositeType}\n"
18774                 "%struct_t  = OpTypeStruct${structType}\n"
18775                 "%struct_p  = OpTypePointer Function %struct_t\n"
18776                 // Constants
18777                 "${filler}"
18778                 "${accessChainConstDeclaration}"
18779                 // Inherited from custom
18780                 "%customptr = OpTypePointer Uniform ${customType}\n"
18781                 "%customarr = OpTypeRuntimeArray ${customType}\n"
18782                 "%buf       = OpTypeStruct %customarr\n"
18783                 "%bufptr    = OpTypePointer Uniform %buf\n"
18784                 "%indata    = OpVariable %bufptr Uniform\n"
18785                 "%outdata   = OpVariable %bufptr Uniform\n"
18786
18787                 "%id        = OpVariable %uvec3ptr Input\n"
18788                 "%zero      = OpConstant %u32 0\n"
18789                 "%main      = OpFunction %void None %voidf\n"
18790                 "%label     = OpLabel\n"
18791                 "%struct_v  = OpVariable %struct_p Function\n"
18792                 "%idval     = OpLoad %u32vec3 %id\n"
18793                 "%x         = OpCompositeExtract %u32 %idval 0\n"
18794                 // Create the input/output type
18795                 "%inloc     = OpInBoundsAccessChain %customptr %indata %zero %x\n"
18796                 "%outloc    = OpInBoundsAccessChain %customptr %outdata %zero %x\n"
18797                 // Read the input value
18798                 "%inval     = OpLoad ${customType} %inloc\n"
18799                 // Create the composite and fill it
18800                 "${compositeConstruct}"
18801                 // Create the struct and fill it with the composite
18802                 "%struct    = OpCompositeConstruct %struct_t${structConstruct}\n"
18803                 // Insert the value
18804                 "%comp_obj  = OpCompositeInsert %struct_t %inval %struct ${insertIndexes}\n"
18805                 // Store the object
18806                 "             OpStore %struct_v %comp_obj\n"
18807                 // Get deepest possible composite pointer
18808                 "%inner_ptr = OpInBoundsAccessChain %composite_p %struct_v${accessChainIndexes}\n"
18809                 "%read_obj  = OpLoad ${compositeType} %inner_ptr\n"
18810                 // Read back the stored value
18811                 "%read_val  = OpCompositeExtract ${customType} %read_obj${extractIndexes}\n"
18812                 "             OpStore %outloc %read_val\n"
18813                 "             OpReturn\n"
18814                 "             OpFunctionEnd\n"
18815         ).specialize(parameters);
18816 }
18817
18818 tcu::TestCaseGroup* createOpInBoundsAccessChainGroup (tcu::TestContext& testCtx)
18819 {
18820         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opinboundsaccesschain", "Test the OpInBoundsAccessChain instruction"));
18821         de::Random                                              rnd                             (deStringHash(group->getName()));
18822
18823         for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
18824         {
18825                 NumberType                                              numberType      = NumberType(type);
18826                 const string                                    typeName        = getNumberTypeName(numberType);
18827                 const string                                    description     = "Test the OpInBoundsAccessChain instruction with " + typeName + "s";
18828                 de::MovePtr<tcu::TestCaseGroup> subGroup        (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
18829
18830                 vector<map<string, string> >    testCases;
18831                 createCompositeCases(testCases, rnd, numberType);
18832
18833                 for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
18834                 {
18835                         ComputeShaderSpec       spec;
18836
18837                         // Number of components inside of a struct
18838                         deUint32 structComponents = rnd.getInt(2, 8);
18839                         // Component index value
18840                         deUint32 structIndex = rnd.getInt(0, structComponents - 1);
18841                         AssemblyStructInfo structInfo(structComponents, structIndex);
18842
18843                         spec.assembly = specializeInBoundsShaderTemplate(numberType, structInfo, *test);
18844
18845                         switch (numberType)
18846                         {
18847                                 case NUMBERTYPE_INT32:
18848                                 {
18849                                         deInt32 number = getInt(rnd);
18850                                         spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
18851                                         spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
18852                                         break;
18853                                 }
18854                                 case NUMBERTYPE_UINT32:
18855                                 {
18856                                         deUint32 number = rnd.getUint32();
18857                                         spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
18858                                         spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
18859                                         break;
18860                                 }
18861                                 case NUMBERTYPE_FLOAT32:
18862                                 {
18863                                         float number = rnd.getFloat();
18864                                         spec.inputs.push_back(createCompositeBuffer<float>(number));
18865                                         spec.outputs.push_back(createCompositeBuffer<float>(number));
18866                                         break;
18867                                 }
18868                                 default:
18869                                         DE_ASSERT(false);
18870                         }
18871                         spec.numWorkGroups = IVec3(1, 1, 1);
18872                         subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpInBoundsAccessChain test", spec));
18873                 }
18874                 group->addChild(subGroup.release());
18875         }
18876         return group.release();
18877 }
18878
18879 // If the params missing, uninitialized case
18880 const string specializeDefaultOutputShaderTemplate (const NumberType type, const map<string, string>& params = map<string, string>())
18881 {
18882         map<string, string> parameters(params);
18883
18884         parameters["customType"]        = getAssemblyTypeName(type);
18885
18886         // Declare the const value, and use it in the initializer
18887         if (params.find("constValue") != params.end())
18888         {
18889                 parameters["variableInitializer"]       = " %const";
18890         }
18891         // Uninitialized case
18892         else
18893         {
18894                 parameters["commentDecl"]       = ";";
18895         }
18896
18897         return StringTemplate(
18898                 "OpCapability Shader\n"
18899                 "OpMemoryModel Logical GLSL450\n"
18900                 "OpEntryPoint GLCompute %main \"main\" %id\n"
18901                 "OpExecutionMode %main LocalSize 1 1 1\n"
18902                 "OpSource GLSL 430\n"
18903                 "OpName %main           \"main\"\n"
18904                 "OpName %id             \"gl_GlobalInvocationID\"\n"
18905                 // Decorators
18906                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
18907                 "OpDecorate %indata DescriptorSet 0\n"
18908                 "OpDecorate %indata Binding 0\n"
18909                 "OpDecorate %outdata DescriptorSet 0\n"
18910                 "OpDecorate %outdata Binding 1\n"
18911                 "OpDecorate %in_arr ArrayStride 4\n"
18912                 "OpDecorate %in_buf BufferBlock\n"
18913                 "OpMemberDecorate %in_buf 0 Offset 0\n"
18914                 // Base types
18915                 "%void       = OpTypeVoid\n"
18916                 "%voidf      = OpTypeFunction %void\n"
18917                 "%u32        = OpTypeInt 32 0\n"
18918                 "%i32        = OpTypeInt 32 1\n"
18919                 "%f32        = OpTypeFloat 32\n"
18920                 "%uvec3      = OpTypeVector %u32 3\n"
18921                 "%uvec3ptr   = OpTypePointer Input %uvec3\n"
18922                 "${commentDecl:opt}%const      = OpConstant ${customType} ${constValue:opt}\n"
18923                 // Derived types
18924                 "%in_ptr     = OpTypePointer Uniform ${customType}\n"
18925                 "%in_arr     = OpTypeRuntimeArray ${customType}\n"
18926                 "%in_buf     = OpTypeStruct %in_arr\n"
18927                 "%in_bufptr  = OpTypePointer Uniform %in_buf\n"
18928                 "%indata     = OpVariable %in_bufptr Uniform\n"
18929                 "%outdata    = OpVariable %in_bufptr Uniform\n"
18930                 "%id         = OpVariable %uvec3ptr Input\n"
18931                 "%var_ptr    = OpTypePointer Function ${customType}\n"
18932                 // Constants
18933                 "%zero       = OpConstant %i32 0\n"
18934                 // Main function
18935                 "%main       = OpFunction %void None %voidf\n"
18936                 "%label      = OpLabel\n"
18937                 "%out_var    = OpVariable %var_ptr Function${variableInitializer:opt}\n"
18938                 "%idval      = OpLoad %uvec3 %id\n"
18939                 "%x          = OpCompositeExtract %u32 %idval 0\n"
18940                 "%inloc      = OpAccessChain %in_ptr %indata %zero %x\n"
18941                 "%outloc     = OpAccessChain %in_ptr %outdata %zero %x\n"
18942
18943                 "%outval     = OpLoad ${customType} %out_var\n"
18944                 "              OpStore %outloc %outval\n"
18945                 "              OpReturn\n"
18946                 "              OpFunctionEnd\n"
18947         ).specialize(parameters);
18948 }
18949
18950 bool compareFloats (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
18951 {
18952         DE_ASSERT(outputAllocs.size() != 0);
18953         DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
18954
18955         // Use custom epsilon because of the float->string conversion
18956         const float     epsilon = 0.00001f;
18957
18958         for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
18959         {
18960                 vector<deUint8> expectedBytes;
18961                 float                   expected;
18962                 float                   actual;
18963
18964                 expectedOutputs[outputNdx].getBytes(expectedBytes);
18965                 memcpy(&expected, &expectedBytes.front(), expectedBytes.size());
18966                 memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size());
18967
18968                 // Test with epsilon
18969                 if (fabs(expected - actual) > epsilon)
18970                 {
18971                         log << TestLog::Message << "Error: The actual and expected values not matching."
18972                                 << " Expected: " << expected << " Actual: " << actual << " Epsilon: " << epsilon << TestLog::EndMessage;
18973                         return false;
18974                 }
18975         }
18976         return true;
18977 }
18978
18979 // Checks if the driver crash with uninitialized cases
18980 bool passthruVerify (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
18981 {
18982         DE_ASSERT(outputAllocs.size() != 0);
18983         DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
18984
18985         // Copy and discard the result.
18986         for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
18987         {
18988                 vector<deUint8> expectedBytes;
18989                 expectedOutputs[outputNdx].getBytes(expectedBytes);
18990
18991                 const size_t    width                   = expectedBytes.size();
18992                 vector<char>    data                    (width);
18993
18994                 memcpy(&data[0], outputAllocs[outputNdx]->getHostPtr(), width);
18995         }
18996         return true;
18997 }
18998
18999 tcu::TestCaseGroup* createShaderDefaultOutputGroup (tcu::TestContext& testCtx)
19000 {
19001         de::MovePtr<tcu::TestCaseGroup> group   (new tcu::TestCaseGroup(testCtx, "shader_default_output", "Test shader default output."));
19002         de::Random                                              rnd             (deStringHash(group->getName()));
19003
19004         for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19005         {
19006                 NumberType                                              numberType      = NumberType(type);
19007                 const string                                    typeName        = getNumberTypeName(numberType);
19008                 const string                                    description     = "Test the OpVariable initializer with " + typeName + ".";
19009                 de::MovePtr<tcu::TestCaseGroup> subGroup        (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19010
19011                 // 2 similar subcases (initialized and uninitialized)
19012                 for (int subCase = 0; subCase < 2; ++subCase)
19013                 {
19014                         ComputeShaderSpec spec;
19015                         spec.numWorkGroups = IVec3(1, 1, 1);
19016
19017                         map<string, string>                             params;
19018
19019                         switch (numberType)
19020                         {
19021                                 case NUMBERTYPE_INT32:
19022                                 {
19023                                         deInt32 number = getInt(rnd);
19024                                         spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19025                                         spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19026                                         params["constValue"] = numberToString(number);
19027                                         break;
19028                                 }
19029                                 case NUMBERTYPE_UINT32:
19030                                 {
19031                                         deUint32 number = rnd.getUint32();
19032                                         spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19033                                         spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19034                                         params["constValue"] = numberToString(number);
19035                                         break;
19036                                 }
19037                                 case NUMBERTYPE_FLOAT32:
19038                                 {
19039                                         float number = rnd.getFloat();
19040                                         spec.inputs.push_back(createCompositeBuffer<float>(number));
19041                                         spec.outputs.push_back(createCompositeBuffer<float>(number));
19042                                         spec.verifyIO = &compareFloats;
19043                                         params["constValue"] = numberToString(number);
19044                                         break;
19045                                 }
19046                                 default:
19047                                         DE_ASSERT(false);
19048                         }
19049
19050                         // Initialized subcase
19051                         if (!subCase)
19052                         {
19053                                 spec.assembly = specializeDefaultOutputShaderTemplate(numberType, params);
19054                                 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "initialized", "OpVariable initializer tests.", spec));
19055                         }
19056                         // Uninitialized subcase
19057                         else
19058                         {
19059                                 spec.assembly = specializeDefaultOutputShaderTemplate(numberType);
19060                                 spec.verifyIO = &passthruVerify;
19061                                 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "uninitialized", "OpVariable initializer tests.", spec));
19062                         }
19063                 }
19064                 group->addChild(subGroup.release());
19065         }
19066         return group.release();
19067 }
19068
19069 tcu::TestCaseGroup* createOpNopTests (tcu::TestContext& testCtx)
19070 {
19071         de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opnop", "Test OpNop"));
19072         RGBA                                                    defaultColors[4];
19073         map<string, string>                             opNopFragments;
19074
19075         getDefaultColors(defaultColors);
19076
19077         opNopFragments["testfun"]               =
19078                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19079                 "%param1 = OpFunctionParameter %v4f32\n"
19080                 "%label_testfun = OpLabel\n"
19081                 "OpNop\n"
19082                 "OpNop\n"
19083                 "OpNop\n"
19084                 "OpNop\n"
19085                 "OpNop\n"
19086                 "OpNop\n"
19087                 "OpNop\n"
19088                 "OpNop\n"
19089                 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19090                 "%b = OpFAdd %f32 %a %a\n"
19091                 "OpNop\n"
19092                 "%c = OpFSub %f32 %b %a\n"
19093                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19094                 "OpNop\n"
19095                 "OpNop\n"
19096                 "OpReturnValue %ret\n"
19097                 "OpFunctionEnd\n";
19098
19099         createTestsForAllStages("opnop", defaultColors, defaultColors, opNopFragments, testGroup.get());
19100
19101         return testGroup.release();
19102 }
19103
19104 tcu::TestCaseGroup* createOpNameTests (tcu::TestContext& testCtx)
19105 {
19106         de::MovePtr<tcu::TestCaseGroup> testGroup       (new tcu::TestCaseGroup(testCtx, "opname","Test OpName"));
19107         RGBA                                                    defaultColors[4];
19108         map<string, string>                             opNameFragments;
19109
19110         getDefaultColors(defaultColors);
19111
19112         opNameFragments["testfun"] =
19113                 "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19114                 "%param1     = OpFunctionParameter %v4f32\n"
19115                 "%label_func = OpLabel\n"
19116                 "%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19117                 "%b          = OpFAdd %f32 %a %a\n"
19118                 "%c          = OpFSub %f32 %b %a\n"
19119                 "%ret        = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19120                 "OpReturnValue %ret\n"
19121                 "OpFunctionEnd\n";
19122
19123         opNameFragments["debug"] =
19124                 "OpName %BP_main \"not_main\"";
19125
19126         createTestsForAllStages("opname", defaultColors, defaultColors, opNameFragments, testGroup.get());
19127
19128         return testGroup.release();
19129 }
19130
19131 tcu::TestCaseGroup* createFloat16Tests (tcu::TestContext& testCtx)
19132 {
19133         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
19134
19135         testGroup->addChild(createOpConstantFloat16Tests(testCtx));
19136         testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITH_NAN));
19137         testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITHOUT_NAN));
19138         testGroup->addChild(createFloat16FuncSet<GraphicsResources>(testCtx));
19139         testGroup->addChild(createFloat16VectorExtractSet<GraphicsResources>(testCtx));
19140         testGroup->addChild(createFloat16VectorInsertSet<GraphicsResources>(testCtx));
19141         testGroup->addChild(createFloat16VectorShuffleSet<GraphicsResources>(testCtx));
19142         testGroup->addChild(createFloat16CompositeConstructSet<GraphicsResources>(testCtx));
19143         testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeExtract"));
19144         testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeInsert"));
19145         testGroup->addChild(createFloat16ArithmeticSet<GraphicsResources>(testCtx));
19146         testGroup->addChild(createFloat16ArithmeticSet<1, GraphicsResources>(testCtx));
19147         testGroup->addChild(createFloat16ArithmeticSet<2, GraphicsResources>(testCtx));
19148         testGroup->addChild(createFloat16ArithmeticSet<3, GraphicsResources>(testCtx));
19149         testGroup->addChild(createFloat16ArithmeticSet<4, GraphicsResources>(testCtx));
19150
19151         return testGroup.release();
19152 }
19153
19154 tcu::TestCaseGroup* createFloat16Group (tcu::TestContext& testCtx)
19155 {
19156         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
19157
19158         testGroup->addChild(createFloat16OpConstantCompositeGroup(testCtx));
19159         testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITH_NAN));
19160         testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITHOUT_NAN));
19161         testGroup->addChild(createFloat16FuncSet<ComputeShaderSpec>(testCtx));
19162         testGroup->addChild(createFloat16VectorExtractSet<ComputeShaderSpec>(testCtx));
19163         testGroup->addChild(createFloat16VectorInsertSet<ComputeShaderSpec>(testCtx));
19164         testGroup->addChild(createFloat16VectorShuffleSet<ComputeShaderSpec>(testCtx));
19165         testGroup->addChild(createFloat16CompositeConstructSet<ComputeShaderSpec>(testCtx));
19166         testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeExtract"));
19167         testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeInsert"));
19168         testGroup->addChild(createFloat16ArithmeticSet<ComputeShaderSpec>(testCtx));
19169         testGroup->addChild(createFloat16ArithmeticSet<1, ComputeShaderSpec>(testCtx));
19170         testGroup->addChild(createFloat16ArithmeticSet<2, ComputeShaderSpec>(testCtx));
19171         testGroup->addChild(createFloat16ArithmeticSet<3, ComputeShaderSpec>(testCtx));
19172         testGroup->addChild(createFloat16ArithmeticSet<4, ComputeShaderSpec>(testCtx));
19173
19174         return testGroup.release();
19175 }
19176
19177 tcu::TestCaseGroup* createBoolMixedBitSizeGroup (tcu::TestContext& testCtx)
19178 {
19179         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "mixed_bitsize", "Tests boolean operands produced from instructions of different bit-sizes"));
19180
19181         de::Random                                              rnd                             (deStringHash(group->getName()));
19182         const int               numElements             = 100;
19183         vector<float>   inputData               (numElements, 0);
19184         vector<float>   outputData              (numElements, 0);
19185         fillRandomScalars(rnd, 0.0f, 100.0f, &inputData[0], 100);
19186
19187         const StringTemplate                    shaderTemplate  (
19188                 "${CAPS}\n"
19189                 "OpMemoryModel Logical GLSL450\n"
19190                 "OpEntryPoint GLCompute %main \"main\" %id\n"
19191                 "OpExecutionMode %main LocalSize 1 1 1\n"
19192                 "OpSource GLSL 430\n"
19193                 "OpName %main           \"main\"\n"
19194                 "OpName %id             \"gl_GlobalInvocationID\"\n"
19195
19196                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19197
19198                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
19199
19200                 "%id        = OpVariable %uvec3ptr Input\n"
19201                 "${CONST}\n"
19202                 "%main      = OpFunction %void None %voidf\n"
19203                 "%label     = OpLabel\n"
19204                 "%idval     = OpLoad %uvec3 %id\n"
19205                 "%x         = OpCompositeExtract %u32 %idval 0\n"
19206                 "%inloc     = OpAccessChain %f32ptr %indata %c0i32 %x\n"
19207
19208                 "${TEST}\n"
19209
19210                 "%outloc    = OpAccessChain %f32ptr %outdata %c0i32 %x\n"
19211                 "             OpStore %outloc %res\n"
19212                 "             OpReturn\n"
19213                 "             OpFunctionEnd\n"
19214         );
19215
19216         // Each test case produces 4 boolean values, and we want each of these values
19217         // to come from a different combination of the available bit-sizes, so compute
19218         // all possible combinations here.
19219         vector<deUint32>        widths;
19220         widths.push_back(32);
19221         widths.push_back(16);
19222         widths.push_back(8);
19223
19224         vector<IVec4>   cases;
19225         for (size_t width0 = 0; width0 < widths.size(); width0++)
19226         {
19227                 for (size_t width1 = 0; width1 < widths.size(); width1++)
19228                 {
19229                         for (size_t width2 = 0; width2 < widths.size(); width2++)
19230                         {
19231                                 for (size_t width3 = 0; width3 < widths.size(); width3++)
19232                                 {
19233                                         cases.push_back(IVec4(widths[width0], widths[width1], widths[width2], widths[width3]));
19234                                 }
19235                         }
19236                 }
19237         }
19238
19239         for (size_t caseNdx = 0; caseNdx < cases.size(); caseNdx++)
19240         {
19241                 /// Skip cases where all bitsizes are the same, we are only interested in testing booleans produced from instructions with different native bit-sizes
19242                 if (cases[caseNdx][0] == cases[caseNdx][1] && cases[caseNdx][0] == cases[caseNdx][2] && cases[caseNdx][0] == cases[caseNdx][3])
19243                         continue;
19244
19245                 map<string, string>     specializations;
19246                 ComputeShaderSpec       spec;
19247
19248                 // Inject appropriate capabilities and reference constants depending
19249                 // on the bit-sizes required by this test case
19250                 bool hasFloat32 = cases[caseNdx][0] == 32 || cases[caseNdx][1] == 32 || cases[caseNdx][2] == 32 || cases[caseNdx][3] == 32;
19251                 bool hasFloat16 = cases[caseNdx][0] == 16 || cases[caseNdx][1] == 16 || cases[caseNdx][2] == 16 || cases[caseNdx][3] == 16;
19252                 bool hasInt8    = cases[caseNdx][0] == 8 || cases[caseNdx][1] == 8 || cases[caseNdx][2] == 8 || cases[caseNdx][3] == 8;
19253
19254                 string capsStr  = "OpCapability Shader\n";
19255                 string constStr =
19256                         "%c0i32     = OpConstant %i32 0\n"
19257                         "%c1f32     = OpConstant %f32 1.0\n"
19258                         "%c0f32     = OpConstant %f32 0.0\n";
19259
19260                 if (hasFloat32)
19261                 {
19262                         constStr        +=
19263                                 "%c10f32    = OpConstant %f32 10.0\n"
19264                                 "%c25f32    = OpConstant %f32 25.0\n"
19265                                 "%c50f32    = OpConstant %f32 50.0\n"
19266                                 "%c90f32    = OpConstant %f32 90.0\n";
19267                 }
19268
19269                 if (hasFloat16)
19270                 {
19271                         capsStr         += "OpCapability Float16\n";
19272                         constStr        +=
19273                                 "%f16       = OpTypeFloat 16\n"
19274                                 "%c10f16    = OpConstant %f16 10.0\n"
19275                                 "%c25f16    = OpConstant %f16 25.0\n"
19276                                 "%c50f16    = OpConstant %f16 50.0\n"
19277                                 "%c90f16    = OpConstant %f16 90.0\n";
19278                 }
19279
19280                 if (hasInt8)
19281                 {
19282                         capsStr         += "OpCapability Int8\n";
19283                         constStr        +=
19284                                 "%i8        = OpTypeInt 8 1\n"
19285                                 "%c10i8     = OpConstant %i8 10\n"
19286                                 "%c25i8     = OpConstant %i8 25\n"
19287                                 "%c50i8     = OpConstant %i8 50\n"
19288                                 "%c90i8     = OpConstant %i8 90\n";
19289                 }
19290
19291                 // Each invocation reads a different float32 value as input. Depending on
19292                 // the bit-sizes required by the particular test case, we also produce
19293                 // float16 and/or int8 values by converting from the 32-bit float.
19294                 string testStr  = "";
19295                 testStr                 += "%inval32   = OpLoad %f32 %inloc\n";
19296                 if (hasFloat16)
19297                         testStr         += "%inval16   = OpFConvert %f16 %inval32\n";
19298                 if (hasInt8)
19299                         testStr         += "%inval8    = OpConvertFToS %i8 %inval32\n";
19300
19301                 // Because conversions from Float to Int round towards 0 we want our "greater" comparisons to be >=,
19302                 // that way a float32/float16 comparison such as 50.6f >= 50.0f will preserve its result
19303                 // when converted to int8, since FtoS(50.6f) results in 50. For "less" comparisons, it is the
19304                 // other way around, so in this case we want < instead of <=.
19305                 if (cases[caseNdx][0] == 32)
19306                         testStr         += "%cmp1      = OpFOrdGreaterThanEqual %bool %inval32 %c25f32\n";
19307                 else if (cases[caseNdx][0] == 16)
19308                         testStr         += "%cmp1      = OpFOrdGreaterThanEqual %bool %inval16 %c25f16\n";
19309                 else
19310                         testStr         += "%cmp1      = OpSGreaterThanEqual %bool %inval8 %c25i8\n";
19311
19312                 if (cases[caseNdx][1] == 32)
19313                         testStr         += "%cmp2      = OpFOrdLessThan %bool %inval32 %c50f32\n";
19314                 else if (cases[caseNdx][1] == 16)
19315                         testStr         += "%cmp2      = OpFOrdLessThan %bool %inval16 %c50f16\n";
19316                 else
19317                         testStr         += "%cmp2      = OpSLessThan %bool %inval8 %c50i8\n";
19318
19319                 if (cases[caseNdx][2] == 32)
19320                         testStr         += "%cmp3      = OpFOrdLessThan %bool %inval32 %c10f32\n";
19321                 else if (cases[caseNdx][2] == 16)
19322                         testStr         += "%cmp3      = OpFOrdLessThan %bool %inval16 %c10f16\n";
19323                 else
19324                         testStr         += "%cmp3      = OpSLessThan %bool %inval8 %c10i8\n";
19325
19326                 if (cases[caseNdx][3] == 32)
19327                         testStr         += "%cmp4      = OpFOrdGreaterThanEqual %bool %inval32 %c90f32\n";
19328                 else if (cases[caseNdx][3] == 16)
19329                         testStr         += "%cmp4      = OpFOrdGreaterThanEqual %bool %inval16 %c90f16\n";
19330                 else
19331                         testStr         += "%cmp4      = OpSGreaterThanEqual %bool %inval8 %c90i8\n";
19332
19333                 testStr                 += "%and1      = OpLogicalAnd %bool %cmp1 %cmp2\n";
19334                 testStr                 += "%or1       = OpLogicalOr %bool %cmp3 %cmp4\n";
19335                 testStr                 += "%or2       = OpLogicalOr %bool %and1 %or1\n";
19336                 testStr                 += "%not1      = OpLogicalNot %bool %or2\n";
19337                 testStr                 += "%res       = OpSelect %f32 %not1 %c1f32 %c0f32\n";
19338
19339                 specializations["CAPS"]         = capsStr;
19340                 specializations["CONST"]        = constStr;
19341                 specializations["TEST"]         = testStr;
19342
19343                 // Compute expected result by evaluating the boolean expression computed in the shader for each input value
19344                 for (size_t ndx = 0; ndx < numElements; ++ndx)
19345                         outputData[ndx] = !((inputData[ndx] >= 25.0f && inputData[ndx] < 50.0f) || (inputData[ndx] < 10.0f || inputData[ndx] >= 90.0f));
19346
19347                 spec.assembly = shaderTemplate.specialize(specializations);
19348                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputData)));
19349                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputData)));
19350                 spec.numWorkGroups = IVec3(numElements, 1, 1);
19351                 if (hasFloat16)
19352                         spec.requestedVulkanFeatures.extFloat16Int8 |= EXTFLOAT16INT8FEATURES_FLOAT16;
19353                 if (hasInt8)
19354                         spec.requestedVulkanFeatures.extFloat16Int8 |= EXTFLOAT16INT8FEATURES_INT8;
19355                 spec.extensions.push_back("VK_KHR_shader_float16_int8");
19356
19357                 string testName = "b" + de::toString(cases[caseNdx][0]) + "b" + de::toString(cases[caseNdx][1]) + "b" + de::toString(cases[caseNdx][2]) + "b" + de::toString(cases[caseNdx][3]);
19358                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", spec));
19359         }
19360
19361         return group.release();
19362 }
19363
19364 tcu::TestCaseGroup* createBoolGroup (tcu::TestContext& testCtx)
19365 {
19366         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "bool", "Boolean tests"));
19367
19368         testGroup->addChild(createBoolMixedBitSizeGroup(testCtx));
19369
19370         return testGroup.release();
19371 }
19372
19373 tcu::TestCaseGroup* createOpNameAbuseTests (tcu::TestContext& testCtx)
19374 {
19375         de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opname_abuse", "OpName abuse tests"));
19376         vector<CaseParameter>                   abuseCases;
19377         RGBA                                                    defaultColors[4];
19378         map<string, string>                             opNameFragments;
19379
19380         getOpNameAbuseCases(abuseCases);
19381         getDefaultColors(defaultColors);
19382
19383         opNameFragments["testfun"] =
19384                 "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19385                 "%param1     = OpFunctionParameter %v4f32\n"
19386                 "%label_func = OpLabel\n"
19387                 "%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19388                 "%b          = OpFAdd %f32 %a %a\n"
19389                 "%c          = OpFSub %f32 %b %a\n"
19390                 "%ret        = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19391                 "OpReturnValue %ret\n"
19392                 "OpFunctionEnd\n";
19393
19394         for (unsigned int i = 0; i < abuseCases.size(); i++)
19395         {
19396                 string casename;
19397                 casename = string("main") + abuseCases[i].name;
19398
19399                 opNameFragments["debug"] =
19400                         "OpName %BP_main \"" + abuseCases[i].param + "\"";
19401
19402                 createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
19403         }
19404
19405         for (unsigned int i = 0; i < abuseCases.size(); i++)
19406         {
19407                 string casename;
19408                 casename = string("b") + abuseCases[i].name;
19409
19410                 opNameFragments["debug"] =
19411                         "OpName %b \"" + abuseCases[i].param + "\"";
19412
19413                 createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
19414         }
19415
19416         {
19417                 opNameFragments["debug"] =
19418                         "OpName %test_code \"name1\"\n"
19419                         "OpName %param1    \"name2\"\n"
19420                         "OpName %a         \"name3\"\n"
19421                         "OpName %b         \"name4\"\n"
19422                         "OpName %c         \"name5\"\n"
19423                         "OpName %ret       \"name6\"\n";
19424
19425                 createTestsForAllStages("everything_named", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
19426         }
19427
19428         {
19429                 opNameFragments["debug"] =
19430                         "OpName %test_code \"the_same\"\n"
19431                         "OpName %param1    \"the_same\"\n"
19432                         "OpName %a         \"the_same\"\n"
19433                         "OpName %b         \"the_same\"\n"
19434                         "OpName %c         \"the_same\"\n"
19435                         "OpName %ret       \"the_same\"\n";
19436
19437                 createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
19438         }
19439
19440         {
19441                 opNameFragments["debug"] =
19442                         "OpName %BP_main \"to_be\"\n"
19443                         "OpName %BP_main \"or_not\"\n"
19444                         "OpName %BP_main \"to_be\"\n";
19445
19446                 createTestsForAllStages("main_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
19447         }
19448
19449         {
19450                 opNameFragments["debug"] =
19451                         "OpName %b \"to_be\"\n"
19452                         "OpName %b \"or_not\"\n"
19453                         "OpName %b \"to_be\"\n";
19454
19455                 createTestsForAllStages("b_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
19456         }
19457
19458         return abuseGroup.release();
19459 }
19460
19461
19462 tcu::TestCaseGroup* createOpMemberNameAbuseTests (tcu::TestContext& testCtx)
19463 {
19464         de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opmembername_abuse", "OpName abuse tests"));
19465         vector<CaseParameter>                   abuseCases;
19466         RGBA                                                    defaultColors[4];
19467         map<string, string>                             opMemberNameFragments;
19468
19469         getOpNameAbuseCases(abuseCases);
19470         getDefaultColors(defaultColors);
19471
19472         opMemberNameFragments["pre_main"] =
19473                 "%f3str = OpTypeStruct %f32 %f32 %f32\n";
19474
19475         opMemberNameFragments["testfun"] =
19476                 "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19477                 "%param1     = OpFunctionParameter %v4f32\n"
19478                 "%label_func = OpLabel\n"
19479                 "%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19480                 "%b          = OpFAdd %f32 %a %a\n"
19481                 "%c          = OpFSub %f32 %b %a\n"
19482                 "%cstr       = OpCompositeConstruct %f3str %c %c %c\n"
19483                 "%d          = OpCompositeExtract %f32 %cstr 0\n"
19484                 "%ret        = OpVectorInsertDynamic %v4f32 %param1 %d %c_i32_0\n"
19485                 "OpReturnValue %ret\n"
19486                 "OpFunctionEnd\n";
19487
19488         for (unsigned int i = 0; i < abuseCases.size(); i++)
19489         {
19490                 string casename;
19491                 casename = string("f3str_x") + abuseCases[i].name;
19492
19493                 opMemberNameFragments["debug"] =
19494                         "OpMemberName %f3str 0 \"" + abuseCases[i].param + "\"";
19495
19496                 createTestsForAllStages(casename, defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
19497         }
19498
19499         {
19500                 opMemberNameFragments["debug"] =
19501                         "OpMemberName %f3str 0 \"name1\"\n"
19502                         "OpMemberName %f3str 1 \"name2\"\n"
19503                         "OpMemberName %f3str 2 \"name3\"\n";
19504
19505                 createTestsForAllStages("everything_named", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
19506         }
19507
19508         {
19509                 opMemberNameFragments["debug"] =
19510                         "OpMemberName %f3str 0 \"the_same\"\n"
19511                         "OpMemberName %f3str 1 \"the_same\"\n"
19512                         "OpMemberName %f3str 2 \"the_same\"\n";
19513
19514                 createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
19515         }
19516
19517         {
19518                 opMemberNameFragments["debug"] =
19519                         "OpMemberName %f3str 0 \"to_be\"\n"
19520                         "OpMemberName %f3str 1 \"or_not\"\n"
19521                         "OpMemberName %f3str 0 \"to_be\"\n"
19522                         "OpMemberName %f3str 2 \"makes_no\"\n"
19523                         "OpMemberName %f3str 0 \"difference\"\n"
19524                         "OpMemberName %f3str 0 \"to_me\"\n";
19525
19526
19527                 createTestsForAllStages("f3str_x_has_multiple_names", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
19528         }
19529
19530         return abuseGroup.release();
19531 }
19532
19533 vector<deUint32> getSparseIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
19534 {
19535         vector<deUint32>        result;
19536         de::Random                      rnd             (seed);
19537
19538         result.reserve(numDataPoints);
19539
19540         for (deUint32 dataPointNdx = 0; dataPointNdx < numDataPoints; ++dataPointNdx)
19541                 result.push_back(rnd.getUint32());
19542
19543         return result;
19544 }
19545
19546 vector<deUint32> getSparseIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2)
19547 {
19548         vector<deUint32>        result;
19549
19550         result.reserve(inData1.size());
19551
19552         for (size_t dataPointNdx = 0; dataPointNdx < inData1.size(); ++dataPointNdx)
19553                 result.push_back(inData1[dataPointNdx] + inData2[dataPointNdx]);
19554
19555         return result;
19556 }
19557
19558 template<class SpecResource>
19559 void createSparseIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
19560 {
19561         const deUint32                  numDataPoints   = 16;
19562         const std::string               testName                ("sparse_ids");
19563         const deUint32                  seed                    (deStringHash(testName.c_str()));
19564         const vector<deUint32>  inData1                 (getSparseIdsAbuseData(numDataPoints, seed + 1));
19565         const vector<deUint32>  inData2                 (getSparseIdsAbuseData(numDataPoints, seed + 2));
19566         const vector<deUint32>  outData                 (getSparseIdsAbuseResults(inData1, inData2));
19567         const StringTemplate    preMain
19568         (
19569                 "%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
19570                 "   %up_u32 = OpTypePointer Uniform %u32\n"
19571                 "   %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
19572                 "   %SSBO32 = OpTypeStruct %ra_u32\n"
19573                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
19574                 "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
19575                 "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
19576                 " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
19577         );
19578         const StringTemplate    decoration
19579         (
19580                 "OpDecorate %ra_u32 ArrayStride 4\n"
19581                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
19582                 "OpDecorate %SSBO32 BufferBlock\n"
19583                 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
19584                 "OpDecorate %ssbo_src0 Binding 0\n"
19585                 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
19586                 "OpDecorate %ssbo_src1 Binding 1\n"
19587                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
19588                 "OpDecorate %ssbo_dst Binding 2\n"
19589         );
19590         const StringTemplate    testFun
19591         (
19592                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19593                 "    %param = OpFunctionParameter %v4f32\n"
19594
19595                 "    %entry = OpLabel\n"
19596                 "        %i = OpVariable %fp_i32 Function\n"
19597                 "             OpStore %i %c_i32_0\n"
19598                 "             OpBranch %loop\n"
19599
19600                 "     %loop = OpLabel\n"
19601                 "    %i_cmp = OpLoad %i32 %i\n"
19602                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
19603                 "             OpLoopMerge %merge %next None\n"
19604                 "             OpBranchConditional %lt %write %merge\n"
19605
19606                 "    %write = OpLabel\n"
19607                 "      %ndx = OpLoad %i32 %i\n"
19608
19609                 "      %127 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
19610                 "      %128 = OpLoad %u32 %127\n"
19611
19612                 // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
19613                 "  %4194000 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
19614                 "  %4194001 = OpLoad %u32 %4194000\n"
19615
19616                 "  %2097151 = OpIAdd %u32 %128 %4194001\n"
19617                 "  %2097152 = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
19618                 "             OpStore %2097152 %2097151\n"
19619                 "             OpBranch %next\n"
19620
19621                 "     %next = OpLabel\n"
19622                 "    %i_cur = OpLoad %i32 %i\n"
19623                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
19624                 "             OpStore %i %i_new\n"
19625                 "             OpBranch %loop\n"
19626
19627                 "    %merge = OpLabel\n"
19628                 "             OpReturnValue %param\n"
19629
19630                 "             OpFunctionEnd\n"
19631         );
19632         SpecResource                    specResource;
19633         map<string, string>             specs;
19634         VulkanFeatures                  features;
19635         map<string, string>             fragments;
19636         vector<string>                  extensions;
19637
19638         specs["num_data_points"]        = de::toString(numDataPoints);
19639
19640         fragments["decoration"]         = decoration.specialize(specs);
19641         fragments["pre_main"]           = preMain.specialize(specs);
19642         fragments["testfun"]            = testFun.specialize(specs);
19643
19644         specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
19645         specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
19646         specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
19647
19648         features.coreFeatures.vertexPipelineStoresAndAtomics    = true;
19649         features.coreFeatures.fragmentStoresAndAtomics                  = true;
19650
19651         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
19652 }
19653
19654 vector<deUint32> getLotsIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
19655 {
19656         vector<deUint32>        result;
19657         de::Random                      rnd             (seed);
19658
19659         result.reserve(numDataPoints);
19660
19661         // Fixed value
19662         result.push_back(1u);
19663
19664         // Random values
19665         for (deUint32 dataPointNdx = 1; dataPointNdx < numDataPoints; ++dataPointNdx)
19666                 result.push_back(rnd.getUint8());
19667
19668         return result;
19669 }
19670
19671 vector<deUint32> getLotsIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2, const deUint32 count)
19672 {
19673         vector<deUint32>        result;
19674
19675         result.reserve(inData1.size());
19676
19677         for (size_t dataPointNdx = 0; dataPointNdx < inData1.size(); ++dataPointNdx)
19678                 result.push_back(inData1[dataPointNdx] + count * inData2[dataPointNdx]);
19679
19680         return result;
19681 }
19682
19683 template<class SpecResource>
19684 void createLotsIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
19685 {
19686         const deUint32                  numDataPoints   = 16;
19687         const deUint32                  firstNdx                = 100u;
19688         const deUint32                  sequenceCount   = 10000u;
19689         const std::string               testName                ("lots_ids");
19690         const deUint32                  seed                    (deStringHash(testName.c_str()));
19691         const vector<deUint32>  inData1                 (getLotsIdsAbuseData(numDataPoints, seed + 1));
19692         const vector<deUint32>  inData2                 (getLotsIdsAbuseData(numDataPoints, seed + 2));
19693         const vector<deUint32>  outData                 (getLotsIdsAbuseResults(inData1, inData2, sequenceCount));
19694         const StringTemplate preMain
19695         (
19696                 "%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
19697                 "   %up_u32 = OpTypePointer Uniform %u32\n"
19698                 "   %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
19699                 "   %SSBO32 = OpTypeStruct %ra_u32\n"
19700                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
19701                 "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
19702                 "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
19703                 " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
19704         );
19705         const StringTemplate decoration
19706         (
19707                 "OpDecorate %ra_u32 ArrayStride 4\n"
19708                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
19709                 "OpDecorate %SSBO32 BufferBlock\n"
19710                 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
19711                 "OpDecorate %ssbo_src0 Binding 0\n"
19712                 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
19713                 "OpDecorate %ssbo_src1 Binding 1\n"
19714                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
19715                 "OpDecorate %ssbo_dst Binding 2\n"
19716         );
19717         const StringTemplate testFun
19718         (
19719                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19720                 "    %param = OpFunctionParameter %v4f32\n"
19721
19722                 "    %entry = OpLabel\n"
19723                 "        %i = OpVariable %fp_i32 Function\n"
19724                 "             OpStore %i %c_i32_0\n"
19725                 "             OpBranch %loop\n"
19726
19727                 "     %loop = OpLabel\n"
19728                 "    %i_cmp = OpLoad %i32 %i\n"
19729                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
19730                 "             OpLoopMerge %merge %next None\n"
19731                 "             OpBranchConditional %lt %write %merge\n"
19732
19733                 "    %write = OpLabel\n"
19734                 "      %ndx = OpLoad %i32 %i\n"
19735
19736                 "       %90 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
19737                 "       %91 = OpLoad %u32 %90\n"
19738
19739                 "       %98 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
19740                 "       %${zeroth_id} = OpLoad %u32 %98\n"
19741
19742                 "${seq}\n"
19743
19744                 // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
19745                 "      %dst = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
19746                 "             OpStore %dst %${last_id}\n"
19747                 "             OpBranch %next\n"
19748
19749                 "     %next = OpLabel\n"
19750                 "    %i_cur = OpLoad %i32 %i\n"
19751                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
19752                 "             OpStore %i %i_new\n"
19753                 "             OpBranch %loop\n"
19754
19755                 "    %merge = OpLabel\n"
19756                 "             OpReturnValue %param\n"
19757
19758                 "             OpFunctionEnd\n"
19759         );
19760         deUint32                                lastId                  = firstNdx;
19761         SpecResource                    specResource;
19762         map<string, string>             specs;
19763         VulkanFeatures                  features;
19764         map<string, string>             fragments;
19765         vector<string>                  extensions;
19766         std::string                             sequence;
19767
19768         for (deUint32 sequenceNdx = 0; sequenceNdx < sequenceCount; ++sequenceNdx)
19769         {
19770                 const deUint32          sequenceId              = sequenceNdx + firstNdx;
19771                 const std::string       sequenceIdStr   = de::toString(sequenceId);
19772
19773                 sequence += "%" + sequenceIdStr + " = OpIAdd %u32 %91 %" + de::toString(sequenceId - 1) + "\n";
19774                 lastId = sequenceId;
19775
19776                 if (sequenceNdx == 0)
19777                         sequence.reserve((10 + sequence.length()) * sequenceCount);
19778         }
19779
19780         specs["num_data_points"]        = de::toString(numDataPoints);
19781         specs["zeroth_id"]                      = de::toString(firstNdx - 1);
19782         specs["last_id"]                        = de::toString(lastId);
19783         specs["seq"]                            = sequence;
19784
19785         fragments["decoration"]         = decoration.specialize(specs);
19786         fragments["pre_main"]           = preMain.specialize(specs);
19787         fragments["testfun"]            = testFun.specialize(specs);
19788
19789         specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
19790         specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
19791         specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
19792
19793         features.coreFeatures.vertexPipelineStoresAndAtomics    = true;
19794         features.coreFeatures.fragmentStoresAndAtomics                  = true;
19795
19796         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
19797 }
19798
19799 tcu::TestCaseGroup* createSpirvIdsAbuseTests (tcu::TestContext& testCtx)
19800 {
19801         de::MovePtr<tcu::TestCaseGroup> testGroup       (new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
19802
19803         createSparseIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
19804         createLotsIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
19805
19806         return testGroup.release();
19807 }
19808
19809 tcu::TestCaseGroup* createSpirvIdsAbuseGroup (tcu::TestContext& testCtx)
19810 {
19811         de::MovePtr<tcu::TestCaseGroup> testGroup       (new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
19812
19813         createSparseIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
19814         createLotsIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
19815
19816         return testGroup.release();
19817 }
19818
19819 tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
19820 {
19821         const bool testComputePipeline = true;
19822
19823         de::MovePtr<tcu::TestCaseGroup> instructionTests        (new tcu::TestCaseGroup(testCtx, "instruction", "Instructions with special opcodes/operands"));
19824         de::MovePtr<tcu::TestCaseGroup> computeTests            (new tcu::TestCaseGroup(testCtx, "compute", "Compute Instructions with special opcodes/operands"));
19825         de::MovePtr<tcu::TestCaseGroup> graphicsTests           (new tcu::TestCaseGroup(testCtx, "graphics", "Graphics Instructions with special opcodes/operands"));
19826
19827         computeTests->addChild(createSpivVersionCheckTests(testCtx, testComputePipeline));
19828         computeTests->addChild(createLocalSizeGroup(testCtx));
19829         computeTests->addChild(createOpNopGroup(testCtx));
19830         computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITHOUT_NAN));
19831         computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITH_NAN));
19832         computeTests->addChild(createOpAtomicGroup(testCtx, false));
19833         computeTests->addChild(createOpAtomicGroup(testCtx, true));                                     // Using new StorageBuffer decoration
19834         computeTests->addChild(createOpAtomicGroup(testCtx, false, 1024, true));        // Return value validation
19835         computeTests->addChild(createOpAtomicGroup(testCtx, true, 65536, false, true)); // volatile atomics
19836         computeTests->addChild(createOpLineGroup(testCtx));
19837         computeTests->addChild(createOpModuleProcessedGroup(testCtx));
19838         computeTests->addChild(createOpNoLineGroup(testCtx));
19839         computeTests->addChild(createOpConstantNullGroup(testCtx));
19840         computeTests->addChild(createOpConstantCompositeGroup(testCtx));
19841         computeTests->addChild(createOpConstantUsageGroup(testCtx));
19842         computeTests->addChild(createSpecConstantGroup(testCtx));
19843         computeTests->addChild(createOpSourceGroup(testCtx));
19844         computeTests->addChild(createOpSourceExtensionGroup(testCtx));
19845         computeTests->addChild(createDecorationGroupGroup(testCtx));
19846         computeTests->addChild(createOpPhiGroup(testCtx));
19847         computeTests->addChild(createLoopControlGroup(testCtx));
19848         computeTests->addChild(createFunctionControlGroup(testCtx));
19849         computeTests->addChild(createSelectionControlGroup(testCtx));
19850         computeTests->addChild(createBlockOrderGroup(testCtx));
19851         computeTests->addChild(createMultipleShaderGroup(testCtx));
19852         computeTests->addChild(createMemoryAccessGroup(testCtx));
19853         computeTests->addChild(createOpCopyMemoryGroup(testCtx));
19854         computeTests->addChild(createOpCopyObjectGroup(testCtx));
19855         computeTests->addChild(createNoContractionGroup(testCtx));
19856         computeTests->addChild(createOpUndefGroup(testCtx));
19857         computeTests->addChild(createOpUnreachableGroup(testCtx));
19858         computeTests->addChild(createOpQuantizeToF16Group(testCtx));
19859         computeTests->addChild(createOpFRemGroup(testCtx));
19860         computeTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_PASS));
19861         computeTests->addChild(createOpSRemComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
19862         computeTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_PASS));
19863         computeTests->addChild(createOpSModComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
19864         computeTests->addChild(createConvertComputeTests(testCtx, "OpSConvert", "sconvert"));
19865         computeTests->addChild(createConvertComputeTests(testCtx, "OpUConvert", "uconvert"));
19866         computeTests->addChild(createConvertComputeTests(testCtx, "OpFConvert", "fconvert"));
19867         computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertSToF", "convertstof"));
19868         computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToS", "convertftos"));
19869         computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertUToF", "convertutof"));
19870         computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToU", "convertftou"));
19871         computeTests->addChild(createOpCompositeInsertGroup(testCtx));
19872         computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
19873         computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
19874         computeTests->addChild(createOpNMinGroup(testCtx));
19875         computeTests->addChild(createOpNMaxGroup(testCtx));
19876         computeTests->addChild(createOpNClampGroup(testCtx));
19877         {
19878                 de::MovePtr<tcu::TestCaseGroup> computeAndroidTests     (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
19879
19880                 computeAndroidTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
19881                 computeAndroidTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
19882
19883                 computeTests->addChild(computeAndroidTests.release());
19884         }
19885
19886         computeTests->addChild(create8BitStorageComputeGroup(testCtx));
19887         computeTests->addChild(create16BitStorageComputeGroup(testCtx));
19888         computeTests->addChild(createFloatControlsComputeGroup(testCtx));
19889         computeTests->addChild(createUboMatrixPaddingComputeGroup(testCtx));
19890         computeTests->addChild(createCompositeInsertComputeGroup(testCtx));
19891         computeTests->addChild(createVariableInitComputeGroup(testCtx));
19892         computeTests->addChild(createConditionalBranchComputeGroup(testCtx));
19893         computeTests->addChild(createIndexingComputeGroup(testCtx));
19894         computeTests->addChild(createVariablePointersComputeGroup(testCtx));
19895         computeTests->addChild(createPhysicalPointersComputeGroup(testCtx));
19896         computeTests->addChild(createImageSamplerComputeGroup(testCtx));
19897         computeTests->addChild(createOpNameGroup(testCtx));
19898         computeTests->addChild(createOpMemberNameGroup(testCtx));
19899         computeTests->addChild(createPointerParameterComputeGroup(testCtx));
19900         computeTests->addChild(createFloat16Group(testCtx));
19901         computeTests->addChild(createBoolGroup(testCtx));
19902         computeTests->addChild(createWorkgroupMemoryComputeGroup(testCtx));
19903         computeTests->addChild(createSpirvIdsAbuseGroup(testCtx));
19904         computeTests->addChild(createSignedIntCompareGroup(testCtx));
19905         computeTests->addChild(createUnusedVariableComputeTests(testCtx));
19906         computeTests->addChild(createPtrAccessChainGroup(testCtx));
19907         computeTests->addChild(createHlslComputeGroup(testCtx));
19908         computeTests->addChild(create64bitCompareComputeGroup(testCtx));
19909
19910         graphicsTests->addChild(createCrossStageInterfaceTests(testCtx));
19911         graphicsTests->addChild(createSpivVersionCheckTests(testCtx, !testComputePipeline));
19912         graphicsTests->addChild(createOpNopTests(testCtx));
19913         graphicsTests->addChild(createOpSourceTests(testCtx));
19914         graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
19915         graphicsTests->addChild(createOpModuleProcessedTests(testCtx));
19916         graphicsTests->addChild(createOpLineTests(testCtx));
19917         graphicsTests->addChild(createOpNoLineTests(testCtx));
19918         graphicsTests->addChild(createOpConstantNullTests(testCtx));
19919         graphicsTests->addChild(createOpConstantCompositeTests(testCtx));
19920         graphicsTests->addChild(createMemoryAccessTests(testCtx));
19921         graphicsTests->addChild(createOpUndefTests(testCtx));
19922         graphicsTests->addChild(createSelectionBlockOrderTests(testCtx));
19923         graphicsTests->addChild(createModuleTests(testCtx));
19924         graphicsTests->addChild(createUnusedVariableTests(testCtx));
19925         graphicsTests->addChild(createSwitchBlockOrderTests(testCtx));
19926         graphicsTests->addChild(createOpPhiTests(testCtx));
19927         graphicsTests->addChild(createNoContractionTests(testCtx));
19928         graphicsTests->addChild(createOpQuantizeTests(testCtx));
19929         graphicsTests->addChild(createLoopTests(testCtx));
19930         graphicsTests->addChild(createSpecConstantTests(testCtx));
19931         graphicsTests->addChild(createSpecConstantOpQuantizeToF16Group(testCtx));
19932         graphicsTests->addChild(createBarrierTests(testCtx));
19933         graphicsTests->addChild(createDecorationGroupTests(testCtx));
19934         graphicsTests->addChild(createFRemTests(testCtx));
19935         graphicsTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
19936         graphicsTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
19937
19938         {
19939                 de::MovePtr<tcu::TestCaseGroup> graphicsAndroidTests    (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
19940
19941                 graphicsAndroidTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
19942                 graphicsAndroidTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
19943
19944                 graphicsTests->addChild(graphicsAndroidTests.release());
19945         }
19946         graphicsTests->addChild(createOpNameTests(testCtx));
19947         graphicsTests->addChild(createOpNameAbuseTests(testCtx));
19948         graphicsTests->addChild(createOpMemberNameAbuseTests(testCtx));
19949
19950         graphicsTests->addChild(create8BitStorageGraphicsGroup(testCtx));
19951         graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
19952         graphicsTests->addChild(createFloatControlsGraphicsGroup(testCtx));
19953         graphicsTests->addChild(createUboMatrixPaddingGraphicsGroup(testCtx));
19954         graphicsTests->addChild(createCompositeInsertGraphicsGroup(testCtx));
19955         graphicsTests->addChild(createVariableInitGraphicsGroup(testCtx));
19956         graphicsTests->addChild(createConditionalBranchGraphicsGroup(testCtx));
19957         graphicsTests->addChild(createIndexingGraphicsGroup(testCtx));
19958         graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));
19959         graphicsTests->addChild(createImageSamplerGraphicsGroup(testCtx));
19960         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpSConvert", "sconvert"));
19961         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpUConvert", "uconvert"));
19962         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpFConvert", "fconvert"));
19963         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertSToF", "convertstof"));
19964         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToS", "convertftos"));
19965         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertUToF", "convertutof"));
19966         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToU", "convertftou"));
19967         graphicsTests->addChild(createPointerParameterGraphicsGroup(testCtx));
19968         graphicsTests->addChild(createVaryingNameGraphicsGroup(testCtx));
19969         graphicsTests->addChild(createFloat16Tests(testCtx));
19970         graphicsTests->addChild(createSpirvIdsAbuseTests(testCtx));
19971         graphicsTests->addChild(create64bitCompareGraphicsGroup(testCtx));
19972
19973         instructionTests->addChild(computeTests.release());
19974         instructionTests->addChild(graphicsTests.release());
19975         instructionTests->addChild(createSpirvVersion1p4Group(testCtx));
19976
19977         return instructionTests.release();
19978 }
19979
19980 } // SpirVAssembly
19981 } // vkt