external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsmInstructionTests.cpp

   1 /*-------------------------------------------------------------------------
   2  * Vulkan Conformance Tests
   3  * ------------------------
   4  *
   5  * Copyright (c) 2015 Google Inc.
   6  * Copyright (c) 2016 The Khronos Group Inc.
   7  *
   8  * Licensed under the Apache License, Version 2.0 (the "License");
   9  * you may not use this file except in compliance with the License.
  10  * You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  *
  20  *//*!
  21  * \file
  22  * \brief SPIR-V Assembly Tests for Instructions (special opcode/operand)
  23  *//*--------------------------------------------------------------------*/
  24
  25 #include "vktSpvAsmInstructionTests.hpp"
  26
  27 #include "tcuCommandLine.hpp"
  28 #include "tcuFormatUtil.hpp"
  29 #include "tcuFloat.hpp"
  30 #include "tcuFloatFormat.hpp"
  31 #include "tcuRGBA.hpp"
  32 #include "tcuStringTemplate.hpp"
  33 #include "tcuTestLog.hpp"
  34 #include "tcuVectorUtil.hpp"
  35 #include "tcuInterval.hpp"
  36
  37 #include "vkDefs.hpp"
  38 #include "vkDeviceUtil.hpp"
  39 #include "vkMemUtil.hpp"
  40 #include "vkPlatform.hpp"
  41 #include "vkPrograms.hpp"
  42 #include "vkQueryUtil.hpp"
  43 #include "vkRef.hpp"
  44 #include "vkRefUtil.hpp"
  45 #include "vkStrUtil.hpp"
  46 #include "vkTypeUtil.hpp"
  47
  48 #include "deStringUtil.hpp"
  49 #include "deUniquePtr.hpp"
  50 #include "deMath.h"
  51 #include "deRandom.hpp"
  52 #include "tcuStringTemplate.hpp"
  53
  54 #include "vktSpvAsmCrossStageInterfaceTests.hpp"
  55 #include "vktSpvAsm8bitStorageTests.hpp"
  56 #include "vktSpvAsm16bitStorageTests.hpp"
  57 #include "vktSpvAsmUboMatrixPaddingTests.hpp"
  58 #include "vktSpvAsmConditionalBranchTests.hpp"
  59 #include "vktSpvAsmIndexingTests.hpp"
  60 #include "vktSpvAsmImageSamplerTests.hpp"
  61 #include "vktSpvAsmComputeShaderCase.hpp"
  62 #include "vktSpvAsmComputeShaderTestUtil.hpp"
  63 #include "vktSpvAsmFloatControlsTests.hpp"
  64 #include "vktSpvAsmFromHlslTests.hpp"
  65 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
  66 #include "vktSpvAsmVariablePointersTests.hpp"
  67 #include "vktSpvAsmVariableInitTests.hpp"
  68 #include "vktSpvAsmPointerParameterTests.hpp"
  69 #include "vktSpvAsmSpirvVersion1p4Tests.hpp"
  70 #include "vktSpvAsmSpirvVersionTests.hpp"
  71 #include "vktTestCaseUtil.hpp"
  72 #include "vktSpvAsmLoopDepLenTests.hpp"
  73 #include "vktSpvAsmLoopDepInfTests.hpp"
  74 #include "vktSpvAsmCompositeInsertTests.hpp"
  75 #include "vktSpvAsmVaryingNameTests.hpp"
  76 #include "vktSpvAsmWorkgroupMemoryTests.hpp"
  77 #include "vktSpvAsmSignedIntCompareTests.hpp"
  78 #include "vktSpvAsmPtrAccessChainTests.hpp"
  79 #include "vktSpvAsm64bitCompareTests.hpp"
  80
  81 #include <cmath>
  82 #include <limits>
  83 #include <map>
  84 #include <string>
  85 #include <sstream>
  86 #include <utility>
  87 #include <stack>
  88
  89 namespace vkt
  90 {
  91 namespace SpirVAssembly
  92 {
  93
  94 namespace
  95 {
  96
  97 using namespace vk;
     // Names used unqualified throughout the rest of this anonymous namespace.
  98 using std::map;
  99 using std::string;
 100 using std::vector;
 101 using tcu::IVec3;
 102 using tcu::IVec4;
 103 using tcu::RGBA;
 104 using tcu::TestLog;
 105 using tcu::TestStatus;
 106 using tcu::Vec4;
 107 using de::UniquePtr;
 108 using tcu::StringTemplate;
 109 using tcu::Vec4; // NOTE(review): repeats the tcu::Vec4 using-declaration a few lines above; redundant.
 110
 111 const bool TEST_WITH_NAN        = true;  // Readable name for 'true'; presumably selects test variants that include NaN inputs (uses are outside this view).
 112 const bool TEST_WITHOUT_NAN     = false; // Readable name for 'false'; presumably the counterpart that excludes NaN inputs - TODO confirm at the call sites.
 113
 114 const string loadScalarF16FromUint =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %ld_arg_${var}: takes an i32 element index and
             // returns a single f16. Two f16 values share each u32 word, so the word
             // is addressed with index / %c_u32_2 inside member 0 of %${var}, the word
             // is bitcast to v2f16, and the lane is picked with index & %c_u32_1.
             // (%c_u32_N are presumably the u32 constants N; they are defined outside
             // this view.)
 115         "%ld_arg_${var} = OpFunction %f16 None %f16_i32_fn\n"
 116         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 117         "%ld_arg_${var}_entry = OpLabel\n"
 118         "%ld_arg_${var}_conv = OpBitcast %u32 %ld_arg_${var}_param\n"
 119         "%ld_arg_${var}_div = OpUDiv %u32 %ld_arg_${var}_conv %c_u32_2\n"
 120         "%ld_arg_${var}_and_low = OpBitwiseAnd %u32 %ld_arg_${var}_param %c_u32_1\n"
 121         "%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_div\n"
 122         "%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
 123         "%ld_arg_${var}_unpack = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
 124         "%ld_arg_${var}_ex = OpVectorExtractDynamic %f16 %ld_arg_${var}_unpack %ld_arg_${var}_and_low\n"
 125         "OpReturnValue %ld_arg_${var}_ex\n"
 126         "OpFunctionEnd\n";
 127
 128 const string loadV2F16FromUint =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %ld_arg_${var}: takes an i32 index, loads the u32 word
             // at that index in member 0 of %${var} and returns it bitcast to v2f16.
 129         "%ld_arg_${var} = OpFunction %v2f16 None %v2f16_i32_fn\n"
 130         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 131         "%ld_arg_${var}_entry = OpLabel\n"
 132         "%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param\n"
 133         "%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
 134         "%ld_arg_${var}_cast = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
 135         "OpReturnValue %ld_arg_${var}_cast\n"
 136         "OpFunctionEnd\n";
 137
 138 const string loadV3F16FromUints =
 139         // Since we allocate a vec4 worth of values, this case is almost the
 140         // same as the vec4 case.
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %ld_arg_${var}: takes an i32 element index, loads the
             // two u32 words at [member 0][index][%c_u32_0] and [..][%c_u32_1] of
             // %${var}, bitcasts each to v2f16 and shuffles components 0 1 2 of the
             // concatenated pair into the returned v3f16 (the fourth half is dropped).
 141         "%ld_arg_${var} = OpFunction %v3f16 None %v3f16_i32_fn\n"
 142         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 143         "%ld_arg_${var}_entry = OpLabel\n"
 144         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 145         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 146         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 147         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 148         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 149         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
 150         "%ld_arg_${var}_shuffle = OpVectorShuffle %v3f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2\n"
 151         "OpReturnValue %ld_arg_${var}_shuffle\n"
 152         "OpFunctionEnd\n";
 153
 154 const string loadV4F16FromUints =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %ld_arg_${var}: takes an i32 element index, loads the
             // two u32 words at [member 0][index][%c_u32_0] and [..][%c_u32_1] of
             // %${var}, bitcasts each to v2f16 and shuffles all four halves (0 1 2 3)
             // into the returned v4f16.
 155         "%ld_arg_${var} = OpFunction %v4f16 None %v4f16_i32_fn\n"
 156         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 157         "%ld_arg_${var}_entry = OpLabel\n"
 158         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 159         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 160         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 161         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 162         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 163         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
 164         "%ld_arg_${var}_shuffle = OpVectorShuffle %v4f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2 3\n"
 165         "OpReturnValue %ld_arg_${var}_shuffle\n"
 166         "OpFunctionEnd\n";
 167
 168 const string loadM2x2F16FromUints =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %ld_arg_${var}: takes an i32 element index, loads the
             // two u32 words at [member 0][index][%c_u32_0] and [..][%c_u32_1] of
             // %${var}, bitcasts each word to one v2f16 column and constructs the
             // returned m2x2f16 from those two columns.
 169         "%ld_arg_${var} = OpFunction %m2x2f16 None %m2x2f16_i32_fn\n"
 170         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 171         "%ld_arg_${var}_entry = OpLabel\n"
 172         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 173         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 174         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 175         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 176         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 177         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
 178         "%ld_arg_${var}_cons = OpCompositeConstruct %m2x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1\n"
 179         "OpReturnValue %ld_arg_${var}_cons\n"
 180         "OpFunctionEnd\n";
 181
 182 const string loadM2x3F16FromUints =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %ld_arg_${var}: takes an i32 element index and loads
             // four u32 words at [member 0][index][%c_u32_0 .. %c_u32_3] of %${var}.
             // Each consecutive pair of words is bitcast to two v2f16 values and
             // shuffled (components 0 1 2) into one v3f16 column; the two columns are
             // combined into the returned m2x3f16. The fourth half of each pair is
             // not used.
 183         "%ld_arg_${var} = OpFunction %m2x3f16 None %m2x3f16_i32_fn\n"
 184         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 185         "%ld_arg_${var}_entry = OpLabel\n"
 186         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 187         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 188         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 189         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 190         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 191         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 192         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 193         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 194         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 195         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 196         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 197         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 198         "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
 199         "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
 200         "%ld_arg_${var}_mat = OpCompositeConstruct %m2x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
 201         "OpReturnValue %ld_arg_${var}_mat\n"
 202         "OpFunctionEnd\n";
 203
 204 const string loadM2x4F16FromUints =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %ld_arg_${var}: takes an i32 element index and loads
             // four u32 words at [member 0][index][%c_u32_0 .. %c_u32_3] of %${var}.
             // Each consecutive pair of words is bitcast to two v2f16 values and
             // shuffled (components 0 1 2 3) into one v4f16 column; the two columns
             // are combined into the returned m2x4f16.
 205         "%ld_arg_${var} = OpFunction %m2x4f16 None %m2x4f16_i32_fn\n"
 206         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 207         "%ld_arg_${var}_entry = OpLabel\n"
 208         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 209         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 210         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 211         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 212         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 213         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 214         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 215         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 216         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 217         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 218         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 219         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 220         "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
 221         "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
 222         "%ld_arg_${var}_mat = OpCompositeConstruct %m2x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
 223         "OpReturnValue %ld_arg_${var}_mat\n"
 224         "OpFunctionEnd\n";
 225
 226 const string loadM3x2F16FromUints =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %ld_arg_${var}: takes an i32 element index, loads
             // three u32 words at [member 0][index][%c_u32_0 .. %c_u32_2] of %${var},
             // bitcasts each word to one v2f16 column and constructs the returned
             // m3x2f16 from the three columns.
 227         "%ld_arg_${var} = OpFunction %m3x2f16 None %m3x2f16_i32_fn\n"
 228         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 229         "%ld_arg_${var}_entry = OpLabel\n"
 230         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 231         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 232         "%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 233         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 234         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 235         "%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
 236         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 237         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
 238         "%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
 239         "%ld_arg_${var}_mat = OpCompositeConstruct %m3x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2\n"
 240         "OpReturnValue %ld_arg_${var}_mat\n"
 241         "OpFunctionEnd\n";
 242
 243 const string loadM3x3F16FromUints =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %ld_arg_${var}: takes an i32 element index and loads
             // six u32 words at [member 0][index][%c_u32_0 .. %c_u32_5] of %${var}.
             // Each consecutive pair of words is bitcast to two v2f16 values and
             // shuffled (components 0 1 2) into one v3f16 column; the three columns
             // are combined into the returned m3x3f16. The fourth half of each pair
             // is not used.
 244         "%ld_arg_${var} = OpFunction %m3x3f16 None %m3x3f16_i32_fn\n"
 245         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 246         "%ld_arg_${var}_entry = OpLabel\n"
 247         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 248         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 249         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 250         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 251         "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
 252         "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
 253         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 254         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 255         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 256         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 257         "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
 258         "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
 259         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 260         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 261         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 262         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 263         "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
 264         "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
 265         "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
 266         "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
 267         "%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
 268         "%ld_arg_${var}_mat = OpCompositeConstruct %m3x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
 269         "OpReturnValue %ld_arg_${var}_mat\n"
 270         "OpFunctionEnd\n";
 271
 272 const string loadM3x4F16FromUints =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %ld_arg_${var}: takes an i32 element index and loads
             // six u32 words at [member 0][index][%c_u32_0 .. %c_u32_5] of %${var}.
             // Each consecutive pair of words is bitcast to two v2f16 values and
             // shuffled (components 0 1 2 3) into one v4f16 column; the three columns
             // are combined into the returned m3x4f16.
 273         "%ld_arg_${var} = OpFunction %m3x4f16 None %m3x4f16_i32_fn\n"
 274         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 275         "%ld_arg_${var}_entry = OpLabel\n"
 276         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 277         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 278         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 279         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 280         "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
 281         "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
 282         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 283         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 284         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 285         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 286         "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
 287         "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
 288         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 289         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 290         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 291         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 292         "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
 293         "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
 294         "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
 295         "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
 296         "%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
 297         "%ld_arg_${var}_mat = OpCompositeConstruct %m3x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
 298         "OpReturnValue %ld_arg_${var}_mat\n"
 299         "OpFunctionEnd\n";
 300
 301 const string loadM4x2F16FromUints =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %ld_arg_${var}: takes an i32 element index, loads
             // four u32 words at [member 0][index][%c_u32_0 .. %c_u32_3] of %${var},
             // bitcasts each word to one v2f16 column and constructs the returned
             // m4x2f16 from the four columns.
 302         "%ld_arg_${var} = OpFunction %m4x2f16 None %m4x2f16_i32_fn\n"
 303         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 304         "%ld_arg_${var}_entry = OpLabel\n"
 305         "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 306         "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 307         "%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 308         "%ld_arg_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 309         "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
 310         "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
 311         "%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
 312         "%ld_arg_${var}_ld3 = OpLoad %u32 %ld_arg_${var}_gep3\n"
 313         "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
 314         "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
 315         "%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
 316         "%ld_arg_${var}_bc3 = OpBitcast %v2f16 %ld_arg_${var}_ld3\n"
 317         "%ld_arg_${var}_mat = OpCompositeConstruct %m4x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2 %ld_arg_${var}_bc3\n"
 318         "OpReturnValue %ld_arg_${var}_mat\n"
 319         "OpFunctionEnd\n";
 320
 321 const string loadM4x3F16FromUints =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %ld_arg_${var}: takes an i32 element index and loads
             // eight u32 words at [member 0][index][%c_u32_0 .. %c_u32_7] of %${var}.
             // Each consecutive pair of words is bitcast to two v2f16 values and
             // shuffled (components 0 1 2) into one v3f16 column; the four columns
             // are combined into the returned m4x3f16. The fourth half of each pair
             // is not used.
 322         "%ld_arg_${var} = OpFunction %m4x3f16 None %m4x3f16_i32_fn\n"
 323         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 324         "%ld_arg_${var}_entry = OpLabel\n"
 325         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 326         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 327         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 328         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 329         "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
 330         "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
 331         "%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
 332         "%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
 333         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 334         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 335         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 336         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 337         "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
 338         "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
 339         "%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
 340         "%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
 341         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 342         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 343         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 344         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 345         "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
 346         "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
 347         "%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
 348         "%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
 349         "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
 350         "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
 351         "%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
 352         "%ld_arg_${var}_vec3 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2\n"
 353         "%ld_arg_${var}_mat = OpCompositeConstruct %m4x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
 354         "OpReturnValue %ld_arg_${var}_mat\n"
 355         "OpFunctionEnd\n";
 356
 357 const string loadM4x4F16FromUints =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %ld_arg_${var}: takes an i32 element index and loads
             // eight u32 words at [member 0][index][%c_u32_0 .. %c_u32_7] of %${var}.
             // Each consecutive pair of words is bitcast to two v2f16 values and
             // shuffled (components 0 1 2 3) into one v4f16 column; the four columns
             // are combined into the returned m4x4f16.
 358         "%ld_arg_${var} = OpFunction %m4x4f16 None %m4x4f16_i32_fn\n"
 359         "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
 360         "%ld_arg_${var}_entry = OpLabel\n"
 361         "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
 362         "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
 363         "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
 364         "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
 365         "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
 366         "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
 367         "%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
 368         "%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
 369         "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
 370         "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
 371         "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
 372         "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
 373         "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
 374         "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
 375         "%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
 376         "%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
 377         "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
 378         "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
 379         "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
 380         "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
 381         "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
 382         "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
 383         "%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
 384         "%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
 385         "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
 386         "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
 387         "%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
 388         "%ld_arg_${var}_vec3 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2 3\n"
 389         "%ld_arg_${var}_mat = OpCompositeConstruct %m4x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
 390         "OpReturnValue %ld_arg_${var}_mat\n"
 391         "OpFunctionEnd\n";
 392
 393 const string storeScalarF16AsUint =
 394         // This version is sensitive to the initial value in the output buffer.
 395         // The infrastructure sets all output buffer bits to one before invoking
 396         // the shader so this version uses an atomic AND to generate the correct
 397         // zeroes.
             //
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %st_fn_${var}: takes an f16 value (param1) and an i32
             // element index (param2). The value is inserted into lane (index & 1) of
             // a zero-bit v2f16, a mask of ones is OR-ed into the other 16-bit half,
             // and the result is atomically AND-ed into the u32 word at
             // [member 0][index / %c_u32_2] of %${var}.
             // (%c_u32_low_ones / %c_u32_high_ones are presumably 16-bit masks for the
             // low / high half; their definitions are outside this view.)
 398         "%st_fn_${var} = OpFunction %void None %void_f16_i32_fn\n"
 399         "%st_fn_${var}_param1 = OpFunctionParameter %f16\n"
 400         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 401         "%st_fn_${var}_entry = OpLabel\n"
 402         "%st_fn_${var}_and_low = OpBitwiseAnd %u32 %st_fn_${var}_param2 %c_u32_1\n"
 403         "%st_fn_${var}_zero_vec = OpBitcast %v2f16 %c_u32_0\n"
 404         "%st_fn_${var}_insert = OpVectorInsertDynamic %v2f16 %st_fn_${var}_zero_vec %st_fn_${var}_param1 %st_fn_${var}_and_low\n"
 405         "%st_fn_${var}_odd = OpIEqual %bool %st_fn_${var}_and_low %c_u32_1\n"
 406         // OR 16 bits of ones into the half that was not populated with the result,
             // so the atomic AND below leaves that half of the destination word as is.
 407         "%st_fn_${var}_sel = OpSelect %u32 %st_fn_${var}_odd %c_u32_low_ones %c_u32_high_ones\n"
 408         "%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_insert\n"
 409         "%st_fn_${var}_or = OpBitwiseOr %u32 %st_fn_${var}_cast %st_fn_${var}_sel\n"
 410         "%st_fn_${var}_conv = OpBitcast %u32 %st_fn_${var}_param2\n"
 411         "%st_fn_${var}_div = OpUDiv %u32 %st_fn_${var}_conv %c_u32_2\n"
 412         "%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_div\n"
             // OpAtomicAnd operands: pointer, scope id, memory-semantics id, value.
 413         "%st_fn_${var}_and = OpAtomicAnd %u32 %st_fn_${var}_gep %c_u32_1 %c_u32_0 %st_fn_${var}_or\n"
 414         "OpReturn\n"
 415         "OpFunctionEnd\n";
 416
 417 const string storeV2F16AsUint =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %st_fn_${var}: takes a v2f16 value (param1) and an i32
             // index (param2), bitcasts the vector to one u32 and stores it at
             // [member 0][index] of %${var}.
 418         "%st_fn_${var} = OpFunction %void None %void_v2f16_i32_fn\n"
 419         "%st_fn_${var}_param1 = OpFunctionParameter %v2f16\n"
 420         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 421         "%st_fn_${var}_entry = OpLabel\n"
 422         "%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_param1\n"
 423         "%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2\n"
 424         "OpStore %st_fn_${var}_gep %st_fn_${var}_cast\n"
 425         "OpReturn\n"
 426         "OpFunctionEnd\n";
 427
 428 const string storeV3F16AsUints =
 429         // Since we allocate a vec4 worth of values, this case can be treated
 430         // almost the same as a vec4 case. We will store some extra data that
 431         // should not be compared.
             //
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %st_fn_${var}: takes a v3f16 value (param1) and an i32
             // element index (param2), splits the vector into two v2f16 halves,
             // bitcasts each to u32 and stores them at [member 0][index][%c_u32_0] and
             // [..][%c_u32_1] of %${var}.
 432         "%st_fn_${var} = OpFunction %void None %void_v3f16_i32_fn\n"
 433         "%st_fn_${var}_param1 = OpFunctionParameter %v3f16\n"
 434         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 435         "%st_fn_${var}_entry = OpLabel\n"
 436         "%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
             // Shuffle index 3 is past the first 3-component operand, so it selects
             // component 0 of the second operand (param1 again): this is the extra,
             // don't-care half mentioned above.
 437         "%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
 438         "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
 439         "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
 440         "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
 441         "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
 442         "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
 443         "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
 444         "OpReturn\n"
 445         "OpFunctionEnd\n";
 446
 447 const string storeV4F16AsUints =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %st_fn_${var}: takes a v4f16 value (param1) and an i32
             // element index (param2), splits the vector into components (0 1) and
             // (2 3), bitcasts each v2f16 half to u32 and stores them at
             // [member 0][index][%c_u32_0] and [..][%c_u32_1] of %${var}.
 448         "%st_fn_${var} = OpFunction %void None %void_v4f16_i32_fn\n"
 449         "%st_fn_${var}_param1 = OpFunctionParameter %v4f16\n"
 450         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 451         "%st_fn_${var}_entry = OpLabel\n"
 452         "%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
 453         "%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
 454         "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
 455         "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
 456         "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
 457         "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
 458         "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
 459         "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
 460         "OpReturn\n"
 461         "OpFunctionEnd\n";
 462
 463 const string storeM2x2F16AsUints =
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %st_fn_${var}: takes an m2x2f16 value (param1) and an
             // i32 element index (param2), extracts the two v2f16 columns, bitcasts
             // each to u32 and stores them at [member 0][index][%c_u32_0] and
             // [..][%c_u32_1] of %${var}.
 464         "%st_fn_${var} = OpFunction %void None %void_m2x2f16_i32_fn\n"
 465         "%st_fn_${var}_param1 = OpFunctionParameter %m2x2f16\n"
 466         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 467         "%st_fn_${var}_entry = OpLabel\n"
 468         "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
 469         "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
 470         "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
 471         "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
 472         "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
 473         "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
 474         "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
 475         "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
 476         "OpReturn\n"
 477         "OpFunctionEnd\n";
 478
 479 const string storeM2x3F16AsUints =
 480         // In the extracted elements for 01 and 11 the second element doesn't
 481         // matter.
             //
             // SPIR-V assembly template (${var} is a placeholder substituted elsewhere)
             // defining function %st_fn_${var}: takes an m2x3f16 value (param1) and an
             // i32 element index (param2). Each of the two v3f16 columns is split into
             // two v2f16 pieces, each piece is bitcast to u32, and the four words are
             // stored at [member 0][index][%c_u32_0 .. %c_u32_3] of %${var}.
 482         "%st_fn_${var} = OpFunction %void None %void_m2x3f16_i32_fn\n"
 483         "%st_fn_${var}_param1 = OpFunctionParameter %m2x3f16\n"
 484         "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
 485         "%st_fn_${var}_entry = OpLabel\n"
 486         "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
 487         "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
 488         "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
             // Shuffle index 3 is past the first 3-component operand, so it selects
             // component 0 of the second operand; that is the don't-care second
             // element of ele01 / ele11.
 489         "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
 490         "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
 491         "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
 492         "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
 493         "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
 494         "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
 495         "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
 496         "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
 497         "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
 498         "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
 499         "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
 500         "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
 501         "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
 502         "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
 503         "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
 504         "OpReturn\n"
 505         "OpFunctionEnd\n";
 506
// SPIR-V assembly template defining %st_fn_${var}: a function taking a
// %m2x4f16 matrix (param1) and an %i32 index (param2) that writes the matrix
// as four %u32 words through access chains into %${var} at [0][param2][0..3].
// ${var} is a placeholder to be substituted by the user of this string
// (presumably via tcu::StringTemplate -- confirm against the call sites).
const string storeM2x4F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m2x4f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m2x4f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        // Pull out the two column vectors.
        "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
        // Split each vec4 column into its (0,1) and (2,3) component pairs.
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        // Reinterpret each pair as one 32-bit uint.
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        // Destination pointers, one per uint word.
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 532
// SPIR-V assembly template defining %st_fn_${var}: a function taking a
// %m3x2f16 matrix (param1) and an %i32 index (param2) that writes the matrix
// as three %u32 words through access chains into %${var} at [0][param2][0..2].
// Each vec2 column fits exactly one uint, so no shuffling is required.
// ${var} is a placeholder to be substituted by the user of this string
// (presumably via tcu::StringTemplate -- confirm against the call sites).
const string storeM3x2F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m3x2f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m3x2f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        // Pull out the three column vectors.
        "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
        // Reinterpret each column as one 32-bit uint.
        "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
        "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
        "%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
        // Destination pointers, one per column.
        "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
        "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
        "OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 552
// SPIR-V assembly template defining %st_fn_${var}: a function taking a
// %m3x3f16 matrix (param1) and an %i32 index (param2) that writes the matrix
// as six %u32 words through access chains into %${var} at [0][param2][0..5].
// ${var} is a placeholder to be substituted by the user of this string
// (presumably via tcu::StringTemplate -- confirm against the call sites).
const string storeM3x3F16AsUints =
        // Each vec3 column is split into two vec2s. The "2 3" shuffles pick
        // component 2 of the column plus one filler component, so the second
        // element of every eleN1 value doesn't matter.
        "%st_fn_${var} = OpFunction %void None %void_m3x3f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m3x3f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        // Pull out the three column vectors.
        "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
        // Break every column into two 2-component halves.
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
        "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
        // Reinterpret each half as one 32-bit uint.
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
        "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
        // Destination pointers, one per uint word.
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
        "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
        "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 588
// SPIR-V assembly template defining %st_fn_${var}: a function taking a
// %m3x4f16 matrix (param1) and an %i32 index (param2) that writes the matrix
// as six %u32 words through access chains into %${var} at [0][param2][0..5].
// ${var} is a placeholder to be substituted by the user of this string
// (presumably via tcu::StringTemplate -- confirm against the call sites).
const string storeM3x4F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m3x4f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m3x4f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        // Pull out the three column vectors.
        "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
        // Split each vec4 column into its (0,1) and (2,3) component pairs.
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
        "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
        // Reinterpret each pair as one 32-bit uint.
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
        "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
        // Destination pointers, one per uint word.
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
        "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
        "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 623
// SPIR-V assembly template defining %st_fn_${var}: a function taking a
// %m4x2f16 matrix (param1) and an %i32 index (param2) that writes the matrix
// as four %u32 words through access chains into %${var} at [0][param2][0..3].
// Each vec2 column fits exactly one uint, so no shuffling is required.
// ${var} is a placeholder to be substituted by the user of this string
// (presumably via tcu::StringTemplate -- confirm against the call sites).
const string storeM4x2F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m4x2f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m4x2f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        // Pull out the four column vectors.
        "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_ex3 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 3\n"
        // Reinterpret each column as one 32-bit uint.
        "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
        "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
        "%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
        "%st_fn_${var}_bc3 = OpBitcast %u32 %st_fn_${var}_ex3\n"
        // Destination pointers, one per column.
        "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
        "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
        "OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
        "OpStore %st_fn_${var}_gep3 %st_fn_${var}_bc3\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 647
// SPIR-V assembly template defining %st_fn_${var}: a function taking a
// %m4x3f16 matrix (param1) and an %i32 index (param2) that writes the matrix
// as eight %u32 words through access chains into %${var} at [0][param2][0..7].
// ${var} is a placeholder to be substituted by the user of this string
// (presumably via tcu::StringTemplate -- confirm against the call sites).
const string storeM4x3F16AsUints =
        // Each vec3 column is split into two vec2s. The "2 3" shuffles pick
        // component 2 of the column plus one filler component, so the last
        // element of each decomposed vec3 (second half of eleN1) doesn't matter.
        "%st_fn_${var} = OpFunction %void None %void_m4x3f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m4x3f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        // Pull out the four column vectors.
        "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_ex3 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 3\n"
        // Break every column into two 2-component halves.
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
        "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
        "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
        "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
        // Reinterpret each half as one 32-bit uint.
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
        "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
        "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
        "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
        // Destination pointers, one per uint word.
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
        "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
        "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
        "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
        "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
        "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
        "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 692
// SPIR-V assembly template defining %st_fn_${var}: a function taking a
// %m4x4f16 matrix (param1) and an %i32 index (param2) that writes the matrix
// as eight %u32 words through access chains into %${var} at [0][param2][0..7].
// ${var} is a placeholder to be substituted by the user of this string
// (presumably via tcu::StringTemplate -- confirm against the call sites).
const string storeM4x4F16AsUints =
        "%st_fn_${var} = OpFunction %void None %void_m4x4f16_i32_fn\n"
        "%st_fn_${var}_param1 = OpFunctionParameter %m4x4f16\n"
        "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
        "%st_fn_${var}_entry = OpLabel\n"
        // Pull out the four column vectors.
        "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
        "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
        "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
        "%st_fn_${var}_ex3 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 3\n"
        // Split each vec4 column into its (0,1) and (2,3) component pairs.
        "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
        "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
        "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
        "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
        "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
        "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
        "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
        "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
        // Reinterpret each pair as one 32-bit uint.
        "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
        "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
        "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
        "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
        "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
        "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
        "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
        "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
        // Destination pointers, one per uint word.
        "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
        "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
        "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
        "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
        "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
        "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
        "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
        "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
        "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
        "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
        "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
        "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
        "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
        "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
        "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
        "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
        "OpReturn\n"
        "OpFunctionEnd\n";
 736
 737 template<typename T>
 738 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, int offset = 0)
 739 {
 740         T* const typedPtr = (T*)dst;
 741         for (int ndx = 0; ndx < numValues; ndx++)
 742                 typedPtr[offset + ndx] = de::randomScalar<T>(rnd, minValue, maxValue);
 743 }
 744
 745 // Filter is a function that returns true if a value should pass, false otherwise.
 746 template<typename T, typename FilterT>
 747 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, FilterT filter, int offset = 0)
 748 {
 749         T* const typedPtr = (T*)dst;
 750         T value;
 751         for (int ndx = 0; ndx < numValues; ndx++)
 752         {
 753                 do
 754                         value = de::randomScalar<T>(rnd, minValue, maxValue);
 755                 while (!filter(value));
 756
 757                 typedPtr[offset + ndx] = value;
 758         }
 759 }
 760
 761 // Gets a 64-bit integer with a more logarithmic distribution
 762 deInt64 randomInt64LogDistributed (de::Random& rnd)
 763 {
 764         deInt64 val = rnd.getUint64();
 765         val &= (1ull << rnd.getInt(1, 63)) - 1;
 766         if (rnd.getBool())
 767                 val = -val;
 768         return val;
 769 }
 770
 771 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues)
 772 {
 773         for (int ndx = 0; ndx < numValues; ndx++)
 774                 dst[ndx] = randomInt64LogDistributed(rnd);
 775 }
 776
 777 template<typename FilterT>
 778 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues, FilterT filter)
 779 {
 780         for (int ndx = 0; ndx < numValues; ndx++)
 781         {
 782                 deInt64 value;
 783                 do {
 784                         value = randomInt64LogDistributed(rnd);
 785                 } while (!filter(value));
 786                 dst[ndx] = value;
 787         }
 788 }
 789
 790 inline bool filterNonNegative (const deInt64 value)
 791 {
 792         return value >= 0;
 793 }
 794
 795 inline bool filterPositive (const deInt64 value)
 796 {
 797         return value > 0;
 798 }
 799
 800 inline bool filterNotZero (const deInt64 value)
 801 {
 802         return value != 0;
 803 }
 804
 805 static void floorAll (vector<float>& values)
 806 {
 807         for (size_t i = 0; i < values.size(); i++)
 808                 values[i] = deFloatFloor(values[i]);
 809 }
 810
 811 static void floorAll (vector<Vec4>& values)
 812 {
 813         for (size_t i = 0; i < values.size(); i++)
 814                 values[i] = floor(values[i]);
 815 }
 816
 817 struct CaseParameter
 818 {
 819         const char*             name;
 820         string                  param;
 821
 822         CaseParameter   (const char* case_, const string& param_) : name(case_), param(param_) {}
 823 };
 824
 825 // Assembly code used for testing LocalSize, OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
 826 //
 827 // #version 430
 828 //
 829 // layout(std140, set = 0, binding = 0) readonly buffer Input {
 830 //   float elements[];
 831 // } input_data;
 832 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
 833 //   float elements[];
 834 // } output_data;
 835 //
 836 // layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 837 //
 838 // void main() {
 839 //   uint x = gl_GlobalInvocationID.x;
 840 //   output_data.elements[x] = -input_data.elements[x];
 841 // }
 842
 843 static string getAsmForLocalSizeTest(bool useLiteralLocalSize, bool useSpecConstantWorkgroupSize, IVec3 workGroupSize, deUint32 ndx)
 844 {
 845         std::ostringstream out;
 846         out << getComputeAsmShaderPreambleWithoutLocalSize();
 847
 848         if (useLiteralLocalSize)
 849         {
 850                 out << "OpExecutionMode %main LocalSize "
 851                         << workGroupSize.x() << " " << workGroupSize.y() << " " << workGroupSize.z() << "\n";
 852         }
 853
 854         out << "OpSource GLSL 430\n"
 855                 "OpName %main           \"main\"\n"
 856                 "OpName %id             \"gl_GlobalInvocationID\"\n"
 857                 "OpDecorate %id BuiltIn GlobalInvocationId\n";
 858
 859         if (useSpecConstantWorkgroupSize)
 860         {
 861                 out << "OpDecorate %spec_0 SpecId 100\n"
 862                         << "OpDecorate %spec_1 SpecId 101\n"
 863                         << "OpDecorate %spec_2 SpecId 102\n"
 864                         << "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n";
 865         }
 866
 867         out << getComputeAsmInputOutputBufferTraits()
 868                 << getComputeAsmCommonTypes()
 869                 << getComputeAsmInputOutputBuffer()
 870                 << "%id        = OpVariable %uvec3ptr Input\n"
 871                 << "%zero      = OpConstant %i32 0 \n";
 872
 873         if (useSpecConstantWorkgroupSize)
 874         {
 875                 out     << "%spec_0   = OpSpecConstant %u32 "<< workGroupSize.x() << "\n"
 876                         << "%spec_1   = OpSpecConstant %u32 "<< workGroupSize.y() << "\n"
 877                         << "%spec_2   = OpSpecConstant %u32 "<< workGroupSize.z() << "\n"
 878                         << "%gl_WorkGroupSize = OpSpecConstantComposite %uvec3 %spec_0 %spec_1 %spec_2\n";
 879         }
 880
 881         out << "%main      = OpFunction %void None %voidf\n"
 882                 << "%label     = OpLabel\n"
 883                 << "%idval     = OpLoad %uvec3 %id\n"
 884                 << "%ndx         = OpCompositeExtract %u32 %idval " << ndx << "\n"
 885
 886                         "%inloc     = OpAccessChain %f32ptr %indata %zero %ndx\n"
 887                         "%inval     = OpLoad %f32 %inloc\n"
 888                         "%neg       = OpFNegate %f32 %inval\n"
 889                         "%outloc    = OpAccessChain %f32ptr %outdata %zero %ndx\n"
 890                         "             OpStore %outloc %neg\n"
 891                         "             OpReturn\n"
 892                         "             OpFunctionEnd\n";
 893         return out.str();
 894 }
 895
 896 tcu::TestCaseGroup* createLocalSizeGroup (tcu::TestContext& testCtx)
 897 {
 898         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "localsize", ""));
 899         ComputeShaderSpec                               spec;
 900         de::Random                                              rnd                             (deStringHash(group->getName()));
 901         const deUint32                                  numElements             = 64u;
 902         vector<float>                                   positiveFloats  (numElements, 0);
 903         vector<float>                                   negativeFloats  (numElements, 0);
 904
 905         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
 906
 907         for (size_t ndx = 0; ndx < numElements; ++ndx)
 908                 negativeFloats[ndx] = -positiveFloats[ndx];
 909
 910         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
 911         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
 912
 913         spec.numWorkGroups = IVec3(numElements, 1, 1);
 914
 915         spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, 1, 1), 0u);
 916         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize", "", spec));
 917
 918         spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, 1, 1), 0u);
 919         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize", "", spec));
 920
 921         spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, 1, 1), 0u);
 922         group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize", "", spec));
 923
 924         spec.numWorkGroups = IVec3(1, 1, 1);
 925
 926         spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(numElements, 1, 1), 0u);
 927         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_x", "", spec));
 928
 929         spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(numElements, 1, 1), 0u);
 930         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_x", "", spec));
 931
 932         spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(numElements, 1, 1), 0u);
 933         group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_x", "", spec));
 934
 935         spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, numElements, 1), 1u);
 936         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_y", "", spec));
 937
 938         spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, numElements, 1), 1u);
 939         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_y", "", spec));
 940
 941         spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, numElements, 1), 1u);
 942         group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_y", "", spec));
 943
 944         spec.assembly = getAsmForLocalSizeTest(true, false, IVec3(1, 1, numElements), 2u);
 945         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_z", "", spec));
 946
 947         spec.assembly = getAsmForLocalSizeTest(true, true, IVec3(1, 1, numElements), 2u);
 948         group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_z", "", spec));
 949
 950         spec.assembly = getAsmForLocalSizeTest(false, true, IVec3(1, 1, numElements), 2u);
 951         group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_z", "", spec));
 952
 953         return group.release();
 954 }
 955
 956 tcu::TestCaseGroup* createOpNopGroup (tcu::TestContext& testCtx)
 957 {
 958         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnop", "Test the OpNop instruction"));
 959         ComputeShaderSpec                               spec;
 960         de::Random                                              rnd                             (deStringHash(group->getName()));
 961         const int                                               numElements             = 100;
 962         vector<float>                                   positiveFloats  (numElements, 0);
 963         vector<float>                                   negativeFloats  (numElements, 0);
 964
 965         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
 966
 967         for (size_t ndx = 0; ndx < numElements; ++ndx)
 968                 negativeFloats[ndx] = -positiveFloats[ndx];
 969
 970         spec.assembly =
 971                 string(getComputeAsmShaderPreamble()) +
 972
 973                 "OpSource GLSL 430\n"
 974                 "OpName %main           \"main\"\n"
 975                 "OpName %id             \"gl_GlobalInvocationID\"\n"
 976
 977                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
 978
 979                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
 980
 981                 + string(getComputeAsmInputOutputBuffer()) +
 982
 983                 "%id        = OpVariable %uvec3ptr Input\n"
 984                 "%zero      = OpConstant %i32 0\n"
 985
 986                 "%main      = OpFunction %void None %voidf\n"
 987                 "%label     = OpLabel\n"
 988                 "%idval     = OpLoad %uvec3 %id\n"
 989                 "%x         = OpCompositeExtract %u32 %idval 0\n"
 990
 991                 "             OpNop\n" // Inside a function body
 992
 993                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
 994                 "%inval     = OpLoad %f32 %inloc\n"
 995                 "%neg       = OpFNegate %f32 %inval\n"
 996                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
 997                 "             OpStore %outloc %neg\n"
 998                 "             OpReturn\n"
 999                 "             OpFunctionEnd\n";
1000         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1001         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1002         spec.numWorkGroups = IVec3(numElements, 1, 1);
1003
1004         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNop appearing at different places", spec));
1005
1006         return group.release();
1007 }
1008
1009 tcu::TestCaseGroup* createUnusedVariableComputeTests (tcu::TestContext& testCtx)
1010 {
1011         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "unused_variables", "Compute shaders with unused variables"));
1012         de::Random                                              rnd                             (deStringHash(group->getName()));
1013         const int                                               numElements             = 100;
1014         vector<float>                                   positiveFloats  (numElements, 0);
1015         vector<float>                                   negativeFloats  (numElements, 0);
1016
1017         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1018
1019         for (size_t ndx = 0; ndx < numElements; ++ndx)
1020                 negativeFloats[ndx] = -positiveFloats[ndx];
1021
1022         const VariableLocation                  testLocations[] =
1023         {
1024                 // Set          Binding
1025                 { 0,            5                       },
1026                 { 5,            5                       },
1027         };
1028
1029         for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
1030         {
1031                 const VariableLocation& location = testLocations[locationNdx];
1032
1033                 // Unused variable.
1034                 {
1035                         ComputeShaderSpec                               spec;
1036
1037                         spec.assembly =
1038                                 string(getComputeAsmShaderPreamble()) +
1039
1040                                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1041
1042                                 + getUnusedDecorations(location)
1043
1044                                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1045
1046                                 + getUnusedTypesAndConstants()
1047
1048                                 + string(getComputeAsmInputOutputBuffer())
1049
1050                                 + getUnusedBuffer() +
1051
1052                                 "%id        = OpVariable %uvec3ptr Input\n"
1053                                 "%zero      = OpConstant %i32 0\n"
1054
1055                                 "%main      = OpFunction %void None %voidf\n"
1056                                 "%label     = OpLabel\n"
1057                                 "%idval     = OpLoad %uvec3 %id\n"
1058                                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1059
1060                                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1061                                 "%inval     = OpLoad %f32 %inloc\n"
1062                                 "%neg       = OpFNegate %f32 %inval\n"
1063                                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1064                                 "             OpStore %outloc %neg\n"
1065                                 "             OpReturn\n"
1066                                 "             OpFunctionEnd\n";
1067                         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1068                         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1069                         spec.numWorkGroups = IVec3(numElements, 1, 1);
1070
1071                         std::string testName            = "variable_" + location.toString();
1072                         std::string testDescription     = "Unused variable test with " + location.toDescription();
1073
1074                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testDescription.c_str(), spec));
1075                 }
1076
1077                 // Unused function.
1078                 {
1079                         ComputeShaderSpec                               spec;
1080
1081                         spec.assembly =
1082                                 string(getComputeAsmShaderPreamble("", "", "", getUnusedEntryPoint())) +
1083
1084                                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1085
1086                                 + getUnusedDecorations(location)
1087
1088                                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1089
1090                                 + getUnusedTypesAndConstants() +
1091
1092                                 "%c_i32_0 = OpConstant %i32 0\n"
1093                                 "%c_i32_1 = OpConstant %i32 1\n"
1094
1095                                 + string(getComputeAsmInputOutputBuffer())
1096
1097                                 + getUnusedBuffer() +
1098
1099                                 "%id        = OpVariable %uvec3ptr Input\n"
1100                                 "%zero      = OpConstant %i32 0\n"
1101
1102                                 "%main      = OpFunction %void None %voidf\n"
1103                                 "%label     = OpLabel\n"
1104                                 "%idval     = OpLoad %uvec3 %id\n"
1105                                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1106
1107                                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1108                                 "%inval     = OpLoad %f32 %inloc\n"
1109                                 "%neg       = OpFNegate %f32 %inval\n"
1110                                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1111                                 "             OpStore %outloc %neg\n"
1112                                 "             OpReturn\n"
1113                                 "             OpFunctionEnd\n"
1114
1115                                 + getUnusedFunctionBody();
1116
1117                         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1118                         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1119                         spec.numWorkGroups = IVec3(numElements, 1, 1);
1120
1121                         std::string testName            = "function_" + location.toString();
1122                         std::string testDescription     = "Unused function test with " + location.toDescription();
1123
1124                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testDescription.c_str(), spec));
1125                 }
1126         }
1127
1128         return group.release();
1129 }
1130
1131 template<bool nanSupported>
1132 bool compareFUnord (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
1133 {
1134         if (outputAllocs.size() != 1)
1135                 return false;
1136
1137         vector<deUint8> input1Bytes;
1138         vector<deUint8> input2Bytes;
1139         vector<deUint8> expectedBytes;
1140
1141         inputs[0].getBytes(input1Bytes);
1142         inputs[1].getBytes(input2Bytes);
1143         expectedOutputs[0].getBytes(expectedBytes);
1144
1145         const deInt32* const    expectedOutputAsInt             = reinterpret_cast<const deInt32*>(&expectedBytes.front());
1146         const deInt32* const    outputAsInt                             = static_cast<const deInt32*>(outputAllocs[0]->getHostPtr());
1147         const float* const              input1AsFloat                   = reinterpret_cast<const float*>(&input1Bytes.front());
1148         const float* const              input2AsFloat                   = reinterpret_cast<const float*>(&input2Bytes.front());
1149         bool returnValue                                                                = true;
1150
1151         for (size_t idx = 0; idx < expectedBytes.size() / sizeof(deInt32); ++idx)
1152         {
1153                 if (!nanSupported && (tcu::Float32(input1AsFloat[idx]).isNaN() || tcu::Float32(input2AsFloat[idx]).isNaN()))
1154                         continue;
1155
1156                 if (outputAsInt[idx] != expectedOutputAsInt[idx])
1157                 {
1158                         log << TestLog::Message << "ERROR: Sub-case failed. inputs: " << input1AsFloat[idx] << "," << input2AsFloat[idx] << " output: " << outputAsInt[idx]<< " expected output: " << expectedOutputAsInt[idx] << TestLog::EndMessage;
1159                         returnValue = false;
1160                 }
1161         }
1162         return returnValue;
1163 }
1164
1165 typedef VkBool32 (*compareFuncType) (float, float);
1166
1167 struct OpFUnordCase
1168 {
1169         const char*             name;
1170         const char*             opCode;
1171         compareFuncType compareFunc;
1172
1173                                         OpFUnordCase                    (const char* _name, const char* _opCode, compareFuncType _compareFunc)
1174                                                 : name                          (_name)
1175                                                 , opCode                        (_opCode)
1176                                                 , compareFunc           (_compareFunc) {}
1177 };
1178
// Appends one OpFUnordCase to a vector named `cases` that must be in scope at
// the point of use. NAME becomes the case name (stringified), OPCODE is the
// SPIR-V opcode string, and OPERATOR is the C++ comparison operator used to
// build the host-side reference function.
#define ADD_OPFUNORD_CASE(NAME, OPCODE, OPERATOR) \
do { \
        struct compare_##NAME \
        { \
                static VkBool32 compare (float lhs, float rhs) \
                { \
                        if (lhs OPERATOR rhs) \
                                return VK_TRUE; \
                        return VK_FALSE; \
                } \
        }; \
        cases.push_back(OpFUnordCase(#NAME, OPCODE, compare_##NAME::compare)); \
} while (deGetFalse())
1184
1185 tcu::TestCaseGroup* createOpFUnordGroup (tcu::TestContext& testCtx, const bool testWithNan)
1186 {
1187         const string                                    nan                             = testWithNan ? "_nan" : "";
1188         const string                                    groupName               = "opfunord" + nan;
1189         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpFUnord* opcodes"));
1190         de::Random                                              rnd                             (deStringHash(group->getName()));
1191         const int                                               numElements             = 100;
1192         vector<OpFUnordCase>                    cases;
1193         string                                                  extensions              = testWithNan ? "OpExtension \"SPV_KHR_float_controls\"\n" : "";
1194         string                                                  capabilities    = testWithNan ? "OpCapability SignedZeroInfNanPreserve\n" : "";
1195         string                                                  exeModes                = testWithNan ? "OpExecutionMode %main SignedZeroInfNanPreserve 32\n" : "";
1196         const StringTemplate                    shaderTemplate  (
1197                 string(getComputeAsmShaderPreamble(capabilities, extensions, exeModes)) +
1198                 "OpSource GLSL 430\n"
1199                 "OpName %main           \"main\"\n"
1200                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1201
1202                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1203
1204                 "OpDecorate %buf BufferBlock\n"
1205                 "OpDecorate %buf2 BufferBlock\n"
1206                 "OpDecorate %indata1 DescriptorSet 0\n"
1207                 "OpDecorate %indata1 Binding 0\n"
1208                 "OpDecorate %indata2 DescriptorSet 0\n"
1209                 "OpDecorate %indata2 Binding 1\n"
1210                 "OpDecorate %outdata DescriptorSet 0\n"
1211                 "OpDecorate %outdata Binding 2\n"
1212                 "OpDecorate %f32arr ArrayStride 4\n"
1213                 "OpDecorate %i32arr ArrayStride 4\n"
1214                 "OpMemberDecorate %buf 0 Offset 0\n"
1215                 "OpMemberDecorate %buf2 0 Offset 0\n"
1216
1217                 + string(getComputeAsmCommonTypes()) +
1218
1219                 "%buf        = OpTypeStruct %f32arr\n"
1220                 "%bufptr     = OpTypePointer Uniform %buf\n"
1221                 "%indata1    = OpVariable %bufptr Uniform\n"
1222                 "%indata2    = OpVariable %bufptr Uniform\n"
1223
1224                 "%buf2       = OpTypeStruct %i32arr\n"
1225                 "%buf2ptr    = OpTypePointer Uniform %buf2\n"
1226                 "%outdata    = OpVariable %buf2ptr Uniform\n"
1227
1228                 "%id        = OpVariable %uvec3ptr Input\n"
1229                 "%zero      = OpConstant %i32 0\n"
1230                 "%consti1   = OpConstant %i32 1\n"
1231                 "%constf1   = OpConstant %f32 1.0\n"
1232
1233                 "%main      = OpFunction %void None %voidf\n"
1234                 "%label     = OpLabel\n"
1235                 "%idval     = OpLoad %uvec3 %id\n"
1236                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1237
1238                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
1239                 "%inval1    = OpLoad %f32 %inloc1\n"
1240                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
1241                 "%inval2    = OpLoad %f32 %inloc2\n"
1242                 "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
1243
1244                 "%result    = ${OPCODE} %bool %inval1 %inval2\n"
1245                 "%int_res   = OpSelect %i32 %result %consti1 %zero\n"
1246                 "             OpStore %outloc %int_res\n"
1247
1248                 "             OpReturn\n"
1249                 "             OpFunctionEnd\n");
1250
1251         ADD_OPFUNORD_CASE(equal, "OpFUnordEqual", ==);
1252         ADD_OPFUNORD_CASE(less, "OpFUnordLessThan", <);
1253         ADD_OPFUNORD_CASE(lessequal, "OpFUnordLessThanEqual", <=);
1254         ADD_OPFUNORD_CASE(greater, "OpFUnordGreaterThan", >);
1255         ADD_OPFUNORD_CASE(greaterequal, "OpFUnordGreaterThanEqual", >=);
1256         ADD_OPFUNORD_CASE(notequal, "OpFUnordNotEqual", !=);
1257
1258         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1259         {
1260                 map<string, string>                     specializations;
1261                 ComputeShaderSpec                       spec;
1262                 const float                                     NaN                             = std::numeric_limits<float>::quiet_NaN();
1263                 vector<float>                           inputFloats1    (numElements, 0);
1264                 vector<float>                           inputFloats2    (numElements, 0);
1265                 vector<deInt32>                         expectedInts    (numElements, 0);
1266
1267                 specializations["OPCODE"]       = cases[caseNdx].opCode;
1268                 spec.assembly                           = shaderTemplate.specialize(specializations);
1269
1270                 fillRandomScalars(rnd, 1.f, 100.f, &inputFloats1[0], numElements);
1271                 for (size_t ndx = 0; ndx < numElements; ++ndx)
1272                 {
1273                         switch (ndx % 6)
1274                         {
1275                                 case 0:         inputFloats2[ndx] = inputFloats1[ndx] + 1.0f; break;
1276                                 case 1:         inputFloats2[ndx] = inputFloats1[ndx] - 1.0f; break;
1277                                 case 2:         inputFloats2[ndx] = inputFloats1[ndx]; break;
1278                                 case 3:         inputFloats2[ndx] = NaN; break;
1279                                 case 4:         inputFloats2[ndx] = inputFloats1[ndx];  inputFloats1[ndx] = NaN; break;
1280                                 case 5:         inputFloats2[ndx] = NaN;                                inputFloats1[ndx] = NaN; break;
1281                         }
1282                         expectedInts[ndx] = tcu::Float32(inputFloats1[ndx]).isNaN() || tcu::Float32(inputFloats2[ndx]).isNaN() || cases[caseNdx].compareFunc(inputFloats1[ndx], inputFloats2[ndx]);
1283                 }
1284
1285                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1286                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1287                 spec.outputs.push_back(BufferSp(new Int32Buffer(expectedInts)));
1288                 spec.numWorkGroups      = IVec3(numElements, 1, 1);
1289                 spec.verifyIO           = testWithNan ? &compareFUnord<true> : &compareFUnord<false>;
1290
1291                 if (testWithNan)
1292                 {
1293                         spec.extensions.push_back("VK_KHR_shader_float_controls");
1294                         spec.requestedVulkanFeatures.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat32 = DE_TRUE;
1295                 }
1296
1297                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1298         }
1299
1300         return group.release();
1301 }
1302
1303 struct OpAtomicCase
1304 {
1305         const char*             name;
1306         const char*             assembly;
1307         const char*             retValAssembly;
1308         OpAtomicType    opAtomic;
1309         deInt32                 numOutputElements;
1310
1311                                         OpAtomicCase(const char* _name, const char* _assembly, const char* _retValAssembly, OpAtomicType _opAtomic, deInt32 _numOutputElements)
1312                                                 : name                          (_name)
1313                                                 , assembly                      (_assembly)
1314                                                 , retValAssembly        (_retValAssembly)
1315                                                 , opAtomic                      (_opAtomic)
1316                                                 , numOutputElements     (_numOutputElements) {}
1317 };
1318
1319 tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStorageBuffer, int numElements = 65535, bool verifyReturnValues = false, bool volatileAtomic = false)
1320 {
1321         std::string                                             groupName                       ("opatomic");
1322         if (useStorageBuffer)
1323                 groupName += "_storage_buffer";
1324         if (verifyReturnValues)
1325                 groupName += "_return_values";
1326         if (volatileAtomic)
1327                 groupName += "_volatile";
1328         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpAtomic* opcodes"));
1329         vector<OpAtomicCase>                    cases;
1330
1331         const StringTemplate                    shaderTemplate  (
1332
1333                 string("OpCapability Shader\n") +
1334                 (volatileAtomic ? "OpCapability VulkanMemoryModelKHR\n" : "") +
1335                 (useStorageBuffer ? "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n" : "") +
1336                 (volatileAtomic ? "OpExtension \"SPV_KHR_vulkan_memory_model\"\n" : "") +
1337                 (volatileAtomic ? "OpMemoryModel Logical VulkanKHR\n" : "OpMemoryModel Logical GLSL450\n") +
1338                 "OpEntryPoint GLCompute %main \"main\" %id\n"
1339                 "OpExecutionMode %main LocalSize 1 1 1\n" +
1340
1341                 "OpSource GLSL 430\n"
1342                 "OpName %main           \"main\"\n"
1343                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1344
1345                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1346
1347                 "OpDecorate %buf ${BLOCK_DECORATION}\n"
1348                 "OpDecorate %indata DescriptorSet 0\n"
1349                 "OpDecorate %indata Binding 0\n"
1350                 "OpDecorate %i32arr ArrayStride 4\n"
1351                 "OpMemberDecorate %buf 0 Offset 0\n"
1352
1353                 "OpDecorate %sumbuf ${BLOCK_DECORATION}\n"
1354                 "OpDecorate %sum DescriptorSet 0\n"
1355                 "OpDecorate %sum Binding 1\n"
1356                 "OpMemberDecorate %sumbuf 0 Offset 0\n"
1357
1358                 "${RETVAL_BUF_DECORATE}"
1359
1360                 + getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
1361
1362                 "%buf       = OpTypeStruct %i32arr\n"
1363                 "%bufptr    = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
1364                 "%indata    = OpVariable %bufptr ${BLOCK_POINTER_TYPE}\n"
1365
1366                 "%sumbuf    = OpTypeStruct %i32arr\n"
1367                 "%sumbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %sumbuf\n"
1368                 "%sum       = OpVariable %sumbufptr ${BLOCK_POINTER_TYPE}\n"
1369
1370                 "${RETVAL_BUF_DECL}"
1371
1372                 "%id        = OpVariable %uvec3ptr Input\n"
1373                 "%minusone  = OpConstant %i32 -1\n"
1374                 "%zero      = OpConstant %i32 0\n"
1375                 "%one       = OpConstant %u32 1\n"
1376                 "%two       = OpConstant %i32 2\n"
1377                 "%five      = OpConstant %i32 5\n"
1378                 "%volbit    = OpConstant %i32 32768\n"
1379
1380                 "%main      = OpFunction %void None %voidf\n"
1381                 "%label     = OpLabel\n"
1382                 "%idval     = OpLoad %uvec3 %id\n"
1383                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1384
1385                 "%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
1386                 "%inval     = OpLoad %i32 %inloc\n"
1387
1388                 "%outloc    = OpAccessChain %i32ptr %sum %zero ${INDEX}\n"
1389                 "${INSTRUCTION}"
1390                 "${RETVAL_ASSEMBLY}"
1391
1392                 "             OpReturn\n"
1393                 "             OpFunctionEnd\n");
1394
1395         #define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS) \
1396         do { \
1397                 DE_ASSERT((NUM_OUTPUT_ELEMENTS) == 1 || (NUM_OUTPUT_ELEMENTS) == numElements); \
1398                 cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)); \
1399         } while (deGetFalse())
1400         #define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, 1)
1401         #define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, numElements)
1402
1403         ADD_OPATOMIC_CASE_1(iadd,       "%retv      = OpAtomicIAdd %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1404                                                                 "             OpStore %retloc %retv\n", OPATOMIC_IADD );
1405         ADD_OPATOMIC_CASE_1(isub,       "%retv      = OpAtomicISub %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1406                                                                 "             OpStore %retloc %retv\n", OPATOMIC_ISUB );
1407         ADD_OPATOMIC_CASE_1(iinc,       "%retv      = OpAtomicIIncrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1408                                                                 "             OpStore %retloc %retv\n", OPATOMIC_IINC );
1409         ADD_OPATOMIC_CASE_1(idec,       "%retv      = OpAtomicIDecrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1410                                                                 "             OpStore %retloc %retv\n", OPATOMIC_IDEC );
1411         if (!verifyReturnValues)
1412         {
1413                 ADD_OPATOMIC_CASE_N(load,       "%inval2    = OpAtomicLoad %i32 %inloc ${SCOPE} ${SEMANTICS}\n"
1414                                                                         "             OpStore %outloc %inval2\n", "", OPATOMIC_LOAD );
1415                 ADD_OPATOMIC_CASE_N(store,      "             OpAtomicStore %outloc ${SCOPE} ${SEMANTICS} %inval\n", "", OPATOMIC_STORE );
1416         }
1417
1418         ADD_OPATOMIC_CASE_N(compex, "%even      = OpSMod %i32 %inval %two\n"
1419                                                                 "             OpStore %outloc %even\n"
1420                                                                 "%retv      = OpAtomicCompareExchange %i32 %outloc ${SCOPE} ${SEMANTICS} ${SEMANTICS} %minusone %zero\n",
1421                                                                 "                         OpStore %retloc %retv\n", OPATOMIC_COMPEX );
1422
1423
1424         #undef ADD_OPATOMIC_CASE
1425         #undef ADD_OPATOMIC_CASE_1
1426         #undef ADD_OPATOMIC_CASE_N
1427
1428         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1429         {
1430                 map<string, string>                     specializations;
1431                 ComputeShaderSpec                       spec;
1432                 vector<deInt32>                         inputInts               (numElements, 0);
1433                 vector<deInt32>                         expected                (cases[caseNdx].numOutputElements, -1);
1434
1435                 if (volatileAtomic)
1436                 {
1437                         spec.extensions.push_back("VK_KHR_vulkan_memory_model");
1438                         // volatile, queuefamily scope
1439                         specializations["SEMANTICS"] = "%volbit";
1440                         specializations["SCOPE"] = "%five";
1441                 }
1442                 else
1443                 {
1444                         // non-volatile, device scope
1445                         specializations["SEMANTICS"] = "%zero";
1446                         specializations["SCOPE"] = "%one";
1447                 }
1448                 specializations["INDEX"]                                = (cases[caseNdx].numOutputElements == 1) ? "%zero" : "%x";
1449                 specializations["INSTRUCTION"]                  = cases[caseNdx].assembly;
1450                 specializations["BLOCK_DECORATION"]             = useStorageBuffer ? "Block" : "BufferBlock";
1451                 specializations["BLOCK_POINTER_TYPE"]   = useStorageBuffer ? "StorageBuffer" : "Uniform";
1452
1453                 if (verifyReturnValues)
1454                 {
1455                         const StringTemplate blockDecoration    (
1456                                 "\n"
1457                                 "OpDecorate %retbuf ${BLOCK_DECORATION}\n"
1458                                 "OpDecorate %ret DescriptorSet 0\n"
1459                                 "OpDecorate %ret Binding 2\n"
1460                                 "OpMemberDecorate %retbuf 0 Offset 0\n\n");
1461
1462                         const StringTemplate blockDeclaration   (
1463                                 "\n"
1464                                 "%retbuf    = OpTypeStruct %i32arr\n"
1465                                 "%retbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %retbuf\n"
1466                                 "%ret       = OpVariable %retbufptr ${BLOCK_POINTER_TYPE}\n\n");
1467
1468                         specializations["RETVAL_ASSEMBLY"] =
1469                                 "%retloc    = OpAccessChain %i32ptr %ret %zero %x\n"
1470                                 + std::string(cases[caseNdx].retValAssembly);
1471
1472                         specializations["RETVAL_BUF_DECORATE"]  = blockDecoration.specialize(specializations);
1473                         specializations["RETVAL_BUF_DECL"]              = blockDeclaration.specialize(specializations);
1474                 }
1475                 else
1476                 {
1477                         specializations["RETVAL_ASSEMBLY"]              = "";
1478                         specializations["RETVAL_BUF_DECORATE"]  = "";
1479                         specializations["RETVAL_BUF_DECL"]              = "";
1480                 }
1481
1482                 spec.assembly                                                   = shaderTemplate.specialize(specializations);
1483
1484                 // Specialize one more time, to catch things that were in a template parameter
1485                 const StringTemplate                                    assemblyTemplate(spec.assembly);
1486                 spec.assembly                                                   = assemblyTemplate.specialize(specializations);
1487
1488                 if (useStorageBuffer)
1489                         spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
1490
1491                 spec.inputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_INPUT)));
1492                 spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_EXPECTED)));
1493                 if (verifyReturnValues)
1494                         spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_ATOMIC_RET)));
1495                 spec.numWorkGroups = IVec3(numElements, 1, 1);
1496
1497                 if (verifyReturnValues)
1498                 {
1499                         switch (cases[caseNdx].opAtomic)
1500                         {
1501                                 case OPATOMIC_IADD:
1502                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IADD>;
1503                                         break;
1504                                 case OPATOMIC_ISUB:
1505                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_ISUB>;
1506                                         break;
1507                                 case OPATOMIC_IINC:
1508                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IINC>;
1509                                         break;
1510                                 case OPATOMIC_IDEC:
1511                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IDEC>;
1512                                         break;
1513                                 case OPATOMIC_COMPEX:
1514                                         spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_COMPEX>;
1515                                         break;
1516                                 default:
1517                                         DE_FATAL("Unsupported OpAtomic type for return value verification");
1518                         }
1519                 }
1520                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1521         }
1522
1523         return group.release();
1524 }
1525
1526 tcu::TestCaseGroup* createOpLineGroup (tcu::TestContext& testCtx)
1527 {
1528         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opline", "Test the OpLine instruction"));
1529         ComputeShaderSpec                               spec;
1530         de::Random                                              rnd                             (deStringHash(group->getName()));
1531         const int                                               numElements             = 100;
1532         vector<float>                                   positiveFloats  (numElements, 0);
1533         vector<float>                                   negativeFloats  (numElements, 0);
1534
1535         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1536
1537         for (size_t ndx = 0; ndx < numElements; ++ndx)
1538                 negativeFloats[ndx] = -positiveFloats[ndx];
1539
1540         spec.assembly =
1541                 string(getComputeAsmShaderPreamble()) +
1542
1543                 "%fname1 = OpString \"negateInputs.comp\"\n"
1544                 "%fname2 = OpString \"negateInputs\"\n"
1545
1546                 "OpSource GLSL 430\n"
1547                 "OpName %main           \"main\"\n"
1548                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1549
1550                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1551
1552                 + string(getComputeAsmInputOutputBufferTraits()) +
1553
1554                 "OpLine %fname1 0 0\n" // At the earliest possible position
1555
1556                 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1557
1558                 "OpLine %fname1 0 1\n" // Multiple OpLines in sequence
1559                 "OpLine %fname2 1 0\n" // Different filenames
1560                 "OpLine %fname1 1000 100000\n"
1561
1562                 "%id        = OpVariable %uvec3ptr Input\n"
1563                 "%zero      = OpConstant %i32 0\n"
1564
1565                 "OpLine %fname1 1 1\n" // Before a function
1566
1567                 "%main      = OpFunction %void None %voidf\n"
1568                 "%label     = OpLabel\n"
1569
1570                 "OpLine %fname1 1 1\n" // In a function
1571
1572                 "%idval     = OpLoad %uvec3 %id\n"
1573                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1574                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1575                 "%inval     = OpLoad %f32 %inloc\n"
1576                 "%neg       = OpFNegate %f32 %inval\n"
1577                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1578                 "             OpStore %outloc %neg\n"
1579                 "             OpReturn\n"
1580                 "             OpFunctionEnd\n";
1581         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1582         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1583         spec.numWorkGroups = IVec3(numElements, 1, 1);
1584
1585         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpLine appearing at different places", spec));
1586
1587         return group.release();
1588 }
1589
1590 bool veryfiBinaryShader (const ProgramBinary& binary)
1591 {
1592         const size_t    paternCount                     = 3u;
1593         bool paternsCheck[paternCount]          =
1594         {
1595                 false, false, false
1596         };
1597         const string patersns[paternCount]      =
1598         {
1599                 "VULKAN CTS",
1600                 "Negative values",
1601                 "Date: 2017/09/21"
1602         };
1603         size_t                  paternNdx               = 0u;
1604
1605         for (size_t ndx = 0u; ndx < binary.getSize(); ++ndx)
1606         {
1607                 if (false == paternsCheck[paternNdx] &&
1608                         patersns[paternNdx][0] == static_cast<char>(binary.getBinary()[ndx]) &&
1609                         deMemoryEqual((const char*)&binary.getBinary()[ndx], &patersns[paternNdx][0], patersns[paternNdx].length()))
1610                 {
1611                         paternsCheck[paternNdx]= true;
1612                         paternNdx++;
1613                         if (paternNdx == paternCount)
1614                                 break;
1615                 }
1616         }
1617
1618         for (size_t ndx = 0u; ndx < paternCount; ++ndx)
1619         {
1620                 if (!paternsCheck[ndx])
1621                         return false;
1622         }
1623
1624         return true;
1625 }
1626
1627 tcu::TestCaseGroup* createOpModuleProcessedGroup (tcu::TestContext& testCtx)
1628 {
1629         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "Test the OpModuleProcessed instruction"));
1630         ComputeShaderSpec                               spec;
1631         de::Random                                              rnd                             (deStringHash(group->getName()));
1632         const int                                               numElements             = 10;
1633         vector<float>                                   positiveFloats  (numElements, 0);
1634         vector<float>                                   negativeFloats  (numElements, 0);
1635
1636         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1637
1638         for (size_t ndx = 0; ndx < numElements; ++ndx)
1639                 negativeFloats[ndx] = -positiveFloats[ndx];
1640
1641         spec.assembly =
1642                 string(getComputeAsmShaderPreamble()) +
1643                 "%fname = OpString \"negateInputs.comp\"\n"
1644
1645                 "OpSource GLSL 430\n"
1646                 "OpName %main           \"main\"\n"
1647                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1648                 "OpModuleProcessed \"VULKAN CTS\"\n"                                    //OpModuleProcessed;
1649                 "OpModuleProcessed \"Negative values\"\n"
1650                 "OpModuleProcessed \"Date: 2017/09/21\"\n"
1651                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1652
1653                 + string(getComputeAsmInputOutputBufferTraits())
1654
1655                 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1656
1657                 "OpLine %fname 0 1\n"
1658
1659                 "OpLine %fname 1000 1\n"
1660
1661                 "%id        = OpVariable %uvec3ptr Input\n"
1662                 "%zero      = OpConstant %i32 0\n"
1663                 "%main      = OpFunction %void None %voidf\n"
1664
1665                 "%label     = OpLabel\n"
1666                 "%idval     = OpLoad %uvec3 %id\n"
1667                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1668
1669                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1670                 "%inval     = OpLoad %f32 %inloc\n"
1671                 "%neg       = OpFNegate %f32 %inval\n"
1672                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1673                 "             OpStore %outloc %neg\n"
1674                 "             OpReturn\n"
1675                 "             OpFunctionEnd\n";
1676         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1677         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1678         spec.numWorkGroups = IVec3(numElements, 1, 1);
1679         spec.verifyBinary = veryfiBinaryShader;
1680         spec.spirvVersion = SPIRV_VERSION_1_3;
1681
1682         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpModuleProcessed Tests", spec));
1683
1684         return group.release();
1685 }
1686
1687 tcu::TestCaseGroup* createOpNoLineGroup (tcu::TestContext& testCtx)
1688 {
1689         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnoline", "Test the OpNoLine instruction"));
1690         ComputeShaderSpec                               spec;
1691         de::Random                                              rnd                             (deStringHash(group->getName()));
1692         const int                                               numElements             = 100;
1693         vector<float>                                   positiveFloats  (numElements, 0);
1694         vector<float>                                   negativeFloats  (numElements, 0);
1695
1696         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1697
1698         for (size_t ndx = 0; ndx < numElements; ++ndx)
1699                 negativeFloats[ndx] = -positiveFloats[ndx];
1700
1701         spec.assembly =
1702                 string(getComputeAsmShaderPreamble()) +
1703
1704                 "%fname = OpString \"negateInputs.comp\"\n"
1705
1706                 "OpSource GLSL 430\n"
1707                 "OpName %main           \"main\"\n"
1708                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1709
1710                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1711
1712                 + string(getComputeAsmInputOutputBufferTraits()) +
1713
1714                 "OpNoLine\n" // At the earliest possible position, without preceding OpLine
1715
1716                 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1717
1718                 "OpLine %fname 0 1\n"
1719                 "OpNoLine\n" // Immediately following a preceding OpLine
1720
1721                 "OpLine %fname 1000 1\n"
1722
1723                 "%id        = OpVariable %uvec3ptr Input\n"
1724                 "%zero      = OpConstant %i32 0\n"
1725
1726                 "OpNoLine\n" // Contents after the previous OpLine
1727
1728                 "%main      = OpFunction %void None %voidf\n"
1729                 "%label     = OpLabel\n"
1730                 "%idval     = OpLoad %uvec3 %id\n"
1731                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1732
1733                 "OpNoLine\n" // Multiple OpNoLine
1734                 "OpNoLine\n"
1735                 "OpNoLine\n"
1736
1737                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1738                 "%inval     = OpLoad %f32 %inloc\n"
1739                 "%neg       = OpFNegate %f32 %inval\n"
1740                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1741                 "             OpStore %outloc %neg\n"
1742                 "             OpReturn\n"
1743                 "             OpFunctionEnd\n";
1744         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1745         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1746         spec.numWorkGroups = IVec3(numElements, 1, 1);
1747
1748         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNoLine appearing at different places", spec));
1749
1750         return group.release();
1751 }
1752
1753 // Compare instruction for the contraction compute case.
1754 // Returns true if the output is what is expected from the test case.
1755 bool compareNoContractCase(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1756 {
1757         if (outputAllocs.size() != 1)
1758                 return false;
1759
1760         // Only size is needed because we are not comparing the exact values.
1761         size_t byteSize = expectedOutputs[0].getByteSize();
1762
1763         const float*    outputAsFloat   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
1764
1765         for(size_t i = 0; i < byteSize / sizeof(float); ++i) {
1766                 if (outputAsFloat[i] != 0.f &&
1767                         outputAsFloat[i] != -ldexp(1, -24)) {
1768                         return false;
1769                 }
1770         }
1771
1772         return true;
1773 }
1774
1775 tcu::TestCaseGroup* createNoContractionGroup (tcu::TestContext& testCtx)
1776 {
1777         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
1778         vector<CaseParameter>                   cases;
1779         const int                                               numElements             = 100;
1780         vector<float>                                   inputFloats1    (numElements, 0);
1781         vector<float>                                   inputFloats2    (numElements, 0);
1782         vector<float>                                   outputFloats    (numElements, 0);
1783         const StringTemplate                    shaderTemplate  (
1784                 string(getComputeAsmShaderPreamble()) +
1785
1786                 "OpName %main           \"main\"\n"
1787                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1788
1789                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1790
1791                 "${DECORATION}\n"
1792
1793                 "OpDecorate %buf BufferBlock\n"
1794                 "OpDecorate %indata1 DescriptorSet 0\n"
1795                 "OpDecorate %indata1 Binding 0\n"
1796                 "OpDecorate %indata2 DescriptorSet 0\n"
1797                 "OpDecorate %indata2 Binding 1\n"
1798                 "OpDecorate %outdata DescriptorSet 0\n"
1799                 "OpDecorate %outdata Binding 2\n"
1800                 "OpDecorate %f32arr ArrayStride 4\n"
1801                 "OpMemberDecorate %buf 0 Offset 0\n"
1802
1803                 + string(getComputeAsmCommonTypes()) +
1804
1805                 "%buf        = OpTypeStruct %f32arr\n"
1806                 "%bufptr     = OpTypePointer Uniform %buf\n"
1807                 "%indata1    = OpVariable %bufptr Uniform\n"
1808                 "%indata2    = OpVariable %bufptr Uniform\n"
1809                 "%outdata    = OpVariable %bufptr Uniform\n"
1810
1811                 "%id         = OpVariable %uvec3ptr Input\n"
1812                 "%zero       = OpConstant %i32 0\n"
1813                 "%c_f_m1     = OpConstant %f32 -1.\n"
1814
1815                 "%main       = OpFunction %void None %voidf\n"
1816                 "%label      = OpLabel\n"
1817                 "%idval      = OpLoad %uvec3 %id\n"
1818                 "%x          = OpCompositeExtract %u32 %idval 0\n"
1819                 "%inloc1     = OpAccessChain %f32ptr %indata1 %zero %x\n"
1820                 "%inval1     = OpLoad %f32 %inloc1\n"
1821                 "%inloc2     = OpAccessChain %f32ptr %indata2 %zero %x\n"
1822                 "%inval2     = OpLoad %f32 %inloc2\n"
1823                 "%mul        = OpFMul %f32 %inval1 %inval2\n"
1824                 "%add        = OpFAdd %f32 %mul %c_f_m1\n"
1825                 "%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
1826                 "              OpStore %outloc %add\n"
1827                 "              OpReturn\n"
1828                 "              OpFunctionEnd\n");
1829
1830         cases.push_back(CaseParameter("multiplication", "OpDecorate %mul NoContraction"));
1831         cases.push_back(CaseParameter("addition",               "OpDecorate %add NoContraction"));
1832         cases.push_back(CaseParameter("both",                   "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"));
1833
1834         for (size_t ndx = 0; ndx < numElements; ++ndx)
1835         {
1836                 inputFloats1[ndx]       = 1.f + std::ldexp(1.f, -23); // 1 + 2^-23.
1837                 inputFloats2[ndx]       = 1.f - std::ldexp(1.f, -23); // 1 - 2^-23.
1838                 // Result for (1 + 2^-23) * (1 - 2^-23) - 1. With NoContraction, the multiplication will be
1839                 // conducted separately and the result is rounded to 1, or 0x1.fffffcp-1
1840                 // So the final result will be 0.f or 0x1p-24.
1841                 // If the operation is combined into a precise fused multiply-add, then the result would be
1842                 // 2^-46 (0xa8800000).
1843                 outputFloats[ndx]       = 0.f;
1844         }
1845
1846         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1847         {
1848                 map<string, string>             specializations;
1849                 ComputeShaderSpec               spec;
1850
1851                 specializations["DECORATION"] = cases[caseNdx].param;
1852                 spec.assembly = shaderTemplate.specialize(specializations);
1853                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1854                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1855                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
1856                 spec.numWorkGroups = IVec3(numElements, 1, 1);
1857                 // Check against the two possible answers based on rounding mode.
1858                 spec.verifyIO = &compareNoContractCase;
1859
1860                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1861         }
1862         return group.release();
1863 }
1864
1865 bool compareFRem(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1866 {
1867         if (outputAllocs.size() != 1)
1868                 return false;
1869
1870         vector<deUint8> expectedBytes;
1871         expectedOutputs[0].getBytes(expectedBytes);
1872
1873         const float*    expectedOutputAsFloat   = reinterpret_cast<const float*>(&expectedBytes.front());
1874         const float*    outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
1875
1876         for (size_t idx = 0; idx < expectedBytes.size() / sizeof(float); ++idx)
1877         {
1878                 const float f0 = expectedOutputAsFloat[idx];
1879                 const float f1 = outputAsFloat[idx];
1880                 // \todo relative error needs to be fairly high because FRem may be implemented as
1881                 // (roughly) frac(a/b)*b, so LSB errors can be magnified. But this should be fine for now.
1882                 if (deFloatAbs((f1 - f0) / f0) > 0.02)
1883                         return false;
1884         }
1885
1886         return true;
1887 }
1888
1889 tcu::TestCaseGroup* createOpFRemGroup (tcu::TestContext& testCtx)
1890 {
1891         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opfrem", "Test the OpFRem instruction"));
1892         ComputeShaderSpec                               spec;
1893         de::Random                                              rnd                             (deStringHash(group->getName()));
1894         const int                                               numElements             = 200;
1895         vector<float>                                   inputFloats1    (numElements, 0);
1896         vector<float>                                   inputFloats2    (numElements, 0);
1897         vector<float>                                   outputFloats    (numElements, 0);
1898
1899         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
1900         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats2[0], numElements);
1901
1902         for (size_t ndx = 0; ndx < numElements; ++ndx)
1903         {
1904                 // Guard against divisors near zero.
1905                 if (std::fabs(inputFloats2[ndx]) < 1e-3)
1906                         inputFloats2[ndx] = 8.f;
1907
1908                 // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
1909                 outputFloats[ndx] = std::fmod(inputFloats1[ndx], inputFloats2[ndx]);
1910         }
1911
1912         spec.assembly =
1913                 string(getComputeAsmShaderPreamble()) +
1914
1915                 "OpName %main           \"main\"\n"
1916                 "OpName %id             \"gl_GlobalInvocationID\"\n"
1917
1918                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1919
1920                 "OpDecorate %buf BufferBlock\n"
1921                 "OpDecorate %indata1 DescriptorSet 0\n"
1922                 "OpDecorate %indata1 Binding 0\n"
1923                 "OpDecorate %indata2 DescriptorSet 0\n"
1924                 "OpDecorate %indata2 Binding 1\n"
1925                 "OpDecorate %outdata DescriptorSet 0\n"
1926                 "OpDecorate %outdata Binding 2\n"
1927                 "OpDecorate %f32arr ArrayStride 4\n"
1928                 "OpMemberDecorate %buf 0 Offset 0\n"
1929
1930                 + string(getComputeAsmCommonTypes()) +
1931
1932                 "%buf        = OpTypeStruct %f32arr\n"
1933                 "%bufptr     = OpTypePointer Uniform %buf\n"
1934                 "%indata1    = OpVariable %bufptr Uniform\n"
1935                 "%indata2    = OpVariable %bufptr Uniform\n"
1936                 "%outdata    = OpVariable %bufptr Uniform\n"
1937
1938                 "%id        = OpVariable %uvec3ptr Input\n"
1939                 "%zero      = OpConstant %i32 0\n"
1940
1941                 "%main      = OpFunction %void None %voidf\n"
1942                 "%label     = OpLabel\n"
1943                 "%idval     = OpLoad %uvec3 %id\n"
1944                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1945                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
1946                 "%inval1    = OpLoad %f32 %inloc1\n"
1947                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
1948                 "%inval2    = OpLoad %f32 %inloc2\n"
1949                 "%rem       = OpFRem %f32 %inval1 %inval2\n"
1950                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1951                 "             OpStore %outloc %rem\n"
1952                 "             OpReturn\n"
1953                 "             OpFunctionEnd\n";
1954
1955         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1956         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1957         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
1958         spec.numWorkGroups = IVec3(numElements, 1, 1);
1959         spec.verifyIO = &compareFRem;
1960
1961         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
1962
1963         return group.release();
1964 }
1965
1966 bool compareNMin (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1967 {
1968         if (outputAllocs.size() != 1)
1969                 return false;
1970
1971         const BufferSp&                 expectedOutput                  (expectedOutputs[0].getBuffer());
1972         std::vector<deUint8>    data;
1973         expectedOutput->getBytes(data);
1974
1975         const float* const              expectedOutputAsFloat   = reinterpret_cast<const float*>(&data.front());
1976         const float* const              outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
1977
1978         for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
1979         {
1980                 const float f0 = expectedOutputAsFloat[idx];
1981                 const float f1 = outputAsFloat[idx];
1982
1983                 // For NMin, we accept NaN as output if both inputs were NaN.
1984                 // Otherwise the NaN is the wrong choise, as on architectures that
1985                 // do not handle NaN, those are huge values.
1986                 if (!(tcu::Float32(f1).isNaN() && tcu::Float32(f0).isNaN()) && deFloatAbs(f1 - f0) > 0.00001f)
1987                         return false;
1988         }
1989
1990         return true;
1991 }
1992
1993 tcu::TestCaseGroup* createOpNMinGroup (tcu::TestContext& testCtx)
1994 {
1995         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnmin", "Test the OpNMin instruction"));
1996         ComputeShaderSpec                               spec;
1997         de::Random                                              rnd                             (deStringHash(group->getName()));
1998         const int                                               numElements             = 200;
1999         vector<float>                                   inputFloats1    (numElements, 0);
2000         vector<float>                                   inputFloats2    (numElements, 0);
2001         vector<float>                                   outputFloats    (numElements, 0);
2002
2003         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2004         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2005
2006         // Make the first case a full-NAN case.
2007         inputFloats1[0] = TCU_NAN;
2008         inputFloats2[0] = TCU_NAN;
2009
2010         for (size_t ndx = 0; ndx < numElements; ++ndx)
2011         {
2012                 // By default, pick the smallest
2013                 outputFloats[ndx] = std::min(inputFloats1[ndx], inputFloats2[ndx]);
2014
2015                 // Make half of the cases NaN cases
2016                 if ((ndx & 1) == 0)
2017                 {
2018                         // Alternate between the NaN operand
2019                         if ((ndx & 2) == 0)
2020                         {
2021                                 outputFloats[ndx] = inputFloats2[ndx];
2022                                 inputFloats1[ndx] = TCU_NAN;
2023                         }
2024                         else
2025                         {
2026                                 outputFloats[ndx] = inputFloats1[ndx];
2027                                 inputFloats2[ndx] = TCU_NAN;
2028                         }
2029                 }
2030         }
2031
2032         spec.assembly =
2033                 "OpCapability Shader\n"
2034                 "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
2035                 "OpMemoryModel Logical GLSL450\n"
2036                 "OpEntryPoint GLCompute %main \"main\" %id\n"
2037                 "OpExecutionMode %main LocalSize 1 1 1\n"
2038
2039                 "OpName %main           \"main\"\n"
2040                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2041
2042                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2043
2044                 "OpDecorate %buf BufferBlock\n"
2045                 "OpDecorate %indata1 DescriptorSet 0\n"
2046                 "OpDecorate %indata1 Binding 0\n"
2047                 "OpDecorate %indata2 DescriptorSet 0\n"
2048                 "OpDecorate %indata2 Binding 1\n"
2049                 "OpDecorate %outdata DescriptorSet 0\n"
2050                 "OpDecorate %outdata Binding 2\n"
2051                 "OpDecorate %f32arr ArrayStride 4\n"
2052                 "OpMemberDecorate %buf 0 Offset 0\n"
2053
2054                 + string(getComputeAsmCommonTypes()) +
2055
2056                 "%buf        = OpTypeStruct %f32arr\n"
2057                 "%bufptr     = OpTypePointer Uniform %buf\n"
2058                 "%indata1    = OpVariable %bufptr Uniform\n"
2059                 "%indata2    = OpVariable %bufptr Uniform\n"
2060                 "%outdata    = OpVariable %bufptr Uniform\n"
2061
2062                 "%id        = OpVariable %uvec3ptr Input\n"
2063                 "%zero      = OpConstant %i32 0\n"
2064
2065                 "%main      = OpFunction %void None %voidf\n"
2066                 "%label     = OpLabel\n"
2067                 "%idval     = OpLoad %uvec3 %id\n"
2068                 "%x         = OpCompositeExtract %u32 %idval 0\n"
2069                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2070                 "%inval1    = OpLoad %f32 %inloc1\n"
2071                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2072                 "%inval2    = OpLoad %f32 %inloc2\n"
2073                 "%rem       = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
2074                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2075                 "             OpStore %outloc %rem\n"
2076                 "             OpReturn\n"
2077                 "             OpFunctionEnd\n";
2078
2079         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2080         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2081         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2082         spec.numWorkGroups = IVec3(numElements, 1, 1);
2083         spec.verifyIO = &compareNMin;
2084
2085         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2086
2087         return group.release();
2088 }
2089
2090 bool compareNMax (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2091 {
2092         if (outputAllocs.size() != 1)
2093                 return false;
2094
2095         const BufferSp&                 expectedOutput                  = expectedOutputs[0].getBuffer();
2096         std::vector<deUint8>    data;
2097         expectedOutput->getBytes(data);
2098
2099         const float* const              expectedOutputAsFloat   = reinterpret_cast<const float*>(&data.front());
2100         const float* const              outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2101
2102         for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2103         {
2104                 const float f0 = expectedOutputAsFloat[idx];
2105                 const float f1 = outputAsFloat[idx];
2106
2107                 // For NMax, NaN is considered acceptable result, since in
2108                 // architectures that do not handle NaNs, those are huge values.
2109                 if (!tcu::Float32(f1).isNaN() && deFloatAbs(f1 - f0) > 0.00001f)
2110                         return false;
2111         }
2112
2113         return true;
2114 }
2115
2116 tcu::TestCaseGroup* createOpNMaxGroup (tcu::TestContext& testCtx)
2117 {
2118         de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnmax", "Test the OpNMax instruction"));
2119         ComputeShaderSpec                               spec;
2120         de::Random                                              rnd                             (deStringHash(group->getName()));
2121         const int                                               numElements             = 200;
2122         vector<float>                                   inputFloats1    (numElements, 0);
2123         vector<float>                                   inputFloats2    (numElements, 0);
2124         vector<float>                                   outputFloats    (numElements, 0);
2125
2126         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2127         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2128
2129         // Make the first case a full-NAN case.
2130         inputFloats1[0] = TCU_NAN;
2131         inputFloats2[0] = TCU_NAN;
2132
2133         for (size_t ndx = 0; ndx < numElements; ++ndx)
2134         {
2135                 // By default, pick the biggest
2136                 outputFloats[ndx] = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2137
2138                 // Make half of the cases NaN cases
2139                 if ((ndx & 1) == 0)
2140                 {
2141                         // Alternate between the NaN operand
2142                         if ((ndx & 2) == 0)
2143                         {
2144                                 outputFloats[ndx] = inputFloats2[ndx];
2145                                 inputFloats1[ndx] = TCU_NAN;
2146                         }
2147                         else
2148                         {
2149                                 outputFloats[ndx] = inputFloats1[ndx];
2150                                 inputFloats2[ndx] = TCU_NAN;
2151                         }
2152                 }
2153         }
2154
2155         spec.assembly =
2156                 "OpCapability Shader\n"
2157                 "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
2158                 "OpMemoryModel Logical GLSL450\n"
2159                 "OpEntryPoint GLCompute %main \"main\" %id\n"
2160                 "OpExecutionMode %main LocalSize 1 1 1\n"
2161
2162                 "OpName %main           \"main\"\n"
2163                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2164
2165                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2166
2167                 "OpDecorate %buf BufferBlock\n"
2168                 "OpDecorate %indata1 DescriptorSet 0\n"
2169                 "OpDecorate %indata1 Binding 0\n"
2170                 "OpDecorate %indata2 DescriptorSet 0\n"
2171                 "OpDecorate %indata2 Binding 1\n"
2172                 "OpDecorate %outdata DescriptorSet 0\n"
2173                 "OpDecorate %outdata Binding 2\n"
2174                 "OpDecorate %f32arr ArrayStride 4\n"
2175                 "OpMemberDecorate %buf 0 Offset 0\n"
2176
2177                 + string(getComputeAsmCommonTypes()) +
2178
2179                 "%buf        = OpTypeStruct %f32arr\n"
2180                 "%bufptr     = OpTypePointer Uniform %buf\n"
2181                 "%indata1    = OpVariable %bufptr Uniform\n"
2182                 "%indata2    = OpVariable %bufptr Uniform\n"
2183                 "%outdata    = OpVariable %bufptr Uniform\n"
2184
2185                 "%id        = OpVariable %uvec3ptr Input\n"
2186                 "%zero      = OpConstant %i32 0\n"
2187
2188                 "%main      = OpFunction %void None %voidf\n"
2189                 "%label     = OpLabel\n"
2190                 "%idval     = OpLoad %uvec3 %id\n"
2191                 "%x         = OpCompositeExtract %u32 %idval 0\n"
2192                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2193                 "%inval1    = OpLoad %f32 %inloc1\n"
2194                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2195                 "%inval2    = OpLoad %f32 %inloc2\n"
2196                 "%rem       = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
2197                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2198                 "             OpStore %outloc %rem\n"
2199                 "             OpReturn\n"
2200                 "             OpFunctionEnd\n";
2201
2202         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2203         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2204         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2205         spec.numWorkGroups = IVec3(numElements, 1, 1);
2206         spec.verifyIO = &compareNMax;
2207
2208         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2209
2210         return group.release();
2211 }
2212
2213 bool compareNClamp (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2214 {
2215         if (outputAllocs.size() != 1)
2216                 return false;
2217
2218         const BufferSp&                 expectedOutput                  = expectedOutputs[0].getBuffer();
2219         std::vector<deUint8>    data;
2220         expectedOutput->getBytes(data);
2221
2222         const float* const              expectedOutputAsFloat   = reinterpret_cast<const float*>(&data.front());
2223         const float* const              outputAsFloat                   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2224
2225         for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float) / 2; ++idx)
2226         {
2227                 const float e0 = expectedOutputAsFloat[idx * 2];
2228                 const float e1 = expectedOutputAsFloat[idx * 2 + 1];
2229                 const float res = outputAsFloat[idx];
2230
2231                 // For NClamp, we have two possible outcomes based on
2232                 // whether NaNs are handled or not.
2233                 // If either min or max value is NaN, the result is undefined,
2234                 // so this test doesn't stress those. If the clamped value is
2235                 // NaN, and NaNs are handled, the result is min; if NaNs are not
2236                 // handled, they are big values that result in max.
2237                 // If all three parameters are NaN, the result should be NaN.
2238                 if (!((tcu::Float32(e0).isNaN() && tcu::Float32(res).isNaN()) ||
2239                          (deFloatAbs(e0 - res) < 0.00001f) ||
2240                          (deFloatAbs(e1 - res) < 0.00001f)))
2241                         return false;
2242         }
2243
2244         return true;
2245 }
2246
2247 tcu::TestCaseGroup* createOpNClampGroup (tcu::TestContext& testCtx)
2248 {
2249         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opnclamp", "Test the OpNClamp instruction"));
2250         ComputeShaderSpec                               spec;
2251         de::Random                                              rnd                             (deStringHash(group->getName()));
2252         const int                                               numElements             = 200;
2253         vector<float>                                   inputFloats1    (numElements, 0);
2254         vector<float>                                   inputFloats2    (numElements, 0);
2255         vector<float>                                   inputFloats3    (numElements, 0);
2256         vector<float>                                   outputFloats    (numElements * 2, 0);
2257
2258         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2259         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2260         fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats3[0], numElements);
2261
2262         for (size_t ndx = 0; ndx < numElements; ++ndx)
2263         {
2264                 // Results are only defined if max value is bigger than min value.
2265                 if (inputFloats2[ndx] > inputFloats3[ndx])
2266                 {
2267                         float t = inputFloats2[ndx];
2268                         inputFloats2[ndx] = inputFloats3[ndx];
2269                         inputFloats3[ndx] = t;
2270                 }
2271
2272                 // By default, do the clamp, setting both possible answers
2273                 float defaultRes = std::min(std::max(inputFloats1[ndx], inputFloats2[ndx]), inputFloats3[ndx]);
2274
2275                 float maxResA = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2276                 float maxResB = maxResA;
2277
2278                 // Alternate between the NaN cases
2279                 if (ndx & 1)
2280                 {
2281                         inputFloats1[ndx] = TCU_NAN;
2282                         // If NaN is handled, the result should be same as the clamp minimum.
2283                         // If NaN is not handled, the result should clamp to the clamp maximum.
2284                         maxResA = inputFloats2[ndx];
2285                         maxResB = inputFloats3[ndx];
2286                 }
2287                 else
2288                 {
2289                         // Not a NaN case - only one legal result.
2290                         maxResA = defaultRes;
2291                         maxResB = defaultRes;
2292                 }
2293
2294                 outputFloats[ndx * 2] = maxResA;
2295                 outputFloats[ndx * 2 + 1] = maxResB;
2296         }
2297
2298         // Make the first case a full-NAN case.
2299         inputFloats1[0] = TCU_NAN;
2300         inputFloats2[0] = TCU_NAN;
2301         inputFloats3[0] = TCU_NAN;
2302         outputFloats[0] = TCU_NAN;
2303         outputFloats[1] = TCU_NAN;
2304
2305         spec.assembly =
2306                 "OpCapability Shader\n"
2307                 "%std450        = OpExtInstImport \"GLSL.std.450\"\n"
2308                 "OpMemoryModel Logical GLSL450\n"
2309                 "OpEntryPoint GLCompute %main \"main\" %id\n"
2310                 "OpExecutionMode %main LocalSize 1 1 1\n"
2311
2312                 "OpName %main           \"main\"\n"
2313                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2314
2315                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2316
2317                 "OpDecorate %buf BufferBlock\n"
2318                 "OpDecorate %indata1 DescriptorSet 0\n"
2319                 "OpDecorate %indata1 Binding 0\n"
2320                 "OpDecorate %indata2 DescriptorSet 0\n"
2321                 "OpDecorate %indata2 Binding 1\n"
2322                 "OpDecorate %indata3 DescriptorSet 0\n"
2323                 "OpDecorate %indata3 Binding 2\n"
2324                 "OpDecorate %outdata DescriptorSet 0\n"
2325                 "OpDecorate %outdata Binding 3\n"
2326                 "OpDecorate %f32arr ArrayStride 4\n"
2327                 "OpMemberDecorate %buf 0 Offset 0\n"
2328
2329                 + string(getComputeAsmCommonTypes()) +
2330
2331                 "%buf        = OpTypeStruct %f32arr\n"
2332                 "%bufptr     = OpTypePointer Uniform %buf\n"
2333                 "%indata1    = OpVariable %bufptr Uniform\n"
2334                 "%indata2    = OpVariable %bufptr Uniform\n"
2335                 "%indata3    = OpVariable %bufptr Uniform\n"
2336                 "%outdata    = OpVariable %bufptr Uniform\n"
2337
2338                 "%id        = OpVariable %uvec3ptr Input\n"
2339                 "%zero      = OpConstant %i32 0\n"
2340
2341                 "%main      = OpFunction %void None %voidf\n"
2342                 "%label     = OpLabel\n"
2343                 "%idval     = OpLoad %uvec3 %id\n"
2344                 "%x         = OpCompositeExtract %u32 %idval 0\n"
2345                 "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2346                 "%inval1    = OpLoad %f32 %inloc1\n"
2347                 "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2348                 "%inval2    = OpLoad %f32 %inloc2\n"
2349                 "%inloc3    = OpAccessChain %f32ptr %indata3 %zero %x\n"
2350                 "%inval3    = OpLoad %f32 %inloc3\n"
2351                 "%rem       = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
2352                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2353                 "             OpStore %outloc %rem\n"
2354                 "             OpReturn\n"
2355                 "             OpFunctionEnd\n";
2356
2357         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2358         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2359         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
2360         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2361         spec.numWorkGroups = IVec3(numElements, 1, 1);
2362         spec.verifyIO = &compareNClamp;
2363
2364         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2365
2366         return group.release();
2367 }
2368
2369 tcu::TestCaseGroup* createOpSRemComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2370 {
2371         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsrem", "Test the OpSRem instruction"));
2372         de::Random                                              rnd                             (deStringHash(group->getName()));
2373         const int                                               numElements             = 200;
2374
2375         const struct CaseParams
2376         {
2377                 const char*             name;
2378                 const char*             failMessage;            // customized status message
2379                 qpTestResult    failResult;                     // override status on failure
2380                 int                             op1Min, op1Max;         // operand ranges
2381                 int                             op2Min, op2Max;
2382         } cases[] =
2383         {
2384                 { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    0,              65536,  0,              100 },
2385                 { "all",                "Inconsistent results, but within specification",       negFailResult,                  -65536, 65536,  -100,   100 },  // see below
2386         };
2387         // If either operand is negative the result is undefined. Some implementations may still return correct values.
2388
2389         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2390         {
2391                 const CaseParams&       params          = cases[caseNdx];
2392                 ComputeShaderSpec       spec;
2393                 vector<deInt32>         inputInts1      (numElements, 0);
2394                 vector<deInt32>         inputInts2      (numElements, 0);
2395                 vector<deInt32>         outputInts      (numElements, 0);
2396
2397                 fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2398                 fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2399
2400                 for (int ndx = 0; ndx < numElements; ++ndx)
2401                 {
2402                         // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2403                         outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2404                 }
2405
2406                 spec.assembly =
2407                         string(getComputeAsmShaderPreamble()) +
2408
2409                         "OpName %main           \"main\"\n"
2410                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2411
2412                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2413
2414                         "OpDecorate %buf BufferBlock\n"
2415                         "OpDecorate %indata1 DescriptorSet 0\n"
2416                         "OpDecorate %indata1 Binding 0\n"
2417                         "OpDecorate %indata2 DescriptorSet 0\n"
2418                         "OpDecorate %indata2 Binding 1\n"
2419                         "OpDecorate %outdata DescriptorSet 0\n"
2420                         "OpDecorate %outdata Binding 2\n"
2421                         "OpDecorate %i32arr ArrayStride 4\n"
2422                         "OpMemberDecorate %buf 0 Offset 0\n"
2423
2424                         + string(getComputeAsmCommonTypes()) +
2425
2426                         "%buf        = OpTypeStruct %i32arr\n"
2427                         "%bufptr     = OpTypePointer Uniform %buf\n"
2428                         "%indata1    = OpVariable %bufptr Uniform\n"
2429                         "%indata2    = OpVariable %bufptr Uniform\n"
2430                         "%outdata    = OpVariable %bufptr Uniform\n"
2431
2432                         "%id        = OpVariable %uvec3ptr Input\n"
2433                         "%zero      = OpConstant %i32 0\n"
2434
2435                         "%main      = OpFunction %void None %voidf\n"
2436                         "%label     = OpLabel\n"
2437                         "%idval     = OpLoad %uvec3 %id\n"
2438                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2439                         "%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
2440                         "%inval1    = OpLoad %i32 %inloc1\n"
2441                         "%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
2442                         "%inval2    = OpLoad %i32 %inloc2\n"
2443                         "%rem       = OpSRem %i32 %inval1 %inval2\n"
2444                         "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
2445                         "             OpStore %outloc %rem\n"
2446                         "             OpReturn\n"
2447                         "             OpFunctionEnd\n";
2448
2449                 spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts1)));
2450                 spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts2)));
2451                 spec.outputs.push_back  (BufferSp(new Int32Buffer(outputInts)));
2452                 spec.numWorkGroups              = IVec3(numElements, 1, 1);
2453                 spec.failResult                 = params.failResult;
2454                 spec.failMessage                = params.failMessage;
2455
2456                 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2457         }
2458
2459         return group.release();
2460 }
2461
2462 tcu::TestCaseGroup* createOpSRemComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2463 {
2464         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsrem64", "Test the 64-bit OpSRem instruction"));
2465         de::Random                                              rnd                             (deStringHash(group->getName()));
2466         const int                                               numElements             = 200;
2467
2468         const struct CaseParams
2469         {
2470                 const char*             name;
2471                 const char*             failMessage;            // customized status message
2472                 qpTestResult    failResult;                     // override status on failure
2473                 bool                    positive;
2474         } cases[] =
2475         {
2476                 { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    true },
2477                 { "all",                "Inconsistent results, but within specification",       negFailResult,                  false },        // see below
2478         };
2479         // If either operand is negative the result is undefined. Some implementations may still return correct values.
2480
2481         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2482         {
2483                 const CaseParams&       params          = cases[caseNdx];
2484                 ComputeShaderSpec       spec;
2485                 vector<deInt64>         inputInts1      (numElements, 0);
2486                 vector<deInt64>         inputInts2      (numElements, 0);
2487                 vector<deInt64>         outputInts      (numElements, 0);
2488
2489                 if (params.positive)
2490                 {
2491                         fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2492                         fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2493                 }
2494                 else
2495                 {
2496                         fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2497                         fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2498                 }
2499
2500                 for (int ndx = 0; ndx < numElements; ++ndx)
2501                 {
2502                         // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2503                         outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2504                 }
2505
2506                 spec.assembly =
2507                         "OpCapability Int64\n"
2508
2509                         + string(getComputeAsmShaderPreamble()) +
2510
2511                         "OpName %main           \"main\"\n"
2512                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2513
2514                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2515
2516                         "OpDecorate %buf BufferBlock\n"
2517                         "OpDecorate %indata1 DescriptorSet 0\n"
2518                         "OpDecorate %indata1 Binding 0\n"
2519                         "OpDecorate %indata2 DescriptorSet 0\n"
2520                         "OpDecorate %indata2 Binding 1\n"
2521                         "OpDecorate %outdata DescriptorSet 0\n"
2522                         "OpDecorate %outdata Binding 2\n"
2523                         "OpDecorate %i64arr ArrayStride 8\n"
2524                         "OpMemberDecorate %buf 0 Offset 0\n"
2525
2526                         + string(getComputeAsmCommonTypes())
2527                         + string(getComputeAsmCommonInt64Types()) +
2528
2529                         "%buf        = OpTypeStruct %i64arr\n"
2530                         "%bufptr     = OpTypePointer Uniform %buf\n"
2531                         "%indata1    = OpVariable %bufptr Uniform\n"
2532                         "%indata2    = OpVariable %bufptr Uniform\n"
2533                         "%outdata    = OpVariable %bufptr Uniform\n"
2534
2535                         "%id        = OpVariable %uvec3ptr Input\n"
2536                         "%zero      = OpConstant %i64 0\n"
2537
2538                         "%main      = OpFunction %void None %voidf\n"
2539                         "%label     = OpLabel\n"
2540                         "%idval     = OpLoad %uvec3 %id\n"
2541                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2542                         "%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
2543                         "%inval1    = OpLoad %i64 %inloc1\n"
2544                         "%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
2545                         "%inval2    = OpLoad %i64 %inloc2\n"
2546                         "%rem       = OpSRem %i64 %inval1 %inval2\n"
2547                         "%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
2548                         "             OpStore %outloc %rem\n"
2549                         "             OpReturn\n"
2550                         "             OpFunctionEnd\n";
2551
2552                 spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts1)));
2553                 spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts2)));
2554                 spec.outputs.push_back  (BufferSp(new Int64Buffer(outputInts)));
2555                 spec.numWorkGroups              = IVec3(numElements, 1, 1);
2556                 spec.failResult                 = params.failResult;
2557                 spec.failMessage                = params.failMessage;
2558
2559                 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2560
2561                 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2562         }
2563
2564         return group.release();
2565 }
2566
2567 tcu::TestCaseGroup* createOpSModComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2568 {
2569         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsmod", "Test the OpSMod instruction"));
2570         de::Random                                              rnd                             (deStringHash(group->getName()));
2571         const int                                               numElements             = 200;
2572
2573         const struct CaseParams
2574         {
2575                 const char*             name;
2576                 const char*             failMessage;            // customized status message
2577                 qpTestResult    failResult;                     // override status on failure
2578                 int                             op1Min, op1Max;         // operand ranges
2579                 int                             op2Min, op2Max;
2580         } cases[] =
2581         {
2582                 { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    0,              65536,  0,              100 },
2583                 { "all",                "Inconsistent results, but within specification",       negFailResult,                  -65536, 65536,  -100,   100 },  // see below
2584         };
2585         // If either operand is negative the result is undefined. Some implementations may still return correct values.
2586
2587         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2588         {
2589                 const CaseParams&       params          = cases[caseNdx];
2590
2591                 ComputeShaderSpec       spec;
2592                 vector<deInt32>         inputInts1      (numElements, 0);
2593                 vector<deInt32>         inputInts2      (numElements, 0);
2594                 vector<deInt32>         outputInts      (numElements, 0);
2595
2596                 fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2597                 fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2598
2599                 for (int ndx = 0; ndx < numElements; ++ndx)
2600                 {
2601                         deInt32 rem = inputInts1[ndx] % inputInts2[ndx];
2602                         if (rem == 0)
2603                         {
2604                                 outputInts[ndx] = 0;
2605                         }
2606                         else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2607                         {
2608                                 // They have the same sign
2609                                 outputInts[ndx] = rem;
2610                         }
2611                         else
2612                         {
2613                                 // They have opposite sign.  The remainder operation takes the
2614                                 // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2615                                 // of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
2616                                 // the result has the correct sign and that it is still
2617                                 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2618                                 //
2619                                 // See also http://mathforum.org/library/drmath/view/52343.html
2620                                 outputInts[ndx] = rem + inputInts2[ndx];
2621                         }
2622                 }
2623
2624                 spec.assembly =
2625                         string(getComputeAsmShaderPreamble()) +
2626
2627                         "OpName %main           \"main\"\n"
2628                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2629
2630                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2631
2632                         "OpDecorate %buf BufferBlock\n"
2633                         "OpDecorate %indata1 DescriptorSet 0\n"
2634                         "OpDecorate %indata1 Binding 0\n"
2635                         "OpDecorate %indata2 DescriptorSet 0\n"
2636                         "OpDecorate %indata2 Binding 1\n"
2637                         "OpDecorate %outdata DescriptorSet 0\n"
2638                         "OpDecorate %outdata Binding 2\n"
2639                         "OpDecorate %i32arr ArrayStride 4\n"
2640                         "OpMemberDecorate %buf 0 Offset 0\n"
2641
2642                         + string(getComputeAsmCommonTypes()) +
2643
2644                         "%buf        = OpTypeStruct %i32arr\n"
2645                         "%bufptr     = OpTypePointer Uniform %buf\n"
2646                         "%indata1    = OpVariable %bufptr Uniform\n"
2647                         "%indata2    = OpVariable %bufptr Uniform\n"
2648                         "%outdata    = OpVariable %bufptr Uniform\n"
2649
2650                         "%id        = OpVariable %uvec3ptr Input\n"
2651                         "%zero      = OpConstant %i32 0\n"
2652
2653                         "%main      = OpFunction %void None %voidf\n"
2654                         "%label     = OpLabel\n"
2655                         "%idval     = OpLoad %uvec3 %id\n"
2656                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2657                         "%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
2658                         "%inval1    = OpLoad %i32 %inloc1\n"
2659                         "%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
2660                         "%inval2    = OpLoad %i32 %inloc2\n"
2661                         "%rem       = OpSMod %i32 %inval1 %inval2\n"
2662                         "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
2663                         "             OpStore %outloc %rem\n"
2664                         "             OpReturn\n"
2665                         "             OpFunctionEnd\n";
2666
2667                 spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts1)));
2668                 spec.inputs.push_back   (BufferSp(new Int32Buffer(inputInts2)));
2669                 spec.outputs.push_back  (BufferSp(new Int32Buffer(outputInts)));
2670                 spec.numWorkGroups              = IVec3(numElements, 1, 1);
2671                 spec.failResult                 = params.failResult;
2672                 spec.failMessage                = params.failMessage;
2673
2674                 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2675         }
2676
2677         return group.release();
2678 }
2679
2680 tcu::TestCaseGroup* createOpSModComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2681 {
2682         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsmod64", "Test the OpSMod instruction"));
2683         de::Random                                              rnd                             (deStringHash(group->getName()));
2684         const int                                               numElements             = 200;
2685
2686         const struct CaseParams
2687         {
2688                 const char*             name;
2689                 const char*             failMessage;            // customized status message
2690                 qpTestResult    failResult;                     // override status on failure
2691                 bool                    positive;
2692         } cases[] =
2693         {
2694                 { "positive",   "Output doesn't match with expected",                           QP_TEST_RESULT_FAIL,    true },
2695                 { "all",                "Inconsistent results, but within specification",       negFailResult,                  false },        // see below
2696         };
2697         // If either operand is negative the result is undefined. Some implementations may still return correct values.
2698
2699         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2700         {
2701                 const CaseParams&       params          = cases[caseNdx];
2702
2703                 ComputeShaderSpec       spec;
2704                 vector<deInt64>         inputInts1      (numElements, 0);
2705                 vector<deInt64>         inputInts2      (numElements, 0);
2706                 vector<deInt64>         outputInts      (numElements, 0);
2707
2708
2709                 if (params.positive)
2710                 {
2711                         fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2712                         fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2713                 }
2714                 else
2715                 {
2716                         fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2717                         fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2718                 }
2719
2720                 for (int ndx = 0; ndx < numElements; ++ndx)
2721                 {
2722                         deInt64 rem = inputInts1[ndx] % inputInts2[ndx];
2723                         if (rem == 0)
2724                         {
2725                                 outputInts[ndx] = 0;
2726                         }
2727                         else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2728                         {
2729                                 // They have the same sign
2730                                 outputInts[ndx] = rem;
2731                         }
2732                         else
2733                         {
2734                                 // They have opposite sign.  The remainder operation takes the
2735                                 // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2736                                 // of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
2737                                 // the result has the correct sign and that it is still
2738                                 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2739                                 //
2740                                 // See also http://mathforum.org/library/drmath/view/52343.html
2741                                 outputInts[ndx] = rem + inputInts2[ndx];
2742                         }
2743                 }
2744
2745                 spec.assembly =
2746                         "OpCapability Int64\n"
2747
2748                         + string(getComputeAsmShaderPreamble()) +
2749
2750                         "OpName %main           \"main\"\n"
2751                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2752
2753                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2754
2755                         "OpDecorate %buf BufferBlock\n"
2756                         "OpDecorate %indata1 DescriptorSet 0\n"
2757                         "OpDecorate %indata1 Binding 0\n"
2758                         "OpDecorate %indata2 DescriptorSet 0\n"
2759                         "OpDecorate %indata2 Binding 1\n"
2760                         "OpDecorate %outdata DescriptorSet 0\n"
2761                         "OpDecorate %outdata Binding 2\n"
2762                         "OpDecorate %i64arr ArrayStride 8\n"
2763                         "OpMemberDecorate %buf 0 Offset 0\n"
2764
2765                         + string(getComputeAsmCommonTypes())
2766                         + string(getComputeAsmCommonInt64Types()) +
2767
2768                         "%buf        = OpTypeStruct %i64arr\n"
2769                         "%bufptr     = OpTypePointer Uniform %buf\n"
2770                         "%indata1    = OpVariable %bufptr Uniform\n"
2771                         "%indata2    = OpVariable %bufptr Uniform\n"
2772                         "%outdata    = OpVariable %bufptr Uniform\n"
2773
2774                         "%id        = OpVariable %uvec3ptr Input\n"
2775                         "%zero      = OpConstant %i64 0\n"
2776
2777                         "%main      = OpFunction %void None %voidf\n"
2778                         "%label     = OpLabel\n"
2779                         "%idval     = OpLoad %uvec3 %id\n"
2780                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2781                         "%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
2782                         "%inval1    = OpLoad %i64 %inloc1\n"
2783                         "%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
2784                         "%inval2    = OpLoad %i64 %inloc2\n"
2785                         "%rem       = OpSMod %i64 %inval1 %inval2\n"
2786                         "%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
2787                         "             OpStore %outloc %rem\n"
2788                         "             OpReturn\n"
2789                         "             OpFunctionEnd\n";
2790
2791                 spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts1)));
2792                 spec.inputs.push_back   (BufferSp(new Int64Buffer(inputInts2)));
2793                 spec.outputs.push_back  (BufferSp(new Int64Buffer(outputInts)));
2794                 spec.numWorkGroups              = IVec3(numElements, 1, 1);
2795                 spec.failResult                 = params.failResult;
2796                 spec.failMessage                = params.failMessage;
2797
2798                 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2799
2800                 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2801         }
2802
2803         return group.release();
2804 }
2805
2806 // Copy contents in the input buffer to the output buffer.
2807 tcu::TestCaseGroup* createOpCopyMemoryGroup (tcu::TestContext& testCtx)
2808 {
2809         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opcopymemory", "Test the OpCopyMemory instruction"));
2810         de::Random                                              rnd                             (deStringHash(group->getName()));
2811         const int                                               numElements             = 100;
2812
2813         // The following case adds vec4(0., 0.5, 1.5, 2.5) to each of the elements in the input buffer and writes output to the output buffer.
2814         ComputeShaderSpec                               spec1;
2815         vector<Vec4>                                    inputFloats1    (numElements);
2816         vector<Vec4>                                    outputFloats1   (numElements);
2817
2818         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats1[0], numElements * 4);
2819
2820         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
2821         floorAll(inputFloats1);
2822
2823         for (size_t ndx = 0; ndx < numElements; ++ndx)
2824                 outputFloats1[ndx] = inputFloats1[ndx] + Vec4(0.f, 0.5f, 1.5f, 2.5f);
2825
2826         spec1.assembly =
2827                 string(getComputeAsmShaderPreamble()) +
2828
2829                 "OpName %main           \"main\"\n"
2830                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2831
2832                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2833                 "OpDecorate %vec4arr ArrayStride 16\n"
2834
2835                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2836
2837                 "%vec4       = OpTypeVector %f32 4\n"
2838                 "%vec4ptr_u  = OpTypePointer Uniform %vec4\n"
2839                 "%vec4ptr_f  = OpTypePointer Function %vec4\n"
2840                 "%vec4arr    = OpTypeRuntimeArray %vec4\n"
2841                 "%buf        = OpTypeStruct %vec4arr\n"
2842                 "%bufptr     = OpTypePointer Uniform %buf\n"
2843                 "%indata     = OpVariable %bufptr Uniform\n"
2844                 "%outdata    = OpVariable %bufptr Uniform\n"
2845
2846                 "%id         = OpVariable %uvec3ptr Input\n"
2847                 "%zero       = OpConstant %i32 0\n"
2848                 "%c_f_0      = OpConstant %f32 0.\n"
2849                 "%c_f_0_5    = OpConstant %f32 0.5\n"
2850                 "%c_f_1_5    = OpConstant %f32 1.5\n"
2851                 "%c_f_2_5    = OpConstant %f32 2.5\n"
2852                 "%c_vec4     = OpConstantComposite %vec4 %c_f_0 %c_f_0_5 %c_f_1_5 %c_f_2_5\n"
2853
2854                 "%main       = OpFunction %void None %voidf\n"
2855                 "%label      = OpLabel\n"
2856                 "%v_vec4     = OpVariable %vec4ptr_f Function\n"
2857                 "%idval      = OpLoad %uvec3 %id\n"
2858                 "%x          = OpCompositeExtract %u32 %idval 0\n"
2859                 "%inloc      = OpAccessChain %vec4ptr_u %indata %zero %x\n"
2860                 "%outloc     = OpAccessChain %vec4ptr_u %outdata %zero %x\n"
2861                 "              OpCopyMemory %v_vec4 %inloc\n"
2862                 "%v_vec4_val = OpLoad %vec4 %v_vec4\n"
2863                 "%add        = OpFAdd %vec4 %v_vec4_val %c_vec4\n"
2864                 "              OpStore %outloc %add\n"
2865                 "              OpReturn\n"
2866                 "              OpFunctionEnd\n";
2867
2868         spec1.inputs.push_back(BufferSp(new Vec4Buffer(inputFloats1)));
2869         spec1.outputs.push_back(BufferSp(new Vec4Buffer(outputFloats1)));
2870         spec1.numWorkGroups = IVec3(numElements, 1, 1);
2871
2872         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector", "OpCopyMemory elements of vector type", spec1));
2873
2874         // The following case copies a float[100] variable from the input buffer to the output buffer.
2875         ComputeShaderSpec                               spec2;
2876         vector<float>                                   inputFloats2    (numElements);
2877         vector<float>                                   outputFloats2   (numElements);
2878
2879         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats2[0], numElements);
2880
2881         for (size_t ndx = 0; ndx < numElements; ++ndx)
2882                 outputFloats2[ndx] = inputFloats2[ndx];
2883
2884         spec2.assembly =
2885                 string(getComputeAsmShaderPreamble()) +
2886
2887                 "OpName %main           \"main\"\n"
2888                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2889
2890                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2891                 "OpDecorate %f32arr100 ArrayStride 4\n"
2892
2893                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2894
2895                 "%hundred        = OpConstant %u32 100\n"
2896                 "%f32arr100      = OpTypeArray %f32 %hundred\n"
2897                 "%f32arr100ptr_f = OpTypePointer Function %f32arr100\n"
2898                 "%f32arr100ptr_u = OpTypePointer Uniform %f32arr100\n"
2899                 "%buf            = OpTypeStruct %f32arr100\n"
2900                 "%bufptr         = OpTypePointer Uniform %buf\n"
2901                 "%indata         = OpVariable %bufptr Uniform\n"
2902                 "%outdata        = OpVariable %bufptr Uniform\n"
2903
2904                 "%id             = OpVariable %uvec3ptr Input\n"
2905                 "%zero           = OpConstant %i32 0\n"
2906
2907                 "%main           = OpFunction %void None %voidf\n"
2908                 "%label          = OpLabel\n"
2909                 "%var            = OpVariable %f32arr100ptr_f Function\n"
2910                 "%inarr          = OpAccessChain %f32arr100ptr_u %indata %zero\n"
2911                 "%outarr         = OpAccessChain %f32arr100ptr_u %outdata %zero\n"
2912                 "                  OpCopyMemory %var %inarr\n"
2913                 "                  OpCopyMemory %outarr %var\n"
2914                 "                  OpReturn\n"
2915                 "                  OpFunctionEnd\n";
2916
2917         spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2918         spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
2919         spec2.numWorkGroups = IVec3(1, 1, 1);
2920
2921         group->addChild(new SpvAsmComputeShaderCase(testCtx, "array", "OpCopyMemory elements of array type", spec2));
2922
2923         // The following case copies a struct{vec4, vec4, vec4, vec4} variable from the input buffer to the output buffer.
2924         ComputeShaderSpec                               spec3;
2925         vector<float>                                   inputFloats3    (16);
2926         vector<float>                                   outputFloats3   (16);
2927
2928         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats3[0], 16);
2929
2930         for (size_t ndx = 0; ndx < 16; ++ndx)
2931                 outputFloats3[ndx] = inputFloats3[ndx];
2932
2933         spec3.assembly =
2934                 string(getComputeAsmShaderPreamble()) +
2935
2936                 "OpName %main           \"main\"\n"
2937                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2938
2939                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2940                 //"OpMemberDecorate %buf 0 Offset 0\n"  - exists in getComputeAsmInputOutputBufferTraits
2941                 "OpMemberDecorate %buf 1 Offset 16\n"
2942                 "OpMemberDecorate %buf 2 Offset 32\n"
2943                 "OpMemberDecorate %buf 3 Offset 48\n"
2944
2945                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2946
2947                 "%vec4      = OpTypeVector %f32 4\n"
2948                 "%buf       = OpTypeStruct %vec4 %vec4 %vec4 %vec4\n"
2949                 "%bufptr    = OpTypePointer Uniform %buf\n"
2950                 "%indata    = OpVariable %bufptr Uniform\n"
2951                 "%outdata   = OpVariable %bufptr Uniform\n"
2952                 "%vec4stptr = OpTypePointer Function %buf\n"
2953
2954                 "%id        = OpVariable %uvec3ptr Input\n"
2955                 "%zero      = OpConstant %i32 0\n"
2956
2957                 "%main      = OpFunction %void None %voidf\n"
2958                 "%label     = OpLabel\n"
2959                 "%var       = OpVariable %vec4stptr Function\n"
2960                 "             OpCopyMemory %var %indata\n"
2961                 "             OpCopyMemory %outdata %var\n"
2962                 "             OpReturn\n"
2963                 "             OpFunctionEnd\n";
2964
2965         spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
2966         spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
2967         spec3.numWorkGroups = IVec3(1, 1, 1);
2968
2969         group->addChild(new SpvAsmComputeShaderCase(testCtx, "struct", "OpCopyMemory elements of struct type", spec3));
2970
2971         // The following case negates multiple float variables from the input buffer and stores the results to the output buffer.
2972         ComputeShaderSpec                               spec4;
2973         vector<float>                                   inputFloats4    (numElements);
2974         vector<float>                                   outputFloats4   (numElements);
2975
2976         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats4[0], numElements);
2977
2978         for (size_t ndx = 0; ndx < numElements; ++ndx)
2979                 outputFloats4[ndx] = -inputFloats4[ndx];
2980
2981         spec4.assembly =
2982                 string(getComputeAsmShaderPreamble()) +
2983
2984                 "OpName %main           \"main\"\n"
2985                 "OpName %id             \"gl_GlobalInvocationID\"\n"
2986
2987                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2988
2989                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
2990
2991                 "%f32ptr_f  = OpTypePointer Function %f32\n"
2992                 "%id        = OpVariable %uvec3ptr Input\n"
2993                 "%zero      = OpConstant %i32 0\n"
2994
2995                 "%main      = OpFunction %void None %voidf\n"
2996                 "%label     = OpLabel\n"
2997                 "%var       = OpVariable %f32ptr_f Function\n"
2998                 "%idval     = OpLoad %uvec3 %id\n"
2999                 "%x         = OpCompositeExtract %u32 %idval 0\n"
3000                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
3001                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
3002                 "             OpCopyMemory %var %inloc\n"
3003                 "%val       = OpLoad %f32 %var\n"
3004                 "%neg       = OpFNegate %f32 %val\n"
3005                 "             OpStore %outloc %neg\n"
3006                 "             OpReturn\n"
3007                 "             OpFunctionEnd\n";
3008
3009         spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3010         spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
3011         spec4.numWorkGroups = IVec3(numElements, 1, 1);
3012
3013         group->addChild(new SpvAsmComputeShaderCase(testCtx, "float", "OpCopyMemory elements of float type", spec4));
3014
3015         return group.release();
3016 }
3017
3018 tcu::TestCaseGroup* createOpCopyObjectGroup (tcu::TestContext& testCtx)
3019 {
3020         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opcopyobject", "Test the OpCopyObject instruction"));
3021         ComputeShaderSpec                               spec;
3022         de::Random                                              rnd                             (deStringHash(group->getName()));
3023         const int                                               numElements             = 100;
3024         vector<float>                                   inputFloats             (numElements, 0);
3025         vector<float>                                   outputFloats    (numElements, 0);
3026
3027         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
3028
3029         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3030         floorAll(inputFloats);
3031
3032         for (size_t ndx = 0; ndx < numElements; ++ndx)
3033                 outputFloats[ndx] = inputFloats[ndx] + 7.5f;
3034
3035         spec.assembly =
3036                 string(getComputeAsmShaderPreamble()) +
3037
3038                 "OpName %main           \"main\"\n"
3039                 "OpName %id             \"gl_GlobalInvocationID\"\n"
3040
3041                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3042
3043                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3044
3045                 "%fmat     = OpTypeMatrix %fvec3 3\n"
3046                 "%three    = OpConstant %u32 3\n"
3047                 "%farr     = OpTypeArray %f32 %three\n"
3048                 "%fst      = OpTypeStruct %f32 %f32\n"
3049
3050                 + string(getComputeAsmInputOutputBuffer()) +
3051
3052                 "%id            = OpVariable %uvec3ptr Input\n"
3053                 "%zero          = OpConstant %i32 0\n"
3054                 "%c_f           = OpConstant %f32 1.5\n"
3055                 "%c_fvec3       = OpConstantComposite %fvec3 %c_f %c_f %c_f\n"
3056                 "%c_fmat        = OpConstantComposite %fmat %c_fvec3 %c_fvec3 %c_fvec3\n"
3057                 "%c_farr        = OpConstantComposite %farr %c_f %c_f %c_f\n"
3058                 "%c_fst         = OpConstantComposite %fst %c_f %c_f\n"
3059
3060                 "%main          = OpFunction %void None %voidf\n"
3061                 "%label         = OpLabel\n"
3062                 "%c_f_copy      = OpCopyObject %f32   %c_f\n"
3063                 "%c_fvec3_copy  = OpCopyObject %fvec3 %c_fvec3\n"
3064                 "%c_fmat_copy   = OpCopyObject %fmat  %c_fmat\n"
3065                 "%c_farr_copy   = OpCopyObject %farr  %c_farr\n"
3066                 "%c_fst_copy    = OpCopyObject %fst   %c_fst\n"
3067                 "%fvec3_elem    = OpCompositeExtract %f32 %c_fvec3_copy 0\n"
3068                 "%fmat_elem     = OpCompositeExtract %f32 %c_fmat_copy 1 2\n"
3069                 "%farr_elem     = OpCompositeExtract %f32 %c_farr_copy 2\n"
3070                 "%fst_elem      = OpCompositeExtract %f32 %c_fst_copy 1\n"
3071                 // Add up. 1.5 * 5 = 7.5.
3072                 "%add1          = OpFAdd %f32 %c_f_copy %fvec3_elem\n"
3073                 "%add2          = OpFAdd %f32 %add1     %fmat_elem\n"
3074                 "%add3          = OpFAdd %f32 %add2     %farr_elem\n"
3075                 "%add4          = OpFAdd %f32 %add3     %fst_elem\n"
3076
3077                 "%idval         = OpLoad %uvec3 %id\n"
3078                 "%x             = OpCompositeExtract %u32 %idval 0\n"
3079                 "%inloc         = OpAccessChain %f32ptr %indata %zero %x\n"
3080                 "%outloc        = OpAccessChain %f32ptr %outdata %zero %x\n"
3081                 "%inval         = OpLoad %f32 %inloc\n"
3082                 "%add           = OpFAdd %f32 %add4 %inval\n"
3083                 "                 OpStore %outloc %add\n"
3084                 "                 OpReturn\n"
3085                 "                 OpFunctionEnd\n";
3086         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3087         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3088         spec.numWorkGroups = IVec3(numElements, 1, 1);
3089
3090         group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "OpCopyObject on different types", spec));
3091
3092         return group.release();
3093 }
3094 // Assembly code used for testing OpUnreachable is based on GLSL source code:
3095 //
3096 // #version 430
3097 //
3098 // layout(std140, set = 0, binding = 0) readonly buffer Input {
3099 //   float elements[];
3100 // } input_data;
3101 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
3102 //   float elements[];
3103 // } output_data;
3104 //
3105 // void not_called_func() {
3106 //   // place OpUnreachable here
3107 // }
3108 //
3109 // uint modulo4(uint val) {
3110 //   switch (val % uint(4)) {
3111 //     case 0:  return 3;
3112 //     case 1:  return 2;
3113 //     case 2:  return 1;
3114 //     case 3:  return 0;
3115 //     default: return 100; // place OpUnreachable here
3116 //   }
3117 // }
3118 //
3119 // uint const5() {
3120 //   return 5;
3121 //   // place OpUnreachable here
3122 // }
3123 //
3124 // void main() {
3125 //   uint x = gl_GlobalInvocationID.x;
3126 //   if (const5() > modulo4(1000)) {
3127 //     output_data.elements[x] = -input_data.elements[x];
3128 //   } else {
3129 //     // place OpUnreachable here
3130 //     output_data.elements[x] = input_data.elements[x];
3131 //   }
3132 // }
3133
3134 tcu::TestCaseGroup* createOpUnreachableGroup (tcu::TestContext& testCtx)
3135 {
3136         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opunreachable", "Test the OpUnreachable instruction"));
3137         ComputeShaderSpec                               spec;
3138         de::Random                                              rnd                             (deStringHash(group->getName()));
3139         const int                                               numElements             = 100;
3140         vector<float>                                   positiveFloats  (numElements, 0);
3141         vector<float>                                   negativeFloats  (numElements, 0);
3142
3143         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
3144
3145         for (size_t ndx = 0; ndx < numElements; ++ndx)
3146                 negativeFloats[ndx] = -positiveFloats[ndx];
3147
3148         spec.assembly =
3149                 string(getComputeAsmShaderPreamble()) +
3150
3151                 "OpSource GLSL 430\n"
3152                 "OpName %main            \"main\"\n"
3153                 "OpName %func_not_called_func \"not_called_func(\"\n"
3154                 "OpName %func_modulo4         \"modulo4(u1;\"\n"
3155                 "OpName %func_const5          \"const5(\"\n"
3156                 "OpName %id                   \"gl_GlobalInvocationID\"\n"
3157
3158                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3159
3160                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3161
3162                 "%u32ptr    = OpTypePointer Function %u32\n"
3163                 "%uintfuint = OpTypeFunction %u32 %u32ptr\n"
3164                 "%unitf     = OpTypeFunction %u32\n"
3165
3166                 "%id        = OpVariable %uvec3ptr Input\n"
3167                 "%zero      = OpConstant %u32 0\n"
3168                 "%one       = OpConstant %u32 1\n"
3169                 "%two       = OpConstant %u32 2\n"
3170                 "%three     = OpConstant %u32 3\n"
3171                 "%four      = OpConstant %u32 4\n"
3172                 "%five      = OpConstant %u32 5\n"
3173                 "%hundred   = OpConstant %u32 100\n"
3174                 "%thousand  = OpConstant %u32 1000\n"
3175
3176                 + string(getComputeAsmInputOutputBuffer()) +
3177
3178                 // Main()
3179                 "%main   = OpFunction %void None %voidf\n"
3180                 "%main_entry  = OpLabel\n"
3181                 "%v_thousand  = OpVariable %u32ptr Function %thousand\n"
3182                 "%idval       = OpLoad %uvec3 %id\n"
3183                 "%x           = OpCompositeExtract %u32 %idval 0\n"
3184                 "%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
3185                 "%inval       = OpLoad %f32 %inloc\n"
3186                 "%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
3187                 "%ret_const5  = OpFunctionCall %u32 %func_const5\n"
3188                 "%ret_modulo4 = OpFunctionCall %u32 %func_modulo4 %v_thousand\n"
3189                 "%cmp_gt      = OpUGreaterThan %bool %ret_const5 %ret_modulo4\n"
3190                 "               OpSelectionMerge %if_end None\n"
3191                 "               OpBranchConditional %cmp_gt %if_true %if_false\n"
3192                 "%if_true     = OpLabel\n"
3193                 "%negate      = OpFNegate %f32 %inval\n"
3194                 "               OpStore %outloc %negate\n"
3195                 "               OpBranch %if_end\n"
3196                 "%if_false    = OpLabel\n"
3197                 "               OpUnreachable\n" // Unreachable else branch for if statement
3198                 "%if_end      = OpLabel\n"
3199                 "               OpReturn\n"
3200                 "               OpFunctionEnd\n"
3201
3202                 // not_called_function()
3203                 "%func_not_called_func  = OpFunction %void None %voidf\n"
3204                 "%not_called_func_entry = OpLabel\n"
3205                 "                         OpUnreachable\n" // Unreachable entry block in not called static function
3206                 "                         OpFunctionEnd\n"
3207
3208                 // modulo4()
3209                 "%func_modulo4  = OpFunction %u32 None %uintfuint\n"
3210                 "%valptr        = OpFunctionParameter %u32ptr\n"
3211                 "%modulo4_entry = OpLabel\n"
3212                 "%val           = OpLoad %u32 %valptr\n"
3213                 "%modulo        = OpUMod %u32 %val %four\n"
3214                 "                 OpSelectionMerge %switch_merge None\n"
3215                 "                 OpSwitch %modulo %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
3216                 "%case0         = OpLabel\n"
3217                 "                 OpReturnValue %three\n"
3218                 "%case1         = OpLabel\n"
3219                 "                 OpReturnValue %two\n"
3220                 "%case2         = OpLabel\n"
3221                 "                 OpReturnValue %one\n"
3222                 "%case3         = OpLabel\n"
3223                 "                 OpReturnValue %zero\n"
3224                 "%default       = OpLabel\n"
3225                 "                 OpUnreachable\n" // Unreachable default case for switch statement
3226                 "%switch_merge  = OpLabel\n"
3227                 "                 OpUnreachable\n" // Unreachable merge block for switch statement
3228                 "                 OpFunctionEnd\n"
3229
3230                 // const5()
3231                 "%func_const5  = OpFunction %u32 None %unitf\n"
3232                 "%const5_entry = OpLabel\n"
3233                 "                OpReturnValue %five\n"
3234                 "%unreachable  = OpLabel\n"
3235                 "                OpUnreachable\n" // Unreachable block in function
3236                 "                OpFunctionEnd\n";
3237         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
3238         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
3239         spec.numWorkGroups = IVec3(numElements, 1, 1);
3240
3241         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpUnreachable appearing at different places", spec));
3242
3243         return group.release();
3244 }
3245
3246 // Assembly code used for testing decoration group is based on GLSL source code:
3247 //
3248 // #version 430
3249 //
3250 // layout(std140, set = 0, binding = 0) readonly buffer Input0 {
3251 //   float elements[];
3252 // } input_data0;
3253 // layout(std140, set = 0, binding = 1) readonly buffer Input1 {
3254 //   float elements[];
3255 // } input_data1;
3256 // layout(std140, set = 0, binding = 2) readonly buffer Input2 {
3257 //   float elements[];
3258 // } input_data2;
3259 // layout(std140, set = 0, binding = 3) readonly buffer Input3 {
3260 //   float elements[];
3261 // } input_data3;
3262 // layout(std140, set = 0, binding = 4) readonly buffer Input4 {
3263 //   float elements[];
3264 // } input_data4;
3265 // layout(std140, set = 0, binding = 5) writeonly buffer Output {
3266 //   float elements[];
3267 // } output_data;
3268 //
3269 // void main() {
3270 //   uint x = gl_GlobalInvocationID.x;
3271 //   output_data.elements[x] = input_data0.elements[x] + input_data1.elements[x] + input_data2.elements[x] + input_data3.elements[x] + input_data4.elements[x];
3272 // }
3273 tcu::TestCaseGroup* createDecorationGroupGroup (tcu::TestContext& testCtx)
3274 {
3275         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "decoration_group", "Test the OpDecorationGroup & OpGroupDecorate instruction"));
3276         ComputeShaderSpec                               spec;
3277         de::Random                                              rnd                             (deStringHash(group->getName()));
3278         const int                                               numElements             = 100;
3279         vector<float>                                   inputFloats0    (numElements, 0);
3280         vector<float>                                   inputFloats1    (numElements, 0);
3281         vector<float>                                   inputFloats2    (numElements, 0);
3282         vector<float>                                   inputFloats3    (numElements, 0);
3283         vector<float>                                   inputFloats4    (numElements, 0);
3284         vector<float>                                   outputFloats    (numElements, 0);
3285
3286         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats0[0], numElements);
3287         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats1[0], numElements);
3288         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats2[0], numElements);
3289         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats3[0], numElements);
3290         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats4[0], numElements);
3291
3292         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3293         floorAll(inputFloats0);
3294         floorAll(inputFloats1);
3295         floorAll(inputFloats2);
3296         floorAll(inputFloats3);
3297         floorAll(inputFloats4);
3298
3299         for (size_t ndx = 0; ndx < numElements; ++ndx)
3300                 outputFloats[ndx] = inputFloats0[ndx] + inputFloats1[ndx] + inputFloats2[ndx] + inputFloats3[ndx] + inputFloats4[ndx];
3301
3302         spec.assembly =
3303                 string(getComputeAsmShaderPreamble()) +
3304
3305                 "OpSource GLSL 430\n"
3306                 "OpName %main \"main\"\n"
3307                 "OpName %id \"gl_GlobalInvocationID\"\n"
3308
3309                 // Not using group decoration on variable.
3310                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3311                 // Not using group decoration on type.
3312                 "OpDecorate %f32arr ArrayStride 4\n"
3313
3314                 "OpDecorate %groups BufferBlock\n"
3315                 "OpDecorate %groupm Offset 0\n"
3316                 "%groups = OpDecorationGroup\n"
3317                 "%groupm = OpDecorationGroup\n"
3318
3319                 // Group decoration on multiple structs.
3320                 "OpGroupDecorate %groups %outbuf %inbuf0 %inbuf1 %inbuf2 %inbuf3 %inbuf4\n"
3321                 // Group decoration on multiple struct members.
3322                 "OpGroupMemberDecorate %groupm %outbuf 0 %inbuf0 0 %inbuf1 0 %inbuf2 0 %inbuf3 0 %inbuf4 0\n"
3323
3324                 "OpDecorate %group1 DescriptorSet 0\n"
3325                 "OpDecorate %group3 DescriptorSet 0\n"
3326                 "OpDecorate %group3 NonWritable\n"
3327                 "OpDecorate %group3 Restrict\n"
3328                 "%group0 = OpDecorationGroup\n"
3329                 "%group1 = OpDecorationGroup\n"
3330                 "%group3 = OpDecorationGroup\n"
3331
3332                 // Applying the same decoration group multiple times.
3333                 "OpGroupDecorate %group1 %outdata\n"
3334                 "OpGroupDecorate %group1 %outdata\n"
3335                 "OpGroupDecorate %group1 %outdata\n"
3336                 "OpDecorate %outdata DescriptorSet 0\n"
3337                 "OpDecorate %outdata Binding 5\n"
3338                 // Applying decoration group containing nothing.
3339                 "OpGroupDecorate %group0 %indata0\n"
3340                 "OpDecorate %indata0 DescriptorSet 0\n"
3341                 "OpDecorate %indata0 Binding 0\n"
3342                 // Applying decoration group containing one decoration.
3343                 "OpGroupDecorate %group1 %indata1\n"
3344                 "OpDecorate %indata1 Binding 1\n"
3345                 // Applying decoration group containing multiple decorations.
3346                 "OpGroupDecorate %group3 %indata2 %indata3\n"
3347                 "OpDecorate %indata2 Binding 2\n"
3348                 "OpDecorate %indata3 Binding 3\n"
3349                 // Applying multiple decoration groups (with overlapping).
3350                 "OpGroupDecorate %group0 %indata4\n"
3351                 "OpGroupDecorate %group1 %indata4\n"
3352                 "OpGroupDecorate %group3 %indata4\n"
3353                 "OpDecorate %indata4 Binding 4\n"
3354
3355                 + string(getComputeAsmCommonTypes()) +
3356
3357                 "%id   = OpVariable %uvec3ptr Input\n"
3358                 "%zero = OpConstant %i32 0\n"
3359
3360                 "%outbuf    = OpTypeStruct %f32arr\n"
3361                 "%outbufptr = OpTypePointer Uniform %outbuf\n"
3362                 "%outdata   = OpVariable %outbufptr Uniform\n"
3363                 "%inbuf0    = OpTypeStruct %f32arr\n"
3364                 "%inbuf0ptr = OpTypePointer Uniform %inbuf0\n"
3365                 "%indata0   = OpVariable %inbuf0ptr Uniform\n"
3366                 "%inbuf1    = OpTypeStruct %f32arr\n"
3367                 "%inbuf1ptr = OpTypePointer Uniform %inbuf1\n"
3368                 "%indata1   = OpVariable %inbuf1ptr Uniform\n"
3369                 "%inbuf2    = OpTypeStruct %f32arr\n"
3370                 "%inbuf2ptr = OpTypePointer Uniform %inbuf2\n"
3371                 "%indata2   = OpVariable %inbuf2ptr Uniform\n"
3372                 "%inbuf3    = OpTypeStruct %f32arr\n"
3373                 "%inbuf3ptr = OpTypePointer Uniform %inbuf3\n"
3374                 "%indata3   = OpVariable %inbuf3ptr Uniform\n"
3375                 "%inbuf4    = OpTypeStruct %f32arr\n"
3376                 "%inbufptr  = OpTypePointer Uniform %inbuf4\n"
3377                 "%indata4   = OpVariable %inbufptr Uniform\n"
3378
3379                 "%main   = OpFunction %void None %voidf\n"
3380                 "%label  = OpLabel\n"
3381                 "%idval  = OpLoad %uvec3 %id\n"
3382                 "%x      = OpCompositeExtract %u32 %idval 0\n"
3383                 "%inloc0 = OpAccessChain %f32ptr %indata0 %zero %x\n"
3384                 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
3385                 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
3386                 "%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
3387                 "%inloc4 = OpAccessChain %f32ptr %indata4 %zero %x\n"
3388                 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3389                 "%inval0 = OpLoad %f32 %inloc0\n"
3390                 "%inval1 = OpLoad %f32 %inloc1\n"
3391                 "%inval2 = OpLoad %f32 %inloc2\n"
3392                 "%inval3 = OpLoad %f32 %inloc3\n"
3393                 "%inval4 = OpLoad %f32 %inloc4\n"
3394                 "%add0   = OpFAdd %f32 %inval0 %inval1\n"
3395                 "%add1   = OpFAdd %f32 %add0 %inval2\n"
3396                 "%add2   = OpFAdd %f32 %add1 %inval3\n"
3397                 "%add    = OpFAdd %f32 %add2 %inval4\n"
3398                 "          OpStore %outloc %add\n"
3399                 "          OpReturn\n"
3400                 "          OpFunctionEnd\n";
3401         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats0)));
3402         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
3403         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
3404         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3405         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3406         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3407         spec.numWorkGroups = IVec3(numElements, 1, 1);
3408
3409         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "decoration group cases", spec));
3410
3411         return group.release();
3412 }
3413
3414 struct SpecConstantTwoIntCase
3415 {
             // One table entry for the OpSpecConstantOp tests built by createSpecConstantGroup().
             // The string members are SPIR-V assembly fragments substituted into that function's
             // shader template; the integer members are appended to the test's specialization data.
3416         const char*             caseName;                       // Test case name; also matched with strcmp() to add per-case SPIR-V.
3417         const char*             scDefinition0;                  // Text placed right after "OpSpecConstant" when defining %sc_0.
3418         const char*             scDefinition1;                  // Text placed right after "OpSpecConstant" when defining %sc_1.
3419         const char*             scResultType;                   // Result type id used by the OpSpecConstantOp that defines %sc_final.
3420         const char*             scOperation;                    // Opcode name and operands of that OpSpecConstantOp.
3421         deInt32                 scActualValue0;                 // First value appended to specConstants (presumably consumed by SpecId 0 -- confirm).
3422         deInt32                 scActualValue1;                 // Second value appended to specConstants (presumably consumed by SpecId 1 -- confirm).
3423         const char*             resultOperation;                // Instruction text that defines %final in the shader's main function.
3424         vector<deInt32> expectedOutput;                         // Expected contents of the output buffer.
3425         deInt32                 scActualValueLength;            // Size argument passed to specConstants.append() for each value.
3426
             // Stores every argument in the member of the matching role. valueLength defaults to
             // sizeof(deInt32); the visible table overrides it only for the 16-bit float case.
3427                                         SpecConstantTwoIntCase (const char* name,
3428                                                                                         const char* definition0,
3429                                                                                         const char* definition1,
3430                                                                                         const char* resultType,
3431                                                                                         const char* operation,
3432                                                                                         deInt32 value0,
3433                                                                                         deInt32 value1,
3434                                                                                         const char* resultOp,
3435                                                                                         const vector<deInt32>& output,
3436                                                                                         const deInt32   valueLength = sizeof(deInt32))
3437                                                 : caseName                              (name)
3438                                                 , scDefinition0                 (definition0)
3439                                                 , scDefinition1                 (definition1)
3440                                                 , scResultType                  (resultType)
3441                                                 , scOperation                   (operation)
3442                                                 , scActualValue0                (value0)
3443                                                 , scActualValue1                (value1)
3444                                                 , resultOperation               (resultOp)
3445                                                 , expectedOutput                (output)
3446                                                 , scActualValueLength   (valueLength)
3447                                                 {}
3448 };
3449
3450 tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
3451 {
             // Builds the "opspecconstantop" test group: one compute-shader case per entry of the
             // SpecConstantTwoIntCase table below, plus a hand-written "vector_related" case.
             // Every shader loads an element of inputInts, combines it with a value derived from
             // specialization constants and stores the result in the output buffer.
             // Returns the released group pointer.
3452         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
3453         vector<SpecConstantTwoIntCase>  cases;
3454         de::Random                                              rnd                             (deStringHash(group->getName()));
3455         const int                                               numElements             = 100;
3456         const deInt32                                   p1AsFloat16             = 0x3c00; // +1(fp16) == 0 01111 0000000000 == 0011 1100 0000 0000
3457         vector<deInt32>                                 inputInts               (numElements, 0);
3458         vector<deInt32>                                 outputInts1             (numElements, 0);
3459         vector<deInt32>                                 outputInts2             (numElements, 0);
3460         vector<deInt32>                                 outputInts3             (numElements, 0);
3461         vector<deInt32>                                 outputInts4             (numElements, 0);
             // Shader shared by all table-driven cases. CAPABILITIES, OPTYPE_DEFINITIONS and
             // TYPE_CONVERT are optional (":opt") and are only set for the sconvert, fconvert and
             // fconvert16 cases; SC_DEF0, SC_DEF1, SC_RESULT_TYPE, SC_OP and GEN_RESULT are
             // filled from the case entry.
3462         const StringTemplate                    shaderTemplate  (
3463                 "${CAPABILITIES:opt}"
3464                 + string(getComputeAsmShaderPreamble()) +
3465
3466                 "OpName %main           \"main\"\n"
3467                 "OpName %id             \"gl_GlobalInvocationID\"\n"
3468
3469                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3470                 "OpDecorate %sc_0  SpecId 0\n"
3471                 "OpDecorate %sc_1  SpecId 1\n"
3472                 "OpDecorate %i32arr ArrayStride 4\n"
3473
3474                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3475
3476                 "${OPTYPE_DEFINITIONS:opt}"
3477                 "%buf     = OpTypeStruct %i32arr\n"
3478                 "%bufptr  = OpTypePointer Uniform %buf\n"
3479                 "%indata    = OpVariable %bufptr Uniform\n"
3480                 "%outdata   = OpVariable %bufptr Uniform\n"
3481
3482                 "%id        = OpVariable %uvec3ptr Input\n"
3483                 "%zero      = OpConstant %i32 0\n"
3484
3485                 "%sc_0      = OpSpecConstant${SC_DEF0}\n"
3486                 "%sc_1      = OpSpecConstant${SC_DEF1}\n"
3487                 "%sc_final  = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n"
3488
3489                 "%main      = OpFunction %void None %voidf\n"
3490                 "%label     = OpLabel\n"
3491                 "${TYPE_CONVERT:opt}"
3492                 "%idval     = OpLoad %uvec3 %id\n"
3493                 "%x         = OpCompositeExtract %u32 %idval 0\n"
3494                 "%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
3495                 "%inval     = OpLoad %i32 %inloc\n"
3496                 "%final     = ${GEN_RESULT}\n"
3497                 "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
3498                 "             OpStore %outloc %final\n"
3499                 "             OpReturn\n"
3500                 "             OpFunctionEnd\n");
3501
3502         fillRandomScalars(rnd, -65536, 65536, &inputInts[0], numElements);
3503
             // Expected outputs, one array per effect the cases have on the input element:
             // +42 (outputInts1), unchanged via OpSelect (outputInts2), -11200 (outputInts3)
             // and +1 (outputInts4).
3504         for (size_t ndx = 0; ndx < numElements; ++ndx)
3505         {
3506                 outputInts1[ndx] = inputInts[ndx] + 42;
3507                 outputInts2[ndx] = inputInts[ndx];
3508                 outputInts3[ndx] = inputInts[ndx] - 11200;
3509                 outputInts4[ndx] = inputInts[ndx] + 1;
3510         }
3511
             // Candidate texts for the GEN_RESULT slot, i.e. the instruction that defines %final.
             // addSc32ToInput consumes %sc_final32, which is only defined when TYPE_CONVERT is set.
3512         const char addScToInput[]               = "OpIAdd %i32 %inval %sc_final";
3513         const char addSc32ToInput[]             = "OpIAdd %i32 %inval %sc_final32";
3514         const char selectTrueUsingSc[]  = "OpSelect %i32 %sc_final %inval %zero";
3515         const char selectFalseUsingSc[] = "OpSelect %i32 %sc_final %zero %inval";
3516
             // Columns: name, %sc_0 definition, %sc_1 definition, result type, operation,
             // first spec value, second spec value, GEN_RESULT text, expected output[, value size].
3517         cases.push_back(SpecConstantTwoIntCase("iadd",                                  " %i32 0",              " %i32 0",              "%i32",         "IAdd                 %sc_0 %sc_1",                     62,             -20,    addScToInput,           outputInts1));
3518         cases.push_back(SpecConstantTwoIntCase("isub",                                  " %i32 0",              " %i32 0",              "%i32",         "ISub                 %sc_0 %sc_1",                     100,    58,             addScToInput,           outputInts1));
3519         cases.push_back(SpecConstantTwoIntCase("imul",                                  " %i32 0",              " %i32 0",              "%i32",         "IMul                 %sc_0 %sc_1",                     -2,             -21,    addScToInput,           outputInts1));
3520         cases.push_back(SpecConstantTwoIntCase("sdiv",                                  " %i32 0",              " %i32 0",              "%i32",         "SDiv                 %sc_0 %sc_1",                     -126,   -3,             addScToInput,           outputInts1));
3521         cases.push_back(SpecConstantTwoIntCase("udiv",                                  " %i32 0",              " %i32 0",              "%i32",         "UDiv                 %sc_0 %sc_1",                     126,    3,              addScToInput,           outputInts1));
3522         cases.push_back(SpecConstantTwoIntCase("srem",                                  " %i32 0",              " %i32 0",              "%i32",         "SRem                 %sc_0 %sc_1",                     7,              3,              addScToInput,           outputInts4));
3523         cases.push_back(SpecConstantTwoIntCase("smod",                                  " %i32 0",              " %i32 0",              "%i32",         "SMod                 %sc_0 %sc_1",                     7,              3,              addScToInput,           outputInts4));
3524         cases.push_back(SpecConstantTwoIntCase("umod",                                  " %i32 0",              " %i32 0",              "%i32",         "UMod                 %sc_0 %sc_1",                     342,    50,             addScToInput,           outputInts1));
3525         cases.push_back(SpecConstantTwoIntCase("bitwiseand",                    " %i32 0",              " %i32 0",              "%i32",         "BitwiseAnd           %sc_0 %sc_1",                     42,             63,             addScToInput,           outputInts1));
3526         cases.push_back(SpecConstantTwoIntCase("bitwiseor",                             " %i32 0",              " %i32 0",              "%i32",         "BitwiseOr            %sc_0 %sc_1",                     34,             8,              addScToInput,           outputInts1));
3527         cases.push_back(SpecConstantTwoIntCase("bitwisexor",                    " %i32 0",              " %i32 0",              "%i32",         "BitwiseXor           %sc_0 %sc_1",                     18,             56,             addScToInput,           outputInts1));
3528         cases.push_back(SpecConstantTwoIntCase("shiftrightlogical",             " %i32 0",              " %i32 0",              "%i32",         "ShiftRightLogical    %sc_0 %sc_1",                     168,    2,              addScToInput,           outputInts1));
3529         cases.push_back(SpecConstantTwoIntCase("shiftrightarithmetic",  " %i32 0",              " %i32 0",              "%i32",         "ShiftRightArithmetic %sc_0 %sc_1",                     168,    2,              addScToInput,           outputInts1));
3530         cases.push_back(SpecConstantTwoIntCase("shiftleftlogical",              " %i32 0",              " %i32 0",              "%i32",         "ShiftLeftLogical     %sc_0 %sc_1",                     21,             1,              addScToInput,           outputInts1));
3531         cases.push_back(SpecConstantTwoIntCase("slessthan",                             " %i32 0",              " %i32 0",              "%bool",        "SLessThan            %sc_0 %sc_1",                     -20,    -10,    selectTrueUsingSc,      outputInts2));
3532         cases.push_back(SpecConstantTwoIntCase("ulessthan",                             " %i32 0",              " %i32 0",              "%bool",        "ULessThan            %sc_0 %sc_1",                     10,             20,             selectTrueUsingSc,      outputInts2));
3533         cases.push_back(SpecConstantTwoIntCase("sgreaterthan",                  " %i32 0",              " %i32 0",              "%bool",        "SGreaterThan         %sc_0 %sc_1",                     -1000,  50,             selectFalseUsingSc,     outputInts2));
3534         cases.push_back(SpecConstantTwoIntCase("ugreaterthan",                  " %i32 0",              " %i32 0",              "%bool",        "UGreaterThan         %sc_0 %sc_1",                     10,             5,              selectTrueUsingSc,      outputInts2));
3535         cases.push_back(SpecConstantTwoIntCase("slessthanequal",                " %i32 0",              " %i32 0",              "%bool",        "SLessThanEqual       %sc_0 %sc_1",                     -10,    -10,    selectTrueUsingSc,      outputInts2));
3536         cases.push_back(SpecConstantTwoIntCase("ulessthanequal",                " %i32 0",              " %i32 0",              "%bool",        "ULessThanEqual       %sc_0 %sc_1",                     50,             100,    selectTrueUsingSc,      outputInts2));
3537         cases.push_back(SpecConstantTwoIntCase("sgreaterthanequal",             " %i32 0",              " %i32 0",              "%bool",        "SGreaterThanEqual    %sc_0 %sc_1",                     -1000,  50,             selectFalseUsingSc,     outputInts2));
3538         cases.push_back(SpecConstantTwoIntCase("ugreaterthanequal",             " %i32 0",              " %i32 0",              "%bool",        "UGreaterThanEqual    %sc_0 %sc_1",                     10,             10,             selectTrueUsingSc,      outputInts2));
3539         cases.push_back(SpecConstantTwoIntCase("iequal",                                " %i32 0",              " %i32 0",              "%bool",        "IEqual               %sc_0 %sc_1",                     42,             24,             selectFalseUsingSc,     outputInts2));
3540         cases.push_back(SpecConstantTwoIntCase("inotequal",                             " %i32 0",              " %i32 0",              "%bool",        "INotEqual            %sc_0 %sc_1",                     42,             24,             selectTrueUsingSc,      outputInts2));
3541         cases.push_back(SpecConstantTwoIntCase("logicaland",                    "True %bool",   "True %bool",   "%bool",        "LogicalAnd           %sc_0 %sc_1",                     0,              1,              selectFalseUsingSc,     outputInts2));
3542         cases.push_back(SpecConstantTwoIntCase("logicalor",                             "False %bool",  "False %bool",  "%bool",        "LogicalOr            %sc_0 %sc_1",                     1,              0,              selectTrueUsingSc,      outputInts2));
3543         cases.push_back(SpecConstantTwoIntCase("logicalequal",                  "True %bool",   "True %bool",   "%bool",        "LogicalEqual         %sc_0 %sc_1",                     0,              1,              selectFalseUsingSc,     outputInts2));
3544         cases.push_back(SpecConstantTwoIntCase("logicalnotequal",               "False %bool",  "False %bool",  "%bool",        "LogicalNotEqual      %sc_0 %sc_1",                     1,              0,              selectTrueUsingSc,      outputInts2));
3545         cases.push_back(SpecConstantTwoIntCase("snegate",                               " %i32 0",              " %i32 0",              "%i32",         "SNegate              %sc_0",                           -42,    0,              addScToInput,           outputInts1));
3546         cases.push_back(SpecConstantTwoIntCase("not",                                   " %i32 0",              " %i32 0",              "%i32",         "Not                  %sc_0",                           -43,    0,              addScToInput,           outputInts1));
3547         cases.push_back(SpecConstantTwoIntCase("logicalnot",                    "False %bool",  "False %bool",  "%bool",        "LogicalNot           %sc_0",                           1,              0,              selectFalseUsingSc,     outputInts2));
3548         cases.push_back(SpecConstantTwoIntCase("select",                                "False %bool",  " %i32 0",              "%i32",         "Select               %sc_0 %sc_1 %zero",       1,              42,             addScToInput,           outputInts1));
3549         cases.push_back(SpecConstantTwoIntCase("sconvert",                              " %i32 0",              " %i32 0",              "%i16",         "SConvert             %sc_0",                           -11200, 0,              addSc32ToInput,         outputInts3));
3550         // -969998336 stored as 32-bit two's complement is the binary representation of -11200 as IEEE-754 Float
3551         cases.push_back(SpecConstantTwoIntCase("fconvert",                              " %f32 0",              " %f32 0",              "%f64",         "FConvert             %sc_0",                           -969998336, 0,  addSc32ToInput,         outputInts3));
3552         cases.push_back(SpecConstantTwoIntCase("fconvert16",                    " %f16 0",              " %f16 0",              "%f32",         "FConvert             %sc_0",                           p1AsFloat16, 0, addSc32ToInput,         outputInts4, sizeof(deFloat16)));
3553
             // Instantiate the template once per table entry and register the resulting case.
3554         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
3555         {
3556                 map<string, string>             specializations;
3557                 ComputeShaderSpec               spec;
3558
3559                 specializations["SC_DEF0"]                      = cases[caseNdx].scDefinition0;
3560                 specializations["SC_DEF1"]                      = cases[caseNdx].scDefinition1;
3561                 specializations["SC_RESULT_TYPE"]       = cases[caseNdx].scResultType;
3562                 specializations["SC_OP"]                        = cases[caseNdx].scOperation;
3563                 specializations["GEN_RESULT"]           = cases[caseNdx].resultOperation;
3564
3565                 // Special SPIR-V code for SConvert-case
3566                 if (strcmp(cases[caseNdx].caseName, "sconvert") == 0)
3567                 {
3568                         spec.requestedVulkanFeatures.coreFeatures.shaderInt16   = VK_TRUE;
3569                         specializations["CAPABILITIES"]                                                 = "OpCapability Int16\n";                                                       // Adds 16-bit integer capability
3570                         specializations["OPTYPE_DEFINITIONS"]                                   = "%i16 = OpTypeInt 16 1\n";                                            // Adds 16-bit integer type
3571                         specializations["TYPE_CONVERT"]                                                 = "%sc_final32 = OpSConvert %i32 %sc_final\n";          // Converts 16-bit integer to 32-bit integer
3572                 }
3573
3574                 // Special SPIR-V code for FConvert-case
3575                 if (strcmp(cases[caseNdx].caseName, "fconvert") == 0)
3576                 {
3577                         spec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3578                         specializations["CAPABILITIES"]                                                 = "OpCapability Float64\n";                                                     // Adds 64-bit float capability
3579                         specializations["OPTYPE_DEFINITIONS"]                                   = "%f64 = OpTypeFloat 64\n";                                            // Adds 64-bit float type
3580                         specializations["TYPE_CONVERT"]                                                 = "%sc_final32 = OpConvertFToS %i32 %sc_final\n";       // Converts 64-bit float to 32-bit integer
3581                 }
3582
3583                 // Special SPIR-V code for FConvert-case for 16-bit floats
3584                 if (strcmp(cases[caseNdx].caseName, "fconvert16") == 0)
3585                 {
3586                         spec.extensions.push_back("VK_KHR_shader_float16_int8");
3587                         spec.requestedVulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
3588                         specializations["CAPABILITIES"]                 = "OpCapability Float16\n";                                                     // Adds 16-bit float capability
3589                         specializations["OPTYPE_DEFINITIONS"]   = "%f16 = OpTypeFloat 16\n";                                            // Adds 16-bit float type
3590                         specializations["TYPE_CONVERT"]                 = "%sc_final32 = OpConvertFToS %i32 %sc_final\n";       // Converts 16-bit float to 32-bit integer
3591                 }
3592
3593                 spec.assembly = shaderTemplate.specialize(specializations);
3594                 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
3595                 spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
3596                 spec.numWorkGroups = IVec3(numElements, 1, 1);
                     // Both values are appended with the per-case size (sizeof(deInt32) unless overridden).
                     // NOTE(review): fconvert16 passes sizeof(deFloat16) together with the address of a deInt32;
                     // presumably only the leading bytes are taken, which yields 0x3c00 on little-endian
                     // hosts only -- confirm against the specConstants container.
3597                 spec.specConstants.append(&cases[caseNdx].scActualValue0, cases[caseNdx].scActualValueLength);
3598                 spec.specConstants.append(&cases[caseNdx].scActualValue1, cases[caseNdx].scActualValueLength);
3599
3600                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName, cases[caseNdx].caseName, spec));
3601         }
3602
             // Stand-alone case chaining CompositeInsert, VectorShuffle and CompositeExtract inside
             // OpSpecConstantOp. In the per-line notes "???" marks a component selected with shuffle
             // index 0xFFFFFFFF. Assuming the three values appended below map to sc_0, sc_1 and sc_2
             // in order, %sc_final is (-77 - 123) * 56 = -11200, which matches outputInts3.
3603         ComputeShaderSpec                               spec;
3604
3605         spec.assembly =
3606                 string(getComputeAsmShaderPreamble()) +
3607
3608                 "OpName %main           \"main\"\n"
3609                 "OpName %id             \"gl_GlobalInvocationID\"\n"
3610
3611                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3612                 "OpDecorate %sc_0  SpecId 0\n"
3613                 "OpDecorate %sc_1  SpecId 1\n"
3614                 "OpDecorate %sc_2  SpecId 2\n"
3615                 "OpDecorate %i32arr ArrayStride 4\n"
3616
3617                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3618
3619                 "%ivec3       = OpTypeVector %i32 3\n"
3620                 "%buf         = OpTypeStruct %i32arr\n"
3621                 "%bufptr      = OpTypePointer Uniform %buf\n"
3622                 "%indata      = OpVariable %bufptr Uniform\n"
3623                 "%outdata     = OpVariable %bufptr Uniform\n"
3624
3625                 "%id          = OpVariable %uvec3ptr Input\n"
3626                 "%zero        = OpConstant %i32 0\n"
3627                 "%ivec3_0     = OpConstantComposite %ivec3 %zero %zero %zero\n"
3628                 "%vec3_undef  = OpUndef %ivec3\n"
3629
3630                 "%sc_0        = OpSpecConstant %i32 0\n"
3631                 "%sc_1        = OpSpecConstant %i32 0\n"
3632                 "%sc_2        = OpSpecConstant %i32 0\n"
3633                 "%sc_vec3_0   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_0        %ivec3_0     0\n"                                                 // (sc_0, 0, 0)
3634                 "%sc_vec3_1   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_1        %ivec3_0     1\n"                                                 // (0, sc_1, 0)
3635                 "%sc_vec3_2   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_2        %ivec3_0     2\n"                                                 // (0, 0, sc_2)
3636                 "%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0   %vec3_undef  0          0xFFFFFFFF 2\n"   // (sc_0, ???,  0)
3637                 "%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1          0\n"   // (???,  sc_1, 0)
3638                 "%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle    %vec3_undef  %sc_vec3_2   5          0xFFFFFFFF 5\n"   // (sc_2, ???,  sc_2)
3639                 "%sc_vec3_01  = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n"                                             // (???,  sc_0, sc_1) -- component 0 is the undefined component 1 of %sc_vec3_0_s
3640                 "%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n"                                             // (sc_2, sc_0, sc_1)
3641                 "%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"                                                 // sc_2
3642                 "%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"                                                 // sc_0
3643                 "%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"                                                 // sc_1
3644                 "%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"                                                              // (sc_2 - sc_0)
3645                 "%sc_final    = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n"                                                              // (sc_2 - sc_0) * sc_1
3646
3647                 "%main      = OpFunction %void None %voidf\n"
3648                 "%label     = OpLabel\n"
3649                 "%idval     = OpLoad %uvec3 %id\n"
3650                 "%x         = OpCompositeExtract %u32 %idval 0\n"
3651                 "%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
3652                 "%inval     = OpLoad %i32 %inloc\n"
3653                 "%final     = OpIAdd %i32 %inval %sc_final\n"
3654                 "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
3655                 "             OpStore %outloc %final\n"
3656                 "             OpReturn\n"
3657                 "             OpFunctionEnd\n";
3658         spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
3659         spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts3)));
3660         spec.numWorkGroups = IVec3(numElements, 1, 1);
3661         spec.specConstants.append<deInt32>(123);
3662         spec.specConstants.append<deInt32>(56);
3663         spec.specConstants.append<deInt32>(-77);
3664
3665         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", "VectorShuffle, CompositeExtract, & CompositeInsert", spec));
3666
3667         return group.release();
3668 }
3669
3670 void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestContext& testCtx)
3671 {
3672         ComputeShaderSpec       specInt;
3673         ComputeShaderSpec       specFloat;
3674         ComputeShaderSpec       specFloat16;
3675         ComputeShaderSpec       specVec3;
3676         ComputeShaderSpec       specMat4;
3677         ComputeShaderSpec       specArray;
3678         ComputeShaderSpec       specStruct;
3679         de::Random                      rnd                             (deStringHash(group->getName()));
3680         const int                       numElements             = 100;
3681         vector<float>           inputFloats             (numElements, 0);
3682         vector<float>           outputFloats    (numElements, 0);
3683         vector<deFloat16>       inputFloats16   (numElements, 0);
3684         vector<deFloat16>       outputFloats16  (numElements, 0);
3685
3686         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
3687
3688         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3689         floorAll(inputFloats);
3690
3691         for (size_t ndx = 0; ndx < numElements; ++ndx)
3692         {
3693                 // Just check if the value is positive or not
3694                 outputFloats[ndx] = (inputFloats[ndx] > 0) ? 1.0f : -1.0f;
3695         }
3696
3697         for (size_t ndx = 0; ndx < numElements; ++ndx)
3698         {
3699                 inputFloats16[ndx] = tcu::Float16(inputFloats[ndx]).bits();
3700                 outputFloats16[ndx] = tcu::Float16(outputFloats[ndx]).bits();
3701         }
3702
3703         // All of the tests are of the form:
3704         //
3705         // testtype r
3706         //
3707         // if (inputdata > 0)
3708         //   r = 1
3709         // else
3710         //   r = -1
3711         //
3712         // return (float)r
3713
3714         specFloat.assembly =
3715                 string(getComputeAsmShaderPreamble()) +
3716
3717                 "OpSource GLSL 430\n"
3718                 "OpName %main \"main\"\n"
3719                 "OpName %id \"gl_GlobalInvocationID\"\n"
3720
3721                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3722
3723                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3724
3725                 "%id = OpVariable %uvec3ptr Input\n"
3726                 "%zero       = OpConstant %i32 0\n"
3727                 "%float_0    = OpConstant %f32 0.0\n"
3728                 "%float_1    = OpConstant %f32 1.0\n"
3729                 "%float_n1   = OpConstant %f32 -1.0\n"
3730
3731                 "%main     = OpFunction %void None %voidf\n"
3732                 "%entry    = OpLabel\n"
3733                 "%idval    = OpLoad %uvec3 %id\n"
3734                 "%x        = OpCompositeExtract %u32 %idval 0\n"
3735                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
3736                 "%inval    = OpLoad %f32 %inloc\n"
3737
3738                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
3739                 "            OpSelectionMerge %cm None\n"
3740                 "            OpBranchConditional %comp %tb %fb\n"
3741                 "%tb       = OpLabel\n"
3742                 "            OpBranch %cm\n"
3743                 "%fb       = OpLabel\n"
3744                 "            OpBranch %cm\n"
3745                 "%cm       = OpLabel\n"
3746                 "%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
3747
3748                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
3749                 "            OpStore %outloc %res\n"
3750                 "            OpReturn\n"
3751
3752                 "            OpFunctionEnd\n";
3753         specFloat.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3754         specFloat.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3755         specFloat.numWorkGroups = IVec3(numElements, 1, 1);
3756
3757         specFloat16.assembly =
3758                 "OpCapability Shader\n"
3759                 "OpCapability StorageUniformBufferBlock16\n"
3760                 "OpCapability Float16\n"
3761                 "OpExtension \"SPV_KHR_16bit_storage\"\n"
3762                 "OpMemoryModel Logical GLSL450\n"
3763                 "OpEntryPoint GLCompute %main \"main\" %id\n"
3764                 "OpExecutionMode %main LocalSize 1 1 1\n"
3765
3766                 "OpSource GLSL 430\n"
3767                 "OpName %main \"main\"\n"
3768                 "OpName %id \"gl_GlobalInvocationID\"\n"
3769
3770                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3771
3772                 "OpDecorate %buf BufferBlock\n"
3773                 "OpDecorate %indata DescriptorSet 0\n"
3774                 "OpDecorate %indata Binding 0\n"
3775                 "OpDecorate %outdata DescriptorSet 0\n"
3776                 "OpDecorate %outdata Binding 1\n"
3777                 "OpDecorate %f16arr ArrayStride 2\n"
3778                 "OpMemberDecorate %buf 0 Offset 0\n"
3779
3780                 "%f16      = OpTypeFloat 16\n"
3781                 "%f16ptr   = OpTypePointer Uniform %f16\n"
3782                 "%f16arr   = OpTypeRuntimeArray %f16\n"
3783
3784                 + string(getComputeAsmCommonTypes()) +
3785
3786                 "%buf      = OpTypeStruct %f16arr\n"
3787                 "%bufptr   = OpTypePointer Uniform %buf\n"
3788                 "%indata   = OpVariable %bufptr Uniform\n"
3789                 "%outdata  = OpVariable %bufptr Uniform\n"
3790
3791                 "%id       = OpVariable %uvec3ptr Input\n"
3792                 "%zero     = OpConstant %i32 0\n"
3793                 "%float_0  = OpConstant %f32 0.0\n"
3794                 "%float_1  = OpConstant %f32 1.0\n"
3795                 "%float_n1 = OpConstant %f32 -1.0\n"
3796
3797                 "%main     = OpFunction %void None %voidf\n"
3798                 "%entry    = OpLabel\n"
3799                 "%idval    = OpLoad %uvec3 %id\n"
3800                 "%x        = OpCompositeExtract %u32 %idval 0\n"
3801                 "%inloc    = OpAccessChain %f16ptr %indata %zero %x\n"
3802                 "%inval    = OpLoad %f16 %inloc\n"
3803                 "%f32_inval = OpFConvert %f32 %inval\n"
3804
3805                 "%comp     = OpFOrdGreaterThan %bool %f32_inval %float_0\n"
3806                 "            OpSelectionMerge %cm None\n"
3807                 "            OpBranchConditional %comp %tb %fb\n"
3808                 "%tb       = OpLabel\n"
3809                 "            OpBranch %cm\n"
3810                 "%fb       = OpLabel\n"
3811                 "            OpBranch %cm\n"
3812                 "%cm       = OpLabel\n"
3813                 "%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
3814                 "%f16_res  = OpFConvert %f16 %res\n"
3815
3816                 "%outloc   = OpAccessChain %f16ptr %outdata %zero %x\n"
3817                 "            OpStore %outloc %f16_res\n"
3818                 "            OpReturn\n"
3819
3820                 "            OpFunctionEnd\n";
3821         specFloat16.inputs.push_back(BufferSp(new Float16Buffer(inputFloats16)));
3822         specFloat16.outputs.push_back(BufferSp(new Float16Buffer(outputFloats16)));
3823         specFloat16.numWorkGroups = IVec3(numElements, 1, 1);
3824         specFloat16.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
3825         specFloat16.requestedVulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
3826
3827         specMat4.assembly =
3828                 string(getComputeAsmShaderPreamble()) +
3829
3830                 "OpSource GLSL 430\n"
3831                 "OpName %main \"main\"\n"
3832                 "OpName %id \"gl_GlobalInvocationID\"\n"
3833
3834                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3835
3836                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3837
3838                 "%id = OpVariable %uvec3ptr Input\n"
3839                 "%v4f32      = OpTypeVector %f32 4\n"
3840                 "%mat4v4f32  = OpTypeMatrix %v4f32 4\n"
3841                 "%zero       = OpConstant %i32 0\n"
3842                 "%float_0    = OpConstant %f32 0.0\n"
3843                 "%float_1    = OpConstant %f32 1.0\n"
3844                 "%float_n1   = OpConstant %f32 -1.0\n"
3845                 "%m11        = OpConstantComposite %v4f32 %float_1 %float_0 %float_0 %float_0\n"
3846                 "%m12        = OpConstantComposite %v4f32 %float_0 %float_1 %float_0 %float_0\n"
3847                 "%m13        = OpConstantComposite %v4f32 %float_0 %float_0 %float_1 %float_0\n"
3848                 "%m14        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_1\n"
3849                 "%m1         = OpConstantComposite %mat4v4f32 %m11 %m12 %m13 %m14\n"
3850                 "%m21        = OpConstantComposite %v4f32 %float_n1 %float_0 %float_0 %float_0\n"
3851                 "%m22        = OpConstantComposite %v4f32 %float_0 %float_n1 %float_0 %float_0\n"
3852                 "%m23        = OpConstantComposite %v4f32 %float_0 %float_0 %float_n1 %float_0\n"
3853                 "%m24        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_n1\n"
3854                 "%m2         = OpConstantComposite %mat4v4f32 %m21 %m22 %m23 %m24\n"
3855
3856                 "%main     = OpFunction %void None %voidf\n"
3857                 "%entry    = OpLabel\n"
3858                 "%idval    = OpLoad %uvec3 %id\n"
3859                 "%x        = OpCompositeExtract %u32 %idval 0\n"
3860                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
3861                 "%inval    = OpLoad %f32 %inloc\n"
3862
3863                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
3864                 "            OpSelectionMerge %cm None\n"
3865                 "            OpBranchConditional %comp %tb %fb\n"
3866                 "%tb       = OpLabel\n"
3867                 "            OpBranch %cm\n"
3868                 "%fb       = OpLabel\n"
3869                 "            OpBranch %cm\n"
3870                 "%cm       = OpLabel\n"
3871                 "%mres     = OpPhi %mat4v4f32 %m1 %tb %m2 %fb\n"
3872                 "%res      = OpCompositeExtract %f32 %mres 2 2\n"
3873
3874                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
3875                 "            OpStore %outloc %res\n"
3876                 "            OpReturn\n"
3877
3878                 "            OpFunctionEnd\n";
3879         specMat4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3880         specMat4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3881         specMat4.numWorkGroups = IVec3(numElements, 1, 1);
3882
3883         specVec3.assembly =
3884                 string(getComputeAsmShaderPreamble()) +
3885
3886                 "OpSource GLSL 430\n"
3887                 "OpName %main \"main\"\n"
3888                 "OpName %id \"gl_GlobalInvocationID\"\n"
3889
3890                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3891
3892                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3893
3894                 "%id = OpVariable %uvec3ptr Input\n"
3895                 "%zero       = OpConstant %i32 0\n"
3896                 "%float_0    = OpConstant %f32 0.0\n"
3897                 "%float_1    = OpConstant %f32 1.0\n"
3898                 "%float_n1   = OpConstant %f32 -1.0\n"
3899                 "%v1         = OpConstantComposite %fvec3 %float_1 %float_1 %float_1\n"
3900                 "%v2         = OpConstantComposite %fvec3 %float_n1 %float_n1 %float_n1\n"
3901
3902                 "%main     = OpFunction %void None %voidf\n"
3903                 "%entry    = OpLabel\n"
3904                 "%idval    = OpLoad %uvec3 %id\n"
3905                 "%x        = OpCompositeExtract %u32 %idval 0\n"
3906                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
3907                 "%inval    = OpLoad %f32 %inloc\n"
3908
3909                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
3910                 "            OpSelectionMerge %cm None\n"
3911                 "            OpBranchConditional %comp %tb %fb\n"
3912                 "%tb       = OpLabel\n"
3913                 "            OpBranch %cm\n"
3914                 "%fb       = OpLabel\n"
3915                 "            OpBranch %cm\n"
3916                 "%cm       = OpLabel\n"
3917                 "%vres     = OpPhi %fvec3 %v1 %tb %v2 %fb\n"
3918                 "%res      = OpCompositeExtract %f32 %vres 2\n"
3919
3920                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
3921                 "            OpStore %outloc %res\n"
3922                 "            OpReturn\n"
3923
3924                 "            OpFunctionEnd\n";
3925         specVec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3926         specVec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3927         specVec3.numWorkGroups = IVec3(numElements, 1, 1);
3928
3929         specInt.assembly =
3930                 string(getComputeAsmShaderPreamble()) +
3931
3932                 "OpSource GLSL 430\n"
3933                 "OpName %main \"main\"\n"
3934                 "OpName %id \"gl_GlobalInvocationID\"\n"
3935
3936                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3937
3938                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3939
3940                 "%id = OpVariable %uvec3ptr Input\n"
3941                 "%zero       = OpConstant %i32 0\n"
3942                 "%float_0    = OpConstant %f32 0.0\n"
3943                 "%i1         = OpConstant %i32 1\n"
3944                 "%i2         = OpConstant %i32 -1\n"
3945
3946                 "%main     = OpFunction %void None %voidf\n"
3947                 "%entry    = OpLabel\n"
3948                 "%idval    = OpLoad %uvec3 %id\n"
3949                 "%x        = OpCompositeExtract %u32 %idval 0\n"
3950                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
3951                 "%inval    = OpLoad %f32 %inloc\n"
3952
3953                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
3954                 "            OpSelectionMerge %cm None\n"
3955                 "            OpBranchConditional %comp %tb %fb\n"
3956                 "%tb       = OpLabel\n"
3957                 "            OpBranch %cm\n"
3958                 "%fb       = OpLabel\n"
3959                 "            OpBranch %cm\n"
3960                 "%cm       = OpLabel\n"
3961                 "%ires     = OpPhi %i32 %i1 %tb %i2 %fb\n"
3962                 "%res      = OpConvertSToF %f32 %ires\n"
3963
3964                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
3965                 "            OpStore %outloc %res\n"
3966                 "            OpReturn\n"
3967
3968                 "            OpFunctionEnd\n";
3969         specInt.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3970         specInt.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3971         specInt.numWorkGroups = IVec3(numElements, 1, 1);
3972
3973         specArray.assembly =
3974                 string(getComputeAsmShaderPreamble()) +
3975
3976                 "OpSource GLSL 430\n"
3977                 "OpName %main \"main\"\n"
3978                 "OpName %id \"gl_GlobalInvocationID\"\n"
3979
3980                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3981
3982                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3983
3984                 "%id = OpVariable %uvec3ptr Input\n"
3985                 "%zero       = OpConstant %i32 0\n"
3986                 "%u7         = OpConstant %u32 7\n"
3987                 "%float_0    = OpConstant %f32 0.0\n"
3988                 "%float_1    = OpConstant %f32 1.0\n"
3989                 "%float_n1   = OpConstant %f32 -1.0\n"
3990                 "%f32a7      = OpTypeArray %f32 %u7\n"
3991                 "%a1         = OpConstantComposite %f32a7 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1\n"
3992                 "%a2         = OpConstantComposite %f32a7 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1\n"
3993                 "%main     = OpFunction %void None %voidf\n"
3994                 "%entry    = OpLabel\n"
3995                 "%idval    = OpLoad %uvec3 %id\n"
3996                 "%x        = OpCompositeExtract %u32 %idval 0\n"
3997                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
3998                 "%inval    = OpLoad %f32 %inloc\n"
3999
4000                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4001                 "            OpSelectionMerge %cm None\n"
4002                 "            OpBranchConditional %comp %tb %fb\n"
4003                 "%tb       = OpLabel\n"
4004                 "            OpBranch %cm\n"
4005                 "%fb       = OpLabel\n"
4006                 "            OpBranch %cm\n"
4007                 "%cm       = OpLabel\n"
4008                 "%ares     = OpPhi %f32a7 %a1 %tb %a2 %fb\n"
4009                 "%res      = OpCompositeExtract %f32 %ares 5\n"
4010
4011                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4012                 "            OpStore %outloc %res\n"
4013                 "            OpReturn\n"
4014
4015                 "            OpFunctionEnd\n";
4016         specArray.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4017         specArray.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4018         specArray.numWorkGroups = IVec3(numElements, 1, 1);
4019
4020         specStruct.assembly =
4021                 string(getComputeAsmShaderPreamble()) +
4022
4023                 "OpSource GLSL 430\n"
4024                 "OpName %main \"main\"\n"
4025                 "OpName %id \"gl_GlobalInvocationID\"\n"
4026
4027                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4028
4029                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4030
4031                 "%id = OpVariable %uvec3ptr Input\n"
4032                 "%zero       = OpConstant %i32 0\n"
4033                 "%float_0    = OpConstant %f32 0.0\n"
4034                 "%float_1    = OpConstant %f32 1.0\n"
4035                 "%float_n1   = OpConstant %f32 -1.0\n"
4036
4037                 "%v2f32      = OpTypeVector %f32 2\n"
4038                 "%Data2      = OpTypeStruct %f32 %v2f32\n"
4039                 "%Data       = OpTypeStruct %Data2 %f32\n"
4040
4041                 "%in1a       = OpConstantComposite %v2f32 %float_1 %float_1\n"
4042                 "%in1b       = OpConstantComposite %Data2 %float_1 %in1a\n"
4043                 "%s1         = OpConstantComposite %Data %in1b %float_1\n"
4044                 "%in2a       = OpConstantComposite %v2f32 %float_n1 %float_n1\n"
4045                 "%in2b       = OpConstantComposite %Data2 %float_n1 %in2a\n"
4046                 "%s2         = OpConstantComposite %Data %in2b %float_n1\n"
4047
4048                 "%main     = OpFunction %void None %voidf\n"
4049                 "%entry    = OpLabel\n"
4050                 "%idval    = OpLoad %uvec3 %id\n"
4051                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4052                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4053                 "%inval    = OpLoad %f32 %inloc\n"
4054
4055                 "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4056                 "            OpSelectionMerge %cm None\n"
4057                 "            OpBranchConditional %comp %tb %fb\n"
4058                 "%tb       = OpLabel\n"
4059                 "            OpBranch %cm\n"
4060                 "%fb       = OpLabel\n"
4061                 "            OpBranch %cm\n"
4062                 "%cm       = OpLabel\n"
4063                 "%sres     = OpPhi %Data %s1 %tb %s2 %fb\n"
4064                 "%res      = OpCompositeExtract %f32 %sres 0 0\n"
4065
4066                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4067                 "            OpStore %outloc %res\n"
4068                 "            OpReturn\n"
4069
4070                 "            OpFunctionEnd\n";
4071         specStruct.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4072         specStruct.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4073         specStruct.numWorkGroups = IVec3(numElements, 1, 1);
4074
4075         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_int", "OpPhi with int variables", specInt));
4076         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float", "OpPhi with float variables", specFloat));
4077         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float16", "OpPhi with 16bit float variables", specFloat16));
4078         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_vec3", "OpPhi with vec3 variables", specVec3));
4079         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_mat4", "OpPhi with mat4 variables", specMat4));
4080         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_array", "OpPhi with array variables", specArray));
4081         group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_struct", "OpPhi with struct variables", specStruct));
4082 }
4083
// Produces "count" SPIR-V float constant definitions, one per line, followed
// by an empty line. The i-th constant is named %cf<N> and holds the value
// <N>.0, where N = i * 10 + 5 (so %cf5 = 5.0, %cf15 = 15.0, ...).
string generateConstantDefinitions (int count)
{
        std::ostringstream      defs;

        for (int ndx = 0; ndx < count; ++ndx)
        {
                const int constValue = ndx * 10 + 5;

                defs << "%cf" << constValue << " = OpConstant %f32 " << constValue << ".0\n";
        }

        defs << "\n";
        return defs.str();
}
4092
// Produces the "<literal> <label>" pairs " 0 %case0 1 %case1 ..." for
// "count" cases on a single line, terminated by a newline. This is the
// operand form that follows the default label of an OpSwitch.
string generateSwitchCases (int count)
{
        std::ostringstream      operands;
        int                     caseNdx         = 0;

        while (caseNdx < count)
        {
                operands << " " << caseNdx << " %case" << caseNdx;
                ++caseNdx;
        }

        operands << "\n";
        return operands.str();
}
4101
// Produces one basic block per case: a "%case<i> = OpLabel" line followed by
// an unconditional branch to %phi. An empty line terminates the output.
string generateSwitchTargets (int count)
{
        std::ostringstream      blocks;

        for (int caseNdx = 0; caseNdx != count && caseNdx < count; ++caseNdx)
        {
                blocks << "%case" << caseNdx << " = OpLabel\n";
                blocks << "            OpBranch %phi\n";
        }

        blocks << "\n";
        return blocks.str();
}
4110
// Produces the "<value> <parent block>" operand pairs of an OpPhi that picks
// constant %cf<i * 10 + 5> when control arrives from block %case<i>. All
// pairs are written on one line, terminated by a newline.
string generateOpPhiParams (int count)
{
        std::ostringstream      operands;

        for (int caseNdx = 0; caseNdx < count; ++caseNdx)
        {
                const int constValue = caseNdx * 10 + 5;

                operands << " %cf" << constValue;
                operands << " %case" << caseNdx;
        }

        operands << "\n";
        return operands.str();
}
4119
// Returns the decimal text representation of "value".
string generateIntWidth (int value)
{
        std::ostringstream      text;

        text << value;

        const string formatted = text.str();
        return formatted;
}
4126
// Returns a copy of "s" in which the text "ABC" has been inserted after some
// of the characters.
//
// For every input character "add" is added to the running accumulator "acc".
// Whenever the accumulator then exceeds "treshold", "treshold" is subtracted
// from it and "ABC" is inserted after that character. Skipping insertions
// this way makes the result less uniform (and a lot shorter) than inserting
// after every character. "acc" is passed by reference so that its state
// carries over from one call to the next.
string expandOpPhiCase5 (const string& s, int &acc, int add, int treshold)
{
        std::ostringstream      expanded;

        for (const char* cur = s.c_str(); *cur != '\0'; ++cur)
        {
                expanded << *cur;
                acc += add;

                if (acc <= treshold)
                        continue;

                // Accumulator ran over: consume one treshold and inject.
                acc -= treshold;
                expanded << "ABC";
        }

        return expanded.str();
}
4149
4150 // Calculate expected result based on the code string
4151 float calcOpPhiCase5 (float val, const string& s)
4152 {
4153         const char*             p               = s.c_str();
4154         float                   x[8];
4155         bool                    b[8];
4156         const float             tv[8]   = { 0.5f, 1.5f, 3.5f, 7.5f, 15.5f, 31.5f, 63.5f, 127.5f };
4157         const float             v               = deFloatAbs(val);
4158         float                   res             = 0;
4159         int                             depth   = -1;
4160         int                             skip    = 0;
4161
4162         for (int i = 7; i >= 0; --i)
4163                 x[i] = std::fmod((float)v, (float)(2 << i));
4164         for (int i = 7; i >= 0; --i)
4165                 b[i] = x[i] > tv[i];
4166
4167         while (*p)
4168         {
4169                 if (*p == 'A')
4170                 {
4171                         depth++;
4172                         if (skip == 0 && b[depth])
4173                         {
4174                                 res++;
4175                         }
4176                         else
4177                                 skip++;
4178                 }
4179                 if (*p == 'B')
4180                 {
4181                         if (skip)
4182                                 skip--;
4183                         if (b[depth] || skip)
4184                                 skip++;
4185                 }
4186                 if (*p == 'C')
4187                 {
4188                         depth--;
4189                         if (skip)
4190                                 skip--;
4191                 }
4192                 p++;
4193         }
4194         return res;
4195 }
4196
// Generates the SPIR-V body for the nested if/else structure described by the
// code string "s". The letters of the string mean:
//
// A:
//     if (certain bit is set)
//     {
//       result++;
//
// B:
//     } else {
//
// C:
//     }
//
// examples:
// AABCBC leads to if(){r++;if(){r++;}else{}}else{}
// ABABCC leads to if(){r++;}else{if(){r++;}else{}}
// ABCABC leads to if(){r++;}else{}if(){r++;}else{}
//
// Every 'A' is identified by its position in the string; that id names the
// %t (then), %f (else) and %m (merge) labels as well as the %rt (incremented)
// and %rm (merged) values of the construct. The condition of an 'A' at
// nesting level d is %b<d>. The OpPhi emitted for the final character of the
// string is named %res instead of %rm<id>.
//
// The else-branches do not produce new values, so the generator has to keep
// track of which value, and which predecessor label, is current at each
// point in order to feed the right operands to the OpPhi at the merge.
string generateOpPhiCase5 (const string& s)
{
        std::stack<int>                 openIds;                // Ids of the constructs currently open.
        std::stack<std::string>         liveValue;              // Id of the current result value.
        std::stack<std::string>         liveOperand;            // Current "<value> <label>" OpPhi operand.
        std::stack<std::string>         thenOperand;            // Operand recorded when a then-branch ended.
        std::ostringstream              code;
        int                             nesting                 = -1;
        int                             blockId                 = 0;
        int                             position                = 0;

        openIds.push(-1);
        liveValue.push("%f32_0");
        liveOperand.push("%f32_0 %entry");

        for (const char* cur = s.c_str(); *cur; ++cur, ++position)
        {
                switch (*cur)
                {
                        case 'A':
                        {
                                ++nesting;
                                blockId = position;
                                openIds.push(blockId);

                                code << "\tOpSelectionMerge %m" << blockId << " None\n"
                                         << "\tOpBranchConditional %b" << nesting << " %t" << blockId << " %f" << blockId << "\n"
                                         << "%t" << blockId << " = OpLabel\n"
                                         << "%rt" << blockId << " = OpFAdd %f32 " << liveValue.top() << " %f32_1\n";

                                // The incremented value becomes current inside the then-branch.
                                std::ostringstream incremented;
                                incremented << "%rt" << blockId;
                                liveValue.push(incremented.str());
                                incremented << " %t" << blockId;
                                liveOperand.push(incremented.str());
                                break;
                        }

                        case 'B':
                        {
                                // Remember what the then-branch delivers and drop back to the value before it.
                                thenOperand.push(liveOperand.top());
                                liveValue.pop();
                                liveOperand.pop();

                                code << "\tOpBranch %m" << blockId << "\n"
                                         << "%f" << blockId << " = OpLabel\n";

                                // Inside the else-branch the previous value is unchanged but now arrives from %f<id>.
                                std::ostringstream carried;
                                carried << liveValue.top() << " %f" << blockId;
                                liveOperand.pop();
                                liveOperand.push(carried.str());
                                break;
                        }

                        case 'C':
                        {
                                const std::string       elseOperand     = liveOperand.top();
                                const bool              isLast          = (*(cur + 1) == 0);

                                code << "\tOpBranch %m" << blockId << "\n"
                                         << "%m" << blockId << " = OpLabel\n";

                                if (isLast)
                                        code << "%res"; // last result goes to %res
                                else
                                        code << "%rm" << blockId;

                                code << " = OpPhi %f32  " << thenOperand.top() << "  " << elseOperand << "\n";

                                // The merged value replaces the current value of the enclosing scope.
                                std::ostringstream merged;
                                merged << "%rm" << blockId;
                                liveValue.pop();
                                liveValue.push(merged.str());
                                merged << " %m" << blockId;
                                liveOperand.pop();
                                liveOperand.push(merged.str());

                                thenOperand.pop();
                                --nesting;
                                openIds.pop();
                                blockId = openIds.top();
                                break;
                        }

                        default:
                                break;
                }
        }

        return code.str();
}
4295
4296 tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
4297 {
4298         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
4299         ComputeShaderSpec                               spec1;
4300         ComputeShaderSpec                               spec2;
4301         ComputeShaderSpec                               spec3;
4302         ComputeShaderSpec                               spec4;
4303         ComputeShaderSpec                               spec5;
4304         de::Random                                              rnd                             (deStringHash(group->getName()));
4305         const int                                               numElements             = 100;
4306         vector<float>                                   inputFloats             (numElements, 0);
4307         vector<float>                                   outputFloats1   (numElements, 0);
4308         vector<float>                                   outputFloats2   (numElements, 0);
4309         vector<float>                                   outputFloats3   (numElements, 0);
4310         vector<float>                                   outputFloats4   (numElements, 0);
4311         vector<float>                                   outputFloats5   (numElements, 0);
4312         std::string                                             codestring              = "ABC";
4313         const int                                               test4Width              = 1024;
4314
4315         // Build case 5 code string. Each iteration makes the hierarchy more complicated.
4316         // 9 iterations with (7, 24) parameters makes the hierarchy 8 deep with about 1500 lines of
4317         // shader code.
4318         for (int i = 0, acc = 0; i < 9; i++)
4319                 codestring = expandOpPhiCase5(codestring, acc, 7, 24);
4320
4321         fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4322
4323         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4324         floorAll(inputFloats);
4325
4326         for (size_t ndx = 0; ndx < numElements; ++ndx)
4327         {
4328                 switch (ndx % 3)
4329                 {
4330                         case 0:         outputFloats1[ndx] = inputFloats[ndx] + 5.5f;   break;
4331                         case 1:         outputFloats1[ndx] = inputFloats[ndx] + 20.5f;  break;
4332                         case 2:         outputFloats1[ndx] = inputFloats[ndx] + 1.75f;  break;
4333                         default:        break;
4334                 }
4335                 outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
4336                 outputFloats3[ndx] = 8.5f - inputFloats[ndx];
4337
4338                 int index4 = (int)deFloor(deAbs((float)ndx * inputFloats[ndx]));
4339                 outputFloats4[ndx] = (float)(index4 % test4Width) * 10.0f + 5.0f;
4340
4341                 outputFloats5[ndx] = calcOpPhiCase5(inputFloats[ndx], codestring);
4342         }
4343
4344         spec1.assembly =
4345                 string(getComputeAsmShaderPreamble()) +
4346
4347                 "OpSource GLSL 430\n"
4348                 "OpName %main \"main\"\n"
4349                 "OpName %id \"gl_GlobalInvocationID\"\n"
4350
4351                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4352
4353                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4354
4355                 "%id = OpVariable %uvec3ptr Input\n"
4356                 "%zero       = OpConstant %i32 0\n"
4357                 "%three      = OpConstant %u32 3\n"
4358                 "%constf5p5  = OpConstant %f32 5.5\n"
4359                 "%constf20p5 = OpConstant %f32 20.5\n"
4360                 "%constf1p75 = OpConstant %f32 1.75\n"
4361                 "%constf8p5  = OpConstant %f32 8.5\n"
4362                 "%constf6p5  = OpConstant %f32 6.5\n"
4363
4364                 "%main     = OpFunction %void None %voidf\n"
4365                 "%entry    = OpLabel\n"
4366                 "%idval    = OpLoad %uvec3 %id\n"
4367                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4368                 "%selector = OpUMod %u32 %x %three\n"
4369                 "            OpSelectionMerge %phi None\n"
4370                 "            OpSwitch %selector %default 0 %case0 1 %case1 2 %case2\n"
4371
4372                 // Case 1 before OpPhi.
4373                 "%case1    = OpLabel\n"
4374                 "            OpBranch %phi\n"
4375
4376                 "%default  = OpLabel\n"
4377                 "            OpUnreachable\n"
4378
4379                 "%phi      = OpLabel\n"
4380                 "%operand  = OpPhi %f32   %constf1p75 %case2   %constf20p5 %case1   %constf5p5 %case0\n" // not in the order of blocks
4381                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4382                 "%inval    = OpLoad %f32 %inloc\n"
4383                 "%add      = OpFAdd %f32 %inval %operand\n"
4384                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4385                 "            OpStore %outloc %add\n"
4386                 "            OpReturn\n"
4387
4388                 // Case 0 after OpPhi.
4389                 "%case0    = OpLabel\n"
4390                 "            OpBranch %phi\n"
4391
4392
4393                 // Case 2 after OpPhi.
4394                 "%case2    = OpLabel\n"
4395                 "            OpBranch %phi\n"
4396
4397                 "            OpFunctionEnd\n";
4398         spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4399         spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
4400         spec1.numWorkGroups = IVec3(numElements, 1, 1);
4401
4402         group->addChild(new SpvAsmComputeShaderCase(testCtx, "block", "out-of-order and unreachable blocks for OpPhi", spec1));
4403
4404         spec2.assembly =
4405                 string(getComputeAsmShaderPreamble()) +
4406
4407                 "OpName %main \"main\"\n"
4408                 "OpName %id \"gl_GlobalInvocationID\"\n"
4409
4410                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4411
4412                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4413
4414                 "%id         = OpVariable %uvec3ptr Input\n"
4415                 "%zero       = OpConstant %i32 0\n"
4416                 "%one        = OpConstant %i32 1\n"
4417                 "%three      = OpConstant %i32 3\n"
4418                 "%constf6p5  = OpConstant %f32 6.5\n"
4419
4420                 "%main       = OpFunction %void None %voidf\n"
4421                 "%entry      = OpLabel\n"
4422                 "%idval      = OpLoad %uvec3 %id\n"
4423                 "%x          = OpCompositeExtract %u32 %idval 0\n"
4424                 "%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
4425                 "%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
4426                 "%inval      = OpLoad %f32 %inloc\n"
4427                 "              OpBranch %phi\n"
4428
4429                 "%phi        = OpLabel\n"
4430                 "%step       = OpPhi %i32 %zero  %entry %step_next  %phi\n"
4431                 "%accum      = OpPhi %f32 %inval %entry %accum_next %phi\n"
4432                 "%step_next  = OpIAdd %i32 %step %one\n"
4433                 "%accum_next = OpFAdd %f32 %accum %constf6p5\n"
4434                 "%still_loop = OpSLessThan %bool %step %three\n"
4435                 "              OpLoopMerge %exit %phi None\n"
4436                 "              OpBranchConditional %still_loop %phi %exit\n"
4437
4438                 "%exit       = OpLabel\n"
4439                 "              OpStore %outloc %accum\n"
4440                 "              OpReturn\n"
4441                 "              OpFunctionEnd\n";
4442         spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4443         spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
4444         spec2.numWorkGroups = IVec3(numElements, 1, 1);
4445
4446         group->addChild(new SpvAsmComputeShaderCase(testCtx, "induction", "The usual way induction variables are handled in LLVM IR", spec2));
4447
4448         spec3.assembly =
4449                 string(getComputeAsmShaderPreamble()) +
4450
4451                 "OpName %main \"main\"\n"
4452                 "OpName %id \"gl_GlobalInvocationID\"\n"
4453
4454                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4455
4456                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4457
4458                 "%f32ptr_f   = OpTypePointer Function %f32\n"
4459                 "%id         = OpVariable %uvec3ptr Input\n"
4460                 "%true       = OpConstantTrue %bool\n"
4461                 "%false      = OpConstantFalse %bool\n"
4462                 "%zero       = OpConstant %i32 0\n"
4463                 "%constf8p5  = OpConstant %f32 8.5\n"
4464
4465                 "%main       = OpFunction %void None %voidf\n"
4466                 "%entry      = OpLabel\n"
4467                 "%b          = OpVariable %f32ptr_f Function %constf8p5\n"
4468                 "%idval      = OpLoad %uvec3 %id\n"
4469                 "%x          = OpCompositeExtract %u32 %idval 0\n"
4470                 "%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
4471                 "%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
4472                 "%a_init     = OpLoad %f32 %inloc\n"
4473                 "%b_init     = OpLoad %f32 %b\n"
4474                 "              OpBranch %phi\n"
4475
4476                 "%phi        = OpLabel\n"
4477                 "%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
4478                 "%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
4479                 "%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
4480                 "              OpLoopMerge %exit %phi None\n"
4481                 "              OpBranchConditional %still_loop %phi %exit\n"
4482
4483                 "%exit       = OpLabel\n"
4484                 "%sub        = OpFSub %f32 %a_next %b_next\n"
4485                 "              OpStore %outloc %sub\n"
4486                 "              OpReturn\n"
4487                 "              OpFunctionEnd\n";
4488         spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4489         spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
4490         spec3.numWorkGroups = IVec3(numElements, 1, 1);
4491
4492         group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", "Swap the values of two variables using OpPhi", spec3));
4493
4494         spec4.assembly =
4495                 "OpCapability Shader\n"
4496                 "%ext = OpExtInstImport \"GLSL.std.450\"\n"
4497                 "OpMemoryModel Logical GLSL450\n"
4498                 "OpEntryPoint GLCompute %main \"main\" %id\n"
4499                 "OpExecutionMode %main LocalSize 1 1 1\n"
4500
4501                 "OpSource GLSL 430\n"
4502                 "OpName %main \"main\"\n"
4503                 "OpName %id \"gl_GlobalInvocationID\"\n"
4504
4505                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4506
4507                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4508
4509                 "%id       = OpVariable %uvec3ptr Input\n"
4510                 "%zero     = OpConstant %i32 0\n"
4511                 "%cimod    = OpConstant %u32 " + generateIntWidth(test4Width) + "\n"
4512
4513                 + generateConstantDefinitions(test4Width) +
4514
4515                 "%main     = OpFunction %void None %voidf\n"
4516                 "%entry    = OpLabel\n"
4517                 "%idval    = OpLoad %uvec3 %id\n"
4518                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4519                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4520                 "%inval    = OpLoad %f32 %inloc\n"
4521                 "%xf       = OpConvertUToF %f32 %x\n"
4522                 "%xm       = OpFMul %f32 %xf %inval\n"
4523                 "%xa       = OpExtInst %f32 %ext FAbs %xm\n"
4524                 "%xi       = OpConvertFToU %u32 %xa\n"
4525                 "%selector = OpUMod %u32 %xi %cimod\n"
4526                 "            OpSelectionMerge %phi None\n"
4527                 "            OpSwitch %selector %default "
4528
4529                 + generateSwitchCases(test4Width) +
4530
4531                 "%default  = OpLabel\n"
4532                 "            OpUnreachable\n"
4533
4534                 + generateSwitchTargets(test4Width) +
4535
4536                 "%phi      = OpLabel\n"
4537                 "%result   = OpPhi %f32"
4538
4539                 + generateOpPhiParams(test4Width) +
4540
4541                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4542                 "            OpStore %outloc %result\n"
4543                 "            OpReturn\n"
4544
4545                 "            OpFunctionEnd\n";
4546         spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4547         spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
4548         spec4.numWorkGroups = IVec3(numElements, 1, 1);
4549
4550         group->addChild(new SpvAsmComputeShaderCase(testCtx, "wide", "OpPhi with a lot of parameters", spec4));
4551
4552         spec5.assembly =
4553                 "OpCapability Shader\n"
4554                 "%ext      = OpExtInstImport \"GLSL.std.450\"\n"
4555                 "OpMemoryModel Logical GLSL450\n"
4556                 "OpEntryPoint GLCompute %main \"main\" %id\n"
4557                 "OpExecutionMode %main LocalSize 1 1 1\n"
4558                 "%code     = OpString \"" + codestring + "\"\n"
4559
4560                 "OpSource GLSL 430\n"
4561                 "OpName %main \"main\"\n"
4562                 "OpName %id \"gl_GlobalInvocationID\"\n"
4563
4564                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4565
4566                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4567
4568                 "%id       = OpVariable %uvec3ptr Input\n"
4569                 "%zero     = OpConstant %i32 0\n"
4570                 "%f32_0    = OpConstant %f32 0.0\n"
4571                 "%f32_0_5  = OpConstant %f32 0.5\n"
4572                 "%f32_1    = OpConstant %f32 1.0\n"
4573                 "%f32_1_5  = OpConstant %f32 1.5\n"
4574                 "%f32_2    = OpConstant %f32 2.0\n"
4575                 "%f32_3_5  = OpConstant %f32 3.5\n"
4576                 "%f32_4    = OpConstant %f32 4.0\n"
4577                 "%f32_7_5  = OpConstant %f32 7.5\n"
4578                 "%f32_8    = OpConstant %f32 8.0\n"
4579                 "%f32_15_5 = OpConstant %f32 15.5\n"
4580                 "%f32_16   = OpConstant %f32 16.0\n"
4581                 "%f32_31_5 = OpConstant %f32 31.5\n"
4582                 "%f32_32   = OpConstant %f32 32.0\n"
4583                 "%f32_63_5 = OpConstant %f32 63.5\n"
4584                 "%f32_64   = OpConstant %f32 64.0\n"
4585                 "%f32_127_5 = OpConstant %f32 127.5\n"
4586                 "%f32_128  = OpConstant %f32 128.0\n"
4587                 "%f32_256  = OpConstant %f32 256.0\n"
4588
4589                 "%main     = OpFunction %void None %voidf\n"
4590                 "%entry    = OpLabel\n"
4591                 "%idval    = OpLoad %uvec3 %id\n"
4592                 "%x        = OpCompositeExtract %u32 %idval 0\n"
4593                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4594                 "%inval    = OpLoad %f32 %inloc\n"
4595
4596                 "%xabs     = OpExtInst %f32 %ext FAbs %inval\n"
4597                 "%x8       = OpFMod %f32 %xabs %f32_256\n"
4598                 "%x7       = OpFMod %f32 %xabs %f32_128\n"
4599                 "%x6       = OpFMod %f32 %xabs %f32_64\n"
4600                 "%x5       = OpFMod %f32 %xabs %f32_32\n"
4601                 "%x4       = OpFMod %f32 %xabs %f32_16\n"
4602                 "%x3       = OpFMod %f32 %xabs %f32_8\n"
4603                 "%x2       = OpFMod %f32 %xabs %f32_4\n"
4604                 "%x1       = OpFMod %f32 %xabs %f32_2\n"
4605
4606                 "%b7       = OpFOrdGreaterThanEqual %bool %x8 %f32_127_5\n"
4607                 "%b6       = OpFOrdGreaterThanEqual %bool %x7 %f32_63_5\n"
4608                 "%b5       = OpFOrdGreaterThanEqual %bool %x6 %f32_31_5\n"
4609                 "%b4       = OpFOrdGreaterThanEqual %bool %x5 %f32_15_5\n"
4610                 "%b3       = OpFOrdGreaterThanEqual %bool %x4 %f32_7_5\n"
4611                 "%b2       = OpFOrdGreaterThanEqual %bool %x3 %f32_3_5\n"
4612                 "%b1       = OpFOrdGreaterThanEqual %bool %x2 %f32_1_5\n"
4613                 "%b0       = OpFOrdGreaterThanEqual %bool %x1 %f32_0_5\n"
4614
4615                 + generateOpPhiCase5(codestring) +
4616
4617                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4618                 "            OpStore %outloc %res\n"
4619                 "            OpReturn\n"
4620
4621                 "            OpFunctionEnd\n";
4622         spec5.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4623         spec5.outputs.push_back(BufferSp(new Float32Buffer(outputFloats5)));
4624         spec5.numWorkGroups = IVec3(numElements, 1, 1);
4625
4626         group->addChild(new SpvAsmComputeShaderCase(testCtx, "nested", "Stress OpPhi with a lot of nesting", spec5));
4627
4628         createOpPhiVartypeTests(group, testCtx);
4629
4630         return group.release();
4631 }
4632
4633 // Assembly code used for testing block order is based on GLSL source code:
4634 //
4635 // #version 430
4636 //
4637 // layout(std140, set = 0, binding = 0) readonly buffer Input {
4638 //   float elements[];
4639 // } input_data;
4640 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
4641 //   float elements[];
4642 // } output_data;
4643 //
4644 // void main() {
4645 //   uint x = gl_GlobalInvocationID.x;
4646 //   output_data.elements[x] = input_data.elements[x];
4647 //   if (x > uint(50)) {
4648 //     switch (x % uint(3)) {
4649 //       case 0: output_data.elements[x] += 1.5f; break;
4650 //       case 1: output_data.elements[x] += 42.f; break;
4651 //       case 2: output_data.elements[x] -= 27.f; break;
4652 //       default: break;
4653 //     }
4654 //   } else {
4655 //     output_data.elements[x] = -input_data.elements[x];
4656 //   }
4657 // }
4658 tcu::TestCaseGroup* createBlockOrderGroup (tcu::TestContext& testCtx)
4659 {
4660         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "block_order", "Test block orders"));
4661         ComputeShaderSpec                               spec;
4662         de::Random                                              rnd                             (deStringHash(group->getName()));
4663         const int                                               numElements             = 100;
4664         vector<float>                                   inputFloats             (numElements, 0);
4665         vector<float>                                   outputFloats    (numElements, 0);
4666
4667         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
4668
4669         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4670         floorAll(inputFloats);
4671
4672         for (size_t ndx = 0; ndx <= 50; ++ndx)
4673                 outputFloats[ndx] = -inputFloats[ndx];
4674
4675         for (size_t ndx = 51; ndx < numElements; ++ndx)
4676         {
4677                 switch (ndx % 3)
4678                 {
4679                         case 0:         outputFloats[ndx] = inputFloats[ndx] + 1.5f; break;
4680                         case 1:         outputFloats[ndx] = inputFloats[ndx] + 42.f; break;
4681                         case 2:         outputFloats[ndx] = inputFloats[ndx] - 27.f; break;
4682                         default:        break;
4683                 }
4684         }
4685
4686         spec.assembly =
4687                 string(getComputeAsmShaderPreamble()) +
4688
4689                 "OpSource GLSL 430\n"
4690                 "OpName %main \"main\"\n"
4691                 "OpName %id \"gl_GlobalInvocationID\"\n"
4692
4693                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4694
4695                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4696
4697                 "%u32ptr       = OpTypePointer Function %u32\n"
4698                 "%u32ptr_input = OpTypePointer Input %u32\n"
4699
4700                 + string(getComputeAsmInputOutputBuffer()) +
4701
4702                 "%id        = OpVariable %uvec3ptr Input\n"
4703                 "%zero      = OpConstant %i32 0\n"
4704                 "%const3    = OpConstant %u32 3\n"
4705                 "%const50   = OpConstant %u32 50\n"
4706                 "%constf1p5 = OpConstant %f32 1.5\n"
4707                 "%constf27  = OpConstant %f32 27.0\n"
4708                 "%constf42  = OpConstant %f32 42.0\n"
4709
4710                 "%main = OpFunction %void None %voidf\n"
4711
4712                 // entry block.
4713                 "%entry    = OpLabel\n"
4714
4715                 // Create a temporary variable to hold the value of gl_GlobalInvocationID.x.
4716                 "%xvar     = OpVariable %u32ptr Function\n"
4717                 "%xptr     = OpAccessChain %u32ptr_input %id %zero\n"
4718                 "%x        = OpLoad %u32 %xptr\n"
4719                 "            OpStore %xvar %x\n"
4720
4721                 "%cmp      = OpUGreaterThan %bool %x %const50\n"
4722                 "            OpSelectionMerge %if_merge None\n"
4723                 "            OpBranchConditional %cmp %if_true %if_false\n"
4724
4725                 // False branch for if-statement: placed in the middle of switch cases and before true branch.
4726                 "%if_false = OpLabel\n"
4727                 "%x_f      = OpLoad %u32 %xvar\n"
4728                 "%inloc_f  = OpAccessChain %f32ptr %indata %zero %x_f\n"
4729                 "%inval_f  = OpLoad %f32 %inloc_f\n"
4730                 "%negate   = OpFNegate %f32 %inval_f\n"
4731                 "%outloc_f = OpAccessChain %f32ptr %outdata %zero %x_f\n"
4732                 "            OpStore %outloc_f %negate\n"
4733                 "            OpBranch %if_merge\n"
4734
4735                 // Merge block for if-statement: placed in the middle of true and false branch.
4736                 "%if_merge = OpLabel\n"
4737                 "            OpReturn\n"
4738
4739                 // True branch for if-statement: placed in the middle of swtich cases and after the false branch.
4740                 "%if_true  = OpLabel\n"
4741                 "%xval_t   = OpLoad %u32 %xvar\n"
4742                 "%mod      = OpUMod %u32 %xval_t %const3\n"
4743                 "            OpSelectionMerge %switch_merge None\n"
4744                 "            OpSwitch %mod %default 0 %case0 1 %case1 2 %case2\n"
4745
4746                 // Merge block for switch-statement: placed before the case
4747                 // bodies.  But it must follow OpSwitch which dominates it.
4748                 "%switch_merge = OpLabel\n"
4749                 "                OpBranch %if_merge\n"
4750
4751                 // Case 1 for switch-statement: placed before case 0.
4752                 // It must follow the OpSwitch that dominates it.
4753                 "%case1    = OpLabel\n"
4754                 "%x_1      = OpLoad %u32 %xvar\n"
4755                 "%inloc_1  = OpAccessChain %f32ptr %indata %zero %x_1\n"
4756                 "%inval_1  = OpLoad %f32 %inloc_1\n"
4757                 "%addf42   = OpFAdd %f32 %inval_1 %constf42\n"
4758                 "%outloc_1 = OpAccessChain %f32ptr %outdata %zero %x_1\n"
4759                 "            OpStore %outloc_1 %addf42\n"
4760                 "            OpBranch %switch_merge\n"
4761
4762                 // Case 2 for switch-statement.
4763                 "%case2    = OpLabel\n"
4764                 "%x_2      = OpLoad %u32 %xvar\n"
4765                 "%inloc_2  = OpAccessChain %f32ptr %indata %zero %x_2\n"
4766                 "%inval_2  = OpLoad %f32 %inloc_2\n"
4767                 "%subf27   = OpFSub %f32 %inval_2 %constf27\n"
4768                 "%outloc_2 = OpAccessChain %f32ptr %outdata %zero %x_2\n"
4769                 "            OpStore %outloc_2 %subf27\n"
4770                 "            OpBranch %switch_merge\n"
4771
4772                 // Default case for switch-statement: placed in the middle of normal cases.
4773                 "%default = OpLabel\n"
4774                 "           OpBranch %switch_merge\n"
4775
4776                 // Case 0 for switch-statement: out of order.
4777                 "%case0    = OpLabel\n"
4778                 "%x_0      = OpLoad %u32 %xvar\n"
4779                 "%inloc_0  = OpAccessChain %f32ptr %indata %zero %x_0\n"
4780                 "%inval_0  = OpLoad %f32 %inloc_0\n"
4781                 "%addf1p5  = OpFAdd %f32 %inval_0 %constf1p5\n"
4782                 "%outloc_0 = OpAccessChain %f32ptr %outdata %zero %x_0\n"
4783                 "            OpStore %outloc_0 %addf1p5\n"
4784                 "            OpBranch %switch_merge\n"
4785
4786                 "            OpFunctionEnd\n";
4787         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4788         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4789         spec.numWorkGroups = IVec3(numElements, 1, 1);
4790
4791         group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "various out-of-order blocks", spec));
4792
4793         return group.release();
4794 }
4795
4796 tcu::TestCaseGroup* createMultipleShaderGroup (tcu::TestContext& testCtx)
4797 {
4798         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "multiple_shaders", "Test multiple shaders in the same module"));
4799         ComputeShaderSpec                               spec1;
4800         ComputeShaderSpec                               spec2;
4801         de::Random                                              rnd                             (deStringHash(group->getName()));
4802         const int                                               numElements             = 100;
4803         vector<float>                                   inputFloats             (numElements, 0);
4804         vector<float>                                   outputFloats1   (numElements, 0);
4805         vector<float>                                   outputFloats2   (numElements, 0);
4806         fillRandomScalars(rnd, -500.f, 500.f, &inputFloats[0], numElements);
4807
4808         for (size_t ndx = 0; ndx < numElements; ++ndx)
4809         {
4810                 outputFloats1[ndx] = inputFloats[ndx] + inputFloats[ndx];
4811                 outputFloats2[ndx] = -inputFloats[ndx];
4812         }
4813
4814         const string assembly(
4815                 "OpCapability Shader\n"
4816                 "OpMemoryModel Logical GLSL450\n"
4817                 "OpEntryPoint GLCompute %comp_main1 \"entrypoint1\" %id\n"
4818                 "OpEntryPoint GLCompute %comp_main2 \"entrypoint2\" %id\n"
4819                 // A module cannot have two OpEntryPoint instructions with the same Execution Model and the same Name string.
4820                 "OpEntryPoint Vertex    %vert_main  \"entrypoint2\" %vert_builtins %vertexIndex %instanceIndex\n"
4821                 "OpExecutionMode %comp_main1 LocalSize 1 1 1\n"
4822                 "OpExecutionMode %comp_main2 LocalSize 1 1 1\n"
4823
4824                 "OpName %comp_main1              \"entrypoint1\"\n"
4825                 "OpName %comp_main2              \"entrypoint2\"\n"
4826                 "OpName %vert_main               \"entrypoint2\"\n"
4827                 "OpName %id                      \"gl_GlobalInvocationID\"\n"
4828                 "OpName %vert_builtin_st         \"gl_PerVertex\"\n"
4829                 "OpName %vertexIndex             \"gl_VertexIndex\"\n"
4830                 "OpName %instanceIndex           \"gl_InstanceIndex\"\n"
4831                 "OpMemberName %vert_builtin_st 0 \"gl_Position\"\n"
4832                 "OpMemberName %vert_builtin_st 1 \"gl_PointSize\"\n"
4833                 "OpMemberName %vert_builtin_st 2 \"gl_ClipDistance\"\n"
4834
4835                 "OpDecorate %id                      BuiltIn GlobalInvocationId\n"
4836                 "OpDecorate %vertexIndex             BuiltIn VertexIndex\n"
4837                 "OpDecorate %instanceIndex           BuiltIn InstanceIndex\n"
4838                 "OpDecorate %vert_builtin_st         Block\n"
4839                 "OpMemberDecorate %vert_builtin_st 0 BuiltIn Position\n"
4840                 "OpMemberDecorate %vert_builtin_st 1 BuiltIn PointSize\n"
4841                 "OpMemberDecorate %vert_builtin_st 2 BuiltIn ClipDistance\n"
4842
4843                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4844
4845                 "%zero       = OpConstant %i32 0\n"
4846                 "%one        = OpConstant %u32 1\n"
4847                 "%c_f32_1    = OpConstant %f32 1\n"
4848
4849                 "%i32inputptr         = OpTypePointer Input %i32\n"
4850                 "%vec4                = OpTypeVector %f32 4\n"
4851                 "%vec4ptr             = OpTypePointer Output %vec4\n"
4852                 "%f32arr1             = OpTypeArray %f32 %one\n"
4853                 "%vert_builtin_st     = OpTypeStruct %vec4 %f32 %f32arr1\n"
4854                 "%vert_builtin_st_ptr = OpTypePointer Output %vert_builtin_st\n"
4855                 "%vert_builtins       = OpVariable %vert_builtin_st_ptr Output\n"
4856
4857                 "%id         = OpVariable %uvec3ptr Input\n"
4858                 "%vertexIndex = OpVariable %i32inputptr Input\n"
4859                 "%instanceIndex = OpVariable %i32inputptr Input\n"
4860                 "%c_vec4_1   = OpConstantComposite %vec4 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
4861
4862                 // gl_Position = vec4(1.);
4863                 "%vert_main  = OpFunction %void None %voidf\n"
4864                 "%vert_entry = OpLabel\n"
4865                 "%position   = OpAccessChain %vec4ptr %vert_builtins %zero\n"
4866                 "              OpStore %position %c_vec4_1\n"
4867                 "              OpReturn\n"
4868                 "              OpFunctionEnd\n"
4869
4870                 // Double inputs.
4871                 "%comp_main1  = OpFunction %void None %voidf\n"
4872                 "%comp1_entry = OpLabel\n"
4873                 "%idval1      = OpLoad %uvec3 %id\n"
4874                 "%x1          = OpCompositeExtract %u32 %idval1 0\n"
4875                 "%inloc1      = OpAccessChain %f32ptr %indata %zero %x1\n"
4876                 "%inval1      = OpLoad %f32 %inloc1\n"
4877                 "%add         = OpFAdd %f32 %inval1 %inval1\n"
4878                 "%outloc1     = OpAccessChain %f32ptr %outdata %zero %x1\n"
4879                 "               OpStore %outloc1 %add\n"
4880                 "               OpReturn\n"
4881                 "               OpFunctionEnd\n"
4882
4883                 // Negate inputs.
4884                 "%comp_main2  = OpFunction %void None %voidf\n"
4885                 "%comp2_entry = OpLabel\n"
4886                 "%idval2      = OpLoad %uvec3 %id\n"
4887                 "%x2          = OpCompositeExtract %u32 %idval2 0\n"
4888                 "%inloc2      = OpAccessChain %f32ptr %indata %zero %x2\n"
4889                 "%inval2      = OpLoad %f32 %inloc2\n"
4890                 "%neg         = OpFNegate %f32 %inval2\n"
4891                 "%outloc2     = OpAccessChain %f32ptr %outdata %zero %x2\n"
4892                 "               OpStore %outloc2 %neg\n"
4893                 "               OpReturn\n"
4894                 "               OpFunctionEnd\n");
4895
4896         spec1.assembly = assembly;
4897         spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4898         spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
4899         spec1.numWorkGroups = IVec3(numElements, 1, 1);
4900         spec1.entryPoint = "entrypoint1";
4901
4902         spec2.assembly = assembly;
4903         spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4904         spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
4905         spec2.numWorkGroups = IVec3(numElements, 1, 1);
4906         spec2.entryPoint = "entrypoint2";
4907
4908         group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader1", "multiple shaders in the same module", spec1));
4909         group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader2", "multiple shaders in the same module", spec2));
4910
4911         return group.release();
4912 }
4913
// Returns a string made of num4ByteChars copies of one 4-byte UTF-8
// character, i.e. num4ByteChars * 4 bytes in total (empty when the count is 0).
inline std::string makeLongUTF8String (size_t num4ByteChars)
{
	// An example of a longest valid UTF-8 character, spelled out byte by
	// byte as a plain char array. The narrow character type is explicit
	// because Microsoft compilers can otherwise treat the literal as being
	// over wide (16-bit) characters; a C++11 UTF-8 literal would be nicer,
	// but older Microsoft compilers must stay supported.
	const char		earthAfrica[]	= "\xF0\x9F\x8C\x8D";
	const size_t	bytesPerChar	= sizeof(earthAfrica) - 1; // drop the terminating NUL

	std::string result;
	result.reserve(num4ByteChars * bytesPerChar);

	for (size_t remaining = num4ByteChars; remaining > 0; --remaining)
		result.append(earthAfrica, bytesPerChar);

	return result;
}
4930
4931 tcu::TestCaseGroup* createOpSourceGroup (tcu::TestContext& testCtx)
4932 {
4933         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsource", "Tests the OpSource & OpSourceContinued instruction"));
4934         vector<CaseParameter>                   cases;
4935         de::Random                                              rnd                             (deStringHash(group->getName()));
4936         const int                                               numElements             = 100;
4937         vector<float>                                   positiveFloats  (numElements, 0);
4938         vector<float>                                   negativeFloats  (numElements, 0);
4939         const StringTemplate                    shaderTemplate  (
4940                 "OpCapability Shader\n"
4941                 "OpMemoryModel Logical GLSL450\n"
4942
4943                 "OpEntryPoint GLCompute %main \"main\" %id\n"
4944                 "OpExecutionMode %main LocalSize 1 1 1\n"
4945
4946                 "${SOURCE}\n"
4947
4948                 "OpName %main           \"main\"\n"
4949                 "OpName %id             \"gl_GlobalInvocationID\"\n"
4950
4951                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4952
4953                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4954
4955                 "%id        = OpVariable %uvec3ptr Input\n"
4956                 "%zero      = OpConstant %i32 0\n"
4957
4958                 "%main      = OpFunction %void None %voidf\n"
4959                 "%label     = OpLabel\n"
4960                 "%idval     = OpLoad %uvec3 %id\n"
4961                 "%x         = OpCompositeExtract %u32 %idval 0\n"
4962                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
4963                 "%inval     = OpLoad %f32 %inloc\n"
4964                 "%neg       = OpFNegate %f32 %inval\n"
4965                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
4966                 "             OpStore %outloc %neg\n"
4967                 "             OpReturn\n"
4968                 "             OpFunctionEnd\n");
4969
4970         cases.push_back(CaseParameter("unknown_source",                                                 "OpSource Unknown 0"));
4971         cases.push_back(CaseParameter("wrong_source",                                                   "OpSource OpenCL_C 210"));
4972         cases.push_back(CaseParameter("normal_filename",                                                "%fname = OpString \"filename\"\n"
4973                                                                                                                                                         "OpSource GLSL 430 %fname"));
4974         cases.push_back(CaseParameter("empty_filename",                                                 "%fname = OpString \"\"\n"
4975                                                                                                                                                         "OpSource GLSL 430 %fname"));
4976         cases.push_back(CaseParameter("normal_source_code",                                             "%fname = OpString \"filename\"\n"
4977                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\""));
4978         cases.push_back(CaseParameter("empty_source_code",                                              "%fname = OpString \"filename\"\n"
4979                                                                                                                                                         "OpSource GLSL 430 %fname \"\""));
4980         cases.push_back(CaseParameter("long_source_code",                                               "%fname = OpString \"filename\"\n"
4981                                                                                                                                                         "OpSource GLSL 430 %fname \"" + makeLongUTF8String(65530) + "ccc\"")); // word count: 65535
4982         cases.push_back(CaseParameter("utf8_source_code",                                               "%fname = OpString \"filename\"\n"
4983                                                                                                                                                         "OpSource GLSL 430 %fname \"\xE2\x98\x82\xE2\x98\x85\"")); // umbrella & black star symbol
4984         cases.push_back(CaseParameter("normal_sourcecontinued",                                 "%fname = OpString \"filename\"\n"
4985                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvo\"\n"
4986                                                                                                                                                         "OpSourceContinued \"id main() {}\""));
4987         cases.push_back(CaseParameter("empty_sourcecontinued",                                  "%fname = OpString \"filename\"\n"
4988                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
4989                                                                                                                                                         "OpSourceContinued \"\""));
4990         cases.push_back(CaseParameter("long_sourcecontinued",                                   "%fname = OpString \"filename\"\n"
4991                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
4992                                                                                                                                                         "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\"")); // word count: 65535
4993         cases.push_back(CaseParameter("utf8_sourcecontinued",                                   "%fname = OpString \"filename\"\n"
4994                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
4995                                                                                                                                                         "OpSourceContinued \"\xE2\x98\x8E\xE2\x9A\x91\"")); // white telephone & black flag symbol
4996         cases.push_back(CaseParameter("multi_sourcecontinued",                                  "%fname = OpString \"filename\"\n"
4997                                                                                                                                                         "OpSource GLSL 430 %fname \"#version 430\n\"\n"
4998                                                                                                                                                         "OpSourceContinued \"void\"\n"
4999                                                                                                                                                         "OpSourceContinued \"main()\"\n"
5000                                                                                                                                                         "OpSourceContinued \"{}\""));
5001         cases.push_back(CaseParameter("empty_source_before_sourcecontinued",    "%fname = OpString \"filename\"\n"
5002                                                                                                                                                         "OpSource GLSL 430 %fname \"\"\n"
5003                                                                                                                                                         "OpSourceContinued \"#version 430\nvoid main() {}\""));
5004
5005         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5006
5007         for (size_t ndx = 0; ndx < numElements; ++ndx)
5008                 negativeFloats[ndx] = -positiveFloats[ndx];
5009
5010         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5011         {
5012                 map<string, string>             specializations;
5013                 ComputeShaderSpec               spec;
5014
5015                 specializations["SOURCE"] = cases[caseNdx].param;
5016                 spec.assembly = shaderTemplate.specialize(specializations);
5017                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5018                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5019                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5020
5021                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5022         }
5023
5024         return group.release();
5025 }
5026
5027 tcu::TestCaseGroup* createOpSourceExtensionGroup (tcu::TestContext& testCtx)
5028 {
5029         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opsourceextension", "Tests the OpSource instruction"));
5030         vector<CaseParameter>                   cases;
5031         de::Random                                              rnd                             (deStringHash(group->getName()));
5032         const int                                               numElements             = 100;
5033         vector<float>                                   inputFloats             (numElements, 0);
5034         vector<float>                                   outputFloats    (numElements, 0);
5035         const StringTemplate                    shaderTemplate  (
5036                 string(getComputeAsmShaderPreamble()) +
5037
5038                 "OpSourceExtension \"${EXTENSION}\"\n"
5039
5040                 "OpName %main           \"main\"\n"
5041                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5042
5043                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5044
5045                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5046
5047                 "%id        = OpVariable %uvec3ptr Input\n"
5048                 "%zero      = OpConstant %i32 0\n"
5049
5050                 "%main      = OpFunction %void None %voidf\n"
5051                 "%label     = OpLabel\n"
5052                 "%idval     = OpLoad %uvec3 %id\n"
5053                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5054                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5055                 "%inval     = OpLoad %f32 %inloc\n"
5056                 "%neg       = OpFNegate %f32 %inval\n"
5057                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5058                 "             OpStore %outloc %neg\n"
5059                 "             OpReturn\n"
5060                 "             OpFunctionEnd\n");
5061
5062         cases.push_back(CaseParameter("empty_extension",        ""));
5063         cases.push_back(CaseParameter("real_extension",         "GL_ARB_texture_rectangle"));
5064         cases.push_back(CaseParameter("fake_extension",         "GL_ARB_im_the_ultimate_extension"));
5065         cases.push_back(CaseParameter("utf8_extension",         "GL_ARB_\xE2\x98\x82\xE2\x98\x85"));
5066         cases.push_back(CaseParameter("long_extension",         makeLongUTF8String(65533) + "ccc")); // word count: 65535
5067
5068         fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
5069
5070         for (size_t ndx = 0; ndx < numElements; ++ndx)
5071                 outputFloats[ndx] = -inputFloats[ndx];
5072
5073         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5074         {
5075                 map<string, string>             specializations;
5076                 ComputeShaderSpec               spec;
5077
5078                 specializations["EXTENSION"] = cases[caseNdx].param;
5079                 spec.assembly = shaderTemplate.specialize(specializations);
5080                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5081                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5082                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5083
5084                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5085         }
5086
5087         return group.release();
5088 }
5089
5090 // Checks that a compute shader can generate a constant null value of various types, without exercising a computation on it.
5091 tcu::TestCaseGroup* createOpConstantNullGroup (tcu::TestContext& testCtx)
5092 {
5093         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opconstantnull", "Tests the OpConstantNull instruction"));
5094         vector<CaseParameter>                   cases;
5095         de::Random                                              rnd                             (deStringHash(group->getName()));
5096         const int                                               numElements             = 100;
5097         vector<float>                                   positiveFloats  (numElements, 0);
5098         vector<float>                                   negativeFloats  (numElements, 0);
5099         const StringTemplate                    shaderTemplate  (
5100                 string(getComputeAsmShaderPreamble()) +
5101
5102                 "OpSource GLSL 430\n"
5103                 "OpName %main           \"main\"\n"
5104                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5105
5106                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5107
5108                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5109                 "%uvec2     = OpTypeVector %u32 2\n"
5110                 "%bvec3     = OpTypeVector %bool 3\n"
5111                 "%fvec4     = OpTypeVector %f32 4\n"
5112                 "%fmat33    = OpTypeMatrix %fvec3 3\n"
5113                 "%const100  = OpConstant %u32 100\n"
5114                 "%uarr100   = OpTypeArray %i32 %const100\n"
5115                 "%struct    = OpTypeStruct %f32 %i32 %u32\n"
5116                 "%pointer   = OpTypePointer Function %i32\n"
5117                 + string(getComputeAsmInputOutputBuffer()) +
5118
5119                 "%null      = OpConstantNull ${TYPE}\n"
5120
5121                 "%id        = OpVariable %uvec3ptr Input\n"
5122                 "%zero      = OpConstant %i32 0\n"
5123
5124                 "%main      = OpFunction %void None %voidf\n"
5125                 "%label     = OpLabel\n"
5126                 "%idval     = OpLoad %uvec3 %id\n"
5127                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5128                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5129                 "%inval     = OpLoad %f32 %inloc\n"
5130                 "%neg       = OpFNegate %f32 %inval\n"
5131                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5132                 "             OpStore %outloc %neg\n"
5133                 "             OpReturn\n"
5134                 "             OpFunctionEnd\n");
5135
5136         cases.push_back(CaseParameter("bool",                   "%bool"));
5137         cases.push_back(CaseParameter("sint32",                 "%i32"));
5138         cases.push_back(CaseParameter("uint32",                 "%u32"));
5139         cases.push_back(CaseParameter("float32",                "%f32"));
5140         cases.push_back(CaseParameter("vec4float32",    "%fvec4"));
5141         cases.push_back(CaseParameter("vec3bool",               "%bvec3"));
5142         cases.push_back(CaseParameter("vec2uint32",             "%uvec2"));
5143         cases.push_back(CaseParameter("matrix",                 "%fmat33"));
5144         cases.push_back(CaseParameter("array",                  "%uarr100"));
5145         cases.push_back(CaseParameter("struct",                 "%struct"));
5146         cases.push_back(CaseParameter("pointer",                "%pointer"));
5147
5148         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5149
5150         for (size_t ndx = 0; ndx < numElements; ++ndx)
5151                 negativeFloats[ndx] = -positiveFloats[ndx];
5152
5153         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5154         {
5155                 map<string, string>             specializations;
5156                 ComputeShaderSpec               spec;
5157
5158                 specializations["TYPE"] = cases[caseNdx].param;
5159                 spec.assembly = shaderTemplate.specialize(specializations);
5160                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5161                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5162                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5163
5164                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5165         }
5166
5167         return group.release();
5168 }
5169
5170 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
5171 tcu::TestCaseGroup* createOpConstantCompositeGroup (tcu::TestContext& testCtx)
5172 {
5173         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
5174         vector<CaseParameter>                   cases;
5175         de::Random                                              rnd                             (deStringHash(group->getName()));
5176         const int                                               numElements             = 100;
5177         vector<float>                                   positiveFloats  (numElements, 0);
5178         vector<float>                                   negativeFloats  (numElements, 0);
5179         const StringTemplate                    shaderTemplate  (
5180                 string(getComputeAsmShaderPreamble()) +
5181
5182                 "OpSource GLSL 430\n"
5183                 "OpName %main           \"main\"\n"
5184                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5185
5186                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5187
5188                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5189
5190                 "%id        = OpVariable %uvec3ptr Input\n"
5191                 "%zero      = OpConstant %i32 0\n"
5192
5193                 "${CONSTANT}\n"
5194
5195                 "%main      = OpFunction %void None %voidf\n"
5196                 "%label     = OpLabel\n"
5197                 "%idval     = OpLoad %uvec3 %id\n"
5198                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5199                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5200                 "%inval     = OpLoad %f32 %inloc\n"
5201                 "%neg       = OpFNegate %f32 %inval\n"
5202                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5203                 "             OpStore %outloc %neg\n"
5204                 "             OpReturn\n"
5205                 "             OpFunctionEnd\n");
5206
5207         cases.push_back(CaseParameter("vector",                 "%five = OpConstant %u32 5\n"
5208                                                                                                         "%const = OpConstantComposite %uvec3 %five %zero %five"));
5209         cases.push_back(CaseParameter("matrix",                 "%m3fvec3 = OpTypeMatrix %fvec3 3\n"
5210                                                                                                         "%ten = OpConstant %f32 10.\n"
5211                                                                                                         "%fzero = OpConstant %f32 0.\n"
5212                                                                                                         "%vec = OpConstantComposite %fvec3 %ten %fzero %ten\n"
5213                                                                                                         "%mat = OpConstantComposite %m3fvec3 %vec %vec %vec"));
5214         cases.push_back(CaseParameter("struct",                 "%m2vec3 = OpTypeMatrix %fvec3 2\n"
5215                                                                                                         "%struct = OpTypeStruct %i32 %f32 %fvec3 %m2vec3\n"
5216                                                                                                         "%fzero = OpConstant %f32 0.\n"
5217                                                                                                         "%one = OpConstant %f32 1.\n"
5218                                                                                                         "%point5 = OpConstant %f32 0.5\n"
5219                                                                                                         "%vec = OpConstantComposite %fvec3 %one %one %fzero\n"
5220                                                                                                         "%mat = OpConstantComposite %m2vec3 %vec %vec\n"
5221                                                                                                         "%const = OpConstantComposite %struct %zero %point5 %vec %mat"));
5222         cases.push_back(CaseParameter("nested_struct",  "%st1 = OpTypeStruct %u32 %f32\n"
5223                                                                                                         "%st2 = OpTypeStruct %i32 %i32\n"
5224                                                                                                         "%struct = OpTypeStruct %st1 %st2\n"
5225                                                                                                         "%point5 = OpConstant %f32 0.5\n"
5226                                                                                                         "%one = OpConstant %u32 1\n"
5227                                                                                                         "%ten = OpConstant %i32 10\n"
5228                                                                                                         "%st1val = OpConstantComposite %st1 %one %point5\n"
5229                                                                                                         "%st2val = OpConstantComposite %st2 %ten %ten\n"
5230                                                                                                         "%const = OpConstantComposite %struct %st1val %st2val"));
5231
5232         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5233
5234         for (size_t ndx = 0; ndx < numElements; ++ndx)
5235                 negativeFloats[ndx] = -positiveFloats[ndx];
5236
5237         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5238         {
5239                 map<string, string>             specializations;
5240                 ComputeShaderSpec               spec;
5241
5242                 specializations["CONSTANT"] = cases[caseNdx].param;
5243                 spec.assembly = shaderTemplate.specialize(specializations);
5244                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5245                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5246                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5247
5248                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5249         }
5250
5251         return group.release();
5252 }
5253
5254 // Creates a floating point number with the given exponent, and significand
5255 // bits set. It can only create normalized numbers. Only the least significant
5256 // 24 bits of the significand will be examined. The final bit of the
5257 // significand will also be ignored. This allows alignment to be written
5258 // similarly to C99 hex-floats.
5259 // For example if you wanted to write 0x1.7f34p-12 you would call
5260 // constructNormalizedFloat(-12, 0x7f3400)
5261 float constructNormalizedFloat (deInt32 exponent, deUint32 significand)
5262 {
5263         float f = 1.0f;
5264
5265         for (deInt32 idx = 0; idx < 23; ++idx)
5266         {
5267                 f += ((significand & 0x800000) == 0) ? 0.f : std::ldexp(1.0f, -(idx + 1));
5268                 significand <<= 1;
5269         }
5270
5271         return std::ldexp(f, exponent);
5272 }
5273
5274 // Compare instruction for the OpQuantizeF16 compute exact case.
5275 // Returns true if the output is what is expected from the test case.
5276 bool compareOpQuantizeF16ComputeExactCase (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5277 {
5278         if (outputAllocs.size() != 1)
5279                 return false;
5280
5281         // Only size is needed because we cannot compare Nans.
5282         size_t byteSize = expectedOutputs[0].getByteSize();
5283
5284         const float*    outputAsFloat   = static_cast<const float*>(outputAllocs[0]->getHostPtr());
5285
5286         if (byteSize != 4*sizeof(float)) {
5287                 return false;
5288         }
5289
5290         if (*outputAsFloat != constructNormalizedFloat(8, 0x304000) &&
5291                 *outputAsFloat != constructNormalizedFloat(8, 0x300000)) {
5292                 return false;
5293         }
5294         outputAsFloat++;
5295
5296         if (*outputAsFloat != -constructNormalizedFloat(-7, 0x600000) &&
5297                 *outputAsFloat != -constructNormalizedFloat(-7, 0x604000)) {
5298                 return false;
5299         }
5300         outputAsFloat++;
5301
5302         if (*outputAsFloat != constructNormalizedFloat(2, 0x01C000) &&
5303                 *outputAsFloat != constructNormalizedFloat(2, 0x020000)) {
5304                 return false;
5305         }
5306         outputAsFloat++;
5307
5308         if (*outputAsFloat != constructNormalizedFloat(1, 0xFFC000) &&
5309                 *outputAsFloat != constructNormalizedFloat(2, 0x000000)) {
5310                 return false;
5311         }
5312
5313         return true;
5314 }
5315
5316 // Checks that every output from a test-case is a float NaN.
5317 bool compareNan (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5318 {
5319         if (outputAllocs.size() != 1)
5320                 return false;
5321
5322         // Only size is needed because we cannot compare Nans.
5323         size_t byteSize = expectedOutputs[0].getByteSize();
5324
5325         const float* const      output_as_float = static_cast<const float*>(outputAllocs[0]->getHostPtr());
5326
5327         for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
5328         {
5329                 if (!deFloatIsNaN(output_as_float[idx]))
5330                 {
5331                         return false;
5332                 }
5333         }
5334
5335         return true;
5336 }
5337
// Checks that a compute shader quantizes 32-bit floats with OpQuantizeToF16 as expected (infinities, NaN propagation, flush-to-zero and further cases).
5339 tcu::TestCaseGroup* createOpQuantizeToF16Group (tcu::TestContext& testCtx)
5340 {
5341         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opquantize", "Tests the OpQuantizeToF16 instruction"));
5342
5343         const std::string shader (
5344                 string(getComputeAsmShaderPreamble()) +
5345
5346                 "OpSource GLSL 430\n"
5347                 "OpName %main           \"main\"\n"
5348                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5349
5350                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5351
5352                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5353
5354                 "%id        = OpVariable %uvec3ptr Input\n"
5355                 "%zero      = OpConstant %i32 0\n"
5356
5357                 "%main      = OpFunction %void None %voidf\n"
5358                 "%label     = OpLabel\n"
5359                 "%idval     = OpLoad %uvec3 %id\n"
5360                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5361                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5362                 "%inval     = OpLoad %f32 %inloc\n"
5363                 "%quant     = OpQuantizeToF16 %f32 %inval\n"
5364                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5365                 "             OpStore %outloc %quant\n"
5366                 "             OpReturn\n"
5367                 "             OpFunctionEnd\n");
5368
5369         {
5370                 ComputeShaderSpec       spec;
5371                 const deUint32          numElements             = 100;
5372                 vector<float>           infinities;
5373                 vector<float>           results;
5374
5375                 infinities.reserve(numElements);
5376                 results.reserve(numElements);
5377
5378                 for (size_t idx = 0; idx < numElements; ++idx)
5379                 {
5380                         switch(idx % 4)
5381                         {
5382                                 case 0:
5383                                         infinities.push_back(std::numeric_limits<float>::infinity());
5384                                         results.push_back(std::numeric_limits<float>::infinity());
5385                                         break;
5386                                 case 1:
5387                                         infinities.push_back(-std::numeric_limits<float>::infinity());
5388                                         results.push_back(-std::numeric_limits<float>::infinity());
5389                                         break;
5390                                 case 2:
5391                                         infinities.push_back(std::ldexp(1.0f, 16));
5392                                         results.push_back(std::numeric_limits<float>::infinity());
5393                                         break;
5394                                 case 3:
5395                                         infinities.push_back(std::ldexp(-1.0f, 32));
5396                                         results.push_back(-std::numeric_limits<float>::infinity());
5397                                         break;
5398                         }
5399                 }
5400
5401                 spec.assembly = shader;
5402                 spec.inputs.push_back(BufferSp(new Float32Buffer(infinities)));
5403                 spec.outputs.push_back(BufferSp(new Float32Buffer(results)));
5404                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5405
5406                 group->addChild(new SpvAsmComputeShaderCase(
5407                         testCtx, "infinities", "Check that infinities propagated and created", spec));
5408         }
5409
5410         {
5411                 ComputeShaderSpec       spec;
5412                 vector<float>           nans;
5413                 const deUint32          numElements             = 100;
5414
5415                 nans.reserve(numElements);
5416
5417                 for (size_t idx = 0; idx < numElements; ++idx)
5418                 {
5419                         if (idx % 2 == 0)
5420                         {
5421                                 nans.push_back(std::numeric_limits<float>::quiet_NaN());
5422                         }
5423                         else
5424                         {
5425                                 nans.push_back(-std::numeric_limits<float>::quiet_NaN());
5426                         }
5427                 }
5428
5429                 spec.assembly = shader;
5430                 spec.inputs.push_back(BufferSp(new Float32Buffer(nans)));
5431                 spec.outputs.push_back(BufferSp(new Float32Buffer(nans)));
5432                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5433                 spec.verifyIO = &compareNan;
5434
5435                 group->addChild(new SpvAsmComputeShaderCase(
5436                         testCtx, "propagated_nans", "Check that nans are propagated", spec));
5437         }
5438
5439         {
5440                 ComputeShaderSpec       spec;
5441                 vector<float>           small;
5442                 vector<float>           zeros;
5443                 const deUint32          numElements             = 100;
5444
5445                 small.reserve(numElements);
5446                 zeros.reserve(numElements);
5447
5448                 for (size_t idx = 0; idx < numElements; ++idx)
5449                 {
5450                         switch(idx % 6)
5451                         {
5452                                 case 0:
5453                                         small.push_back(0.f);
5454                                         zeros.push_back(0.f);
5455                                         break;
5456                                 case 1:
5457                                         small.push_back(-0.f);
5458                                         zeros.push_back(-0.f);
5459                                         break;
5460                                 case 2:
5461                                         small.push_back(std::ldexp(1.0f, -16));
5462                                         zeros.push_back(0.f);
5463                                         break;
5464                                 case 3:
5465                                         small.push_back(std::ldexp(-1.0f, -32));
5466                                         zeros.push_back(-0.f);
5467                                         break;
5468                                 case 4:
5469                                         small.push_back(std::ldexp(1.0f, -127));
5470                                         zeros.push_back(0.f);
5471                                         break;
5472                                 case 5:
5473                                         small.push_back(-std::ldexp(1.0f, -128));
5474                                         zeros.push_back(-0.f);
5475                                         break;
5476                         }
5477                 }
5478
5479                 spec.assembly = shader;
5480                 spec.inputs.push_back(BufferSp(new Float32Buffer(small)));
5481                 spec.outputs.push_back(BufferSp(new Float32Buffer(zeros)));
5482                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5483
5484                 group->addChild(new SpvAsmComputeShaderCase(
5485                         testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
5486         }
5487
5488         {
5489                 ComputeShaderSpec       spec;
5490                 vector<float>           exact;
5491                 const deUint32          numElements             = 200;
5492
5493                 exact.reserve(numElements);
5494
5495                 for (size_t idx = 0; idx < numElements; ++idx)
5496                         exact.push_back(static_cast<float>(static_cast<int>(idx) - 100));
5497
5498                 spec.assembly = shader;
5499                 spec.inputs.push_back(BufferSp(new Float32Buffer(exact)));
5500                 spec.outputs.push_back(BufferSp(new Float32Buffer(exact)));
5501                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5502
5503                 group->addChild(new SpvAsmComputeShaderCase(
5504                         testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
5505         }
5506
5507         {
5508                 ComputeShaderSpec       spec;
5509                 vector<float>           inputs;
5510                 const deUint32          numElements             = 4;
5511
5512                 inputs.push_back(constructNormalizedFloat(8,    0x300300));
5513                 inputs.push_back(-constructNormalizedFloat(-7,  0x600800));
5514                 inputs.push_back(constructNormalizedFloat(2,    0x01E000));
5515                 inputs.push_back(constructNormalizedFloat(1,    0xFFE000));
5516
5517                 spec.assembly = shader;
5518                 spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
5519                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5520                 spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
5521                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5522
5523                 group->addChild(new SpvAsmComputeShaderCase(
5524                         testCtx, "rounded", "Check that are rounded when needed", spec));
5525         }
5526
5527         return group.release();
5528 }
5529
5530 tcu::TestCaseGroup* createSpecConstantOpQuantizeToF16Group (tcu::TestContext& testCtx)
5531 {
5532         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opspecconstantop_opquantize", "Tests the OpQuantizeToF16 opcode for the OpSpecConstantOp instruction"));
5533
5534         const std::string shader (
5535                 string(getComputeAsmShaderPreamble()) +
5536
5537                 "OpName %main           \"main\"\n"
5538                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5539
5540                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5541
5542                 "OpDecorate %sc_0  SpecId 0\n"
5543                 "OpDecorate %sc_1  SpecId 1\n"
5544                 "OpDecorate %sc_2  SpecId 2\n"
5545                 "OpDecorate %sc_3  SpecId 3\n"
5546                 "OpDecorate %sc_4  SpecId 4\n"
5547                 "OpDecorate %sc_5  SpecId 5\n"
5548
5549                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5550
5551                 "%id        = OpVariable %uvec3ptr Input\n"
5552                 "%zero      = OpConstant %i32 0\n"
5553                 "%c_u32_6   = OpConstant %u32 6\n"
5554
5555                 "%sc_0      = OpSpecConstant %f32 0.\n"
5556                 "%sc_1      = OpSpecConstant %f32 0.\n"
5557                 "%sc_2      = OpSpecConstant %f32 0.\n"
5558                 "%sc_3      = OpSpecConstant %f32 0.\n"
5559                 "%sc_4      = OpSpecConstant %f32 0.\n"
5560                 "%sc_5      = OpSpecConstant %f32 0.\n"
5561
5562                 "%sc_0_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_0\n"
5563                 "%sc_1_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_1\n"
5564                 "%sc_2_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_2\n"
5565                 "%sc_3_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_3\n"
5566                 "%sc_4_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_4\n"
5567                 "%sc_5_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_5\n"
5568
5569                 "%main      = OpFunction %void None %voidf\n"
5570                 "%label     = OpLabel\n"
5571                 "%idval     = OpLoad %uvec3 %id\n"
5572                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5573                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5574                 "%selector  = OpUMod %u32 %x %c_u32_6\n"
5575                 "            OpSelectionMerge %exit None\n"
5576                 "            OpSwitch %selector %exit 0 %case0 1 %case1 2 %case2 3 %case3 4 %case4 5 %case5\n"
5577
5578                 "%case0     = OpLabel\n"
5579                 "             OpStore %outloc %sc_0_quant\n"
5580                 "             OpBranch %exit\n"
5581
5582                 "%case1     = OpLabel\n"
5583                 "             OpStore %outloc %sc_1_quant\n"
5584                 "             OpBranch %exit\n"
5585
5586                 "%case2     = OpLabel\n"
5587                 "             OpStore %outloc %sc_2_quant\n"
5588                 "             OpBranch %exit\n"
5589
5590                 "%case3     = OpLabel\n"
5591                 "             OpStore %outloc %sc_3_quant\n"
5592                 "             OpBranch %exit\n"
5593
5594                 "%case4     = OpLabel\n"
5595                 "             OpStore %outloc %sc_4_quant\n"
5596                 "             OpBranch %exit\n"
5597
5598                 "%case5     = OpLabel\n"
5599                 "             OpStore %outloc %sc_5_quant\n"
5600                 "             OpBranch %exit\n"
5601
5602                 "%exit      = OpLabel\n"
5603                 "             OpReturn\n"
5604
5605                 "             OpFunctionEnd\n");
5606
5607         {
5608                 ComputeShaderSpec       spec;
5609                 const deUint8           numCases        = 4;
5610                 vector<float>           inputs          (numCases, 0.f);
5611                 vector<float>           outputs;
5612
5613                 spec.assembly           = shader;
5614                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
5615
5616                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::numeric_limits<float>::infinity()));
5617                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::numeric_limits<float>::infinity()));
5618                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, 16)));
5619                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, 32)));
5620
5621                 outputs.push_back(std::numeric_limits<float>::infinity());
5622                 outputs.push_back(-std::numeric_limits<float>::infinity());
5623                 outputs.push_back(std::numeric_limits<float>::infinity());
5624                 outputs.push_back(-std::numeric_limits<float>::infinity());
5625
5626                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5627                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
5628
5629                 group->addChild(new SpvAsmComputeShaderCase(
5630                         testCtx, "infinities", "Check that infinities propagated and created", spec));
5631         }
5632
5633         {
5634                 ComputeShaderSpec       spec;
5635                 const deUint8           numCases        = 2;
5636                 vector<float>           inputs          (numCases, 0.f);
5637                 vector<float>           outputs;
5638
5639                 spec.assembly           = shader;
5640                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
5641                 spec.verifyIO           = &compareNan;
5642
5643                 outputs.push_back(std::numeric_limits<float>::quiet_NaN());
5644                 outputs.push_back(-std::numeric_limits<float>::quiet_NaN());
5645
5646                 for (deUint8 idx = 0; idx < numCases; ++idx)
5647                         spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
5648
5649                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5650                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
5651
5652                 group->addChild(new SpvAsmComputeShaderCase(
5653                         testCtx, "propagated_nans", "Check that nans are propagated", spec));
5654         }
5655
5656         {
5657                 ComputeShaderSpec       spec;
5658                 const deUint8           numCases        = 6;
5659                 vector<float>           inputs          (numCases, 0.f);
5660                 vector<float>           outputs;
5661
5662                 spec.assembly           = shader;
5663                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
5664
5665                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(0.f));
5666                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-0.f));
5667                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -16)));
5668                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, -32)));
5669                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -127)));
5670                 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::ldexp(1.0f, -128)));
5671
5672                 outputs.push_back(0.f);
5673                 outputs.push_back(-0.f);
5674                 outputs.push_back(0.f);
5675                 outputs.push_back(-0.f);
5676                 outputs.push_back(0.f);
5677                 outputs.push_back(-0.f);
5678
5679                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5680                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
5681
5682                 group->addChild(new SpvAsmComputeShaderCase(
5683                         testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
5684         }
5685
5686         {
5687                 ComputeShaderSpec       spec;
5688                 const deUint8           numCases        = 6;
5689                 vector<float>           inputs          (numCases, 0.f);
5690                 vector<float>           outputs;
5691
5692                 spec.assembly           = shader;
5693                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
5694
5695                 for (deUint8 idx = 0; idx < 6; ++idx)
5696                 {
5697                         const float f = static_cast<float>(idx * 10 - 30) / 4.f;
5698                         spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(f));
5699                         outputs.push_back(f);
5700                 }
5701
5702                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5703                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
5704
5705                 group->addChild(new SpvAsmComputeShaderCase(
5706                         testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
5707         }
5708
5709         {
5710                 ComputeShaderSpec       spec;
5711                 const deUint8           numCases        = 4;
5712                 vector<float>           inputs          (numCases, 0.f);
5713                 vector<float>           outputs;
5714
5715                 spec.assembly           = shader;
5716                 spec.numWorkGroups      = IVec3(numCases, 1, 1);
5717                 spec.verifyIO           = &compareOpQuantizeF16ComputeExactCase;
5718
5719                 outputs.push_back(constructNormalizedFloat(8, 0x300300));
5720                 outputs.push_back(-constructNormalizedFloat(-7, 0x600800));
5721                 outputs.push_back(constructNormalizedFloat(2, 0x01E000));
5722                 outputs.push_back(constructNormalizedFloat(1, 0xFFE000));
5723
5724                 for (deUint8 idx = 0; idx < numCases; ++idx)
5725                         spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
5726
5727                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5728                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
5729
5730                 group->addChild(new SpvAsmComputeShaderCase(
5731                         testCtx, "rounded", "Check that are rounded when needed", spec));
5732         }
5733
5734         return group.release();
5735 }
5736
5737 // Checks that constant null/composite values can be used in computation.
5738 tcu::TestCaseGroup* createOpConstantUsageGroup (tcu::TestContext& testCtx)
5739 {
5740         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opconstantnullcomposite", "Spotcheck the OpConstantNull & OpConstantComposite instruction"));
5741         ComputeShaderSpec                               spec;
5742         de::Random                                              rnd                             (deStringHash(group->getName()));
5743         const int                                               numElements             = 100;
5744         vector<float>                                   positiveFloats  (numElements, 0);
5745         vector<float>                                   negativeFloats  (numElements, 0);
5746
5747         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5748
5749         for (size_t ndx = 0; ndx < numElements; ++ndx)
5750                 negativeFloats[ndx] = -positiveFloats[ndx];
5751
5752         spec.assembly =
5753                 "OpCapability Shader\n"
5754                 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
5755                 "OpMemoryModel Logical GLSL450\n"
5756                 "OpEntryPoint GLCompute %main \"main\" %id\n"
5757                 "OpExecutionMode %main LocalSize 1 1 1\n"
5758
5759                 "OpSource GLSL 430\n"
5760                 "OpName %main           \"main\"\n"
5761                 "OpName %id             \"gl_GlobalInvocationID\"\n"
5762
5763                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5764
5765                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5766
5767                 "%fmat      = OpTypeMatrix %fvec3 3\n"
5768                 "%ten       = OpConstant %u32 10\n"
5769                 "%f32arr10  = OpTypeArray %f32 %ten\n"
5770                 "%fst       = OpTypeStruct %f32 %f32\n"
5771
5772                 + string(getComputeAsmInputOutputBuffer()) +
5773
5774                 "%id        = OpVariable %uvec3ptr Input\n"
5775                 "%zero      = OpConstant %i32 0\n"
5776
5777                 // Create a bunch of null values
5778                 "%unull     = OpConstantNull %u32\n"
5779                 "%fnull     = OpConstantNull %f32\n"
5780                 "%vnull     = OpConstantNull %fvec3\n"
5781                 "%mnull     = OpConstantNull %fmat\n"
5782                 "%anull     = OpConstantNull %f32arr10\n"
5783                 "%snull     = OpConstantComposite %fst %fnull %fnull\n"
5784
5785                 "%main      = OpFunction %void None %voidf\n"
5786                 "%label     = OpLabel\n"
5787                 "%idval     = OpLoad %uvec3 %id\n"
5788                 "%x         = OpCompositeExtract %u32 %idval 0\n"
5789                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5790                 "%inval     = OpLoad %f32 %inloc\n"
5791                 "%neg       = OpFNegate %f32 %inval\n"
5792
5793                 // Get the abs() of (a certain element of) those null values
5794                 "%unull_cov = OpConvertUToF %f32 %unull\n"
5795                 "%unull_abs = OpExtInst %f32 %std450 FAbs %unull_cov\n"
5796                 "%fnull_abs = OpExtInst %f32 %std450 FAbs %fnull\n"
5797                 "%vnull_0   = OpCompositeExtract %f32 %vnull 0\n"
5798                 "%vnull_abs = OpExtInst %f32 %std450 FAbs %vnull_0\n"
5799                 "%mnull_12  = OpCompositeExtract %f32 %mnull 1 2\n"
5800                 "%mnull_abs = OpExtInst %f32 %std450 FAbs %mnull_12\n"
5801                 "%anull_3   = OpCompositeExtract %f32 %anull 3\n"
5802                 "%anull_abs = OpExtInst %f32 %std450 FAbs %anull_3\n"
5803                 "%snull_1   = OpCompositeExtract %f32 %snull 1\n"
5804                 "%snull_abs = OpExtInst %f32 %std450 FAbs %snull_1\n"
5805
5806                 // Add them all
5807                 "%add1      = OpFAdd %f32 %neg  %unull_abs\n"
5808                 "%add2      = OpFAdd %f32 %add1 %fnull_abs\n"
5809                 "%add3      = OpFAdd %f32 %add2 %vnull_abs\n"
5810                 "%add4      = OpFAdd %f32 %add3 %mnull_abs\n"
5811                 "%add5      = OpFAdd %f32 %add4 %anull_abs\n"
5812                 "%final     = OpFAdd %f32 %add5 %snull_abs\n"
5813
5814                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5815                 "             OpStore %outloc %final\n" // write to output
5816                 "             OpReturn\n"
5817                 "             OpFunctionEnd\n";
5818         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5819         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5820         spec.numWorkGroups = IVec3(numElements, 1, 1);
5821
5822         group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "Check that values constructed via OpConstantNull & OpConstantComposite can be used", spec));
5823
5824         return group.release();
5825 }
5826
5827 // Assembly code used for testing loop control is based on GLSL source code:
5828 // #version 430
5829 //
5830 // layout(std140, set = 0, binding = 0) readonly buffer Input {
5831 //   float elements[];
5832 // } input_data;
5833 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
5834 //   float elements[];
5835 // } output_data;
5836 //
5837 // void main() {
5838 //   uint x = gl_GlobalInvocationID.x;
5839 //   output_data.elements[x] = input_data.elements[x];
5840 //   for (uint i = 0; i < 4; ++i)
5841 //     output_data.elements[x] += 1.f;
5842 // }
5843 tcu::TestCaseGroup* createLoopControlGroup (tcu::TestContext& testCtx)
5844 {
5845         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "loop_control", "Tests loop control cases"));
5846         vector<CaseParameter>                   cases;
5847         de::Random                                              rnd                             (deStringHash(group->getName()));
5848         const int                                               numElements             = 100;
5849         vector<float>                                   inputFloats             (numElements, 0);
5850         vector<float>                                   outputFloats    (numElements, 0);
5851         const StringTemplate                    shaderTemplate  (
5852                 string(getComputeAsmShaderPreamble()) +
5853
5854                 "OpSource GLSL 430\n"
5855                 "OpName %main \"main\"\n"
5856                 "OpName %id \"gl_GlobalInvocationID\"\n"
5857
5858                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5859
5860                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5861
5862                 "%u32ptr      = OpTypePointer Function %u32\n"
5863
5864                 "%id          = OpVariable %uvec3ptr Input\n"
5865                 "%zero        = OpConstant %i32 0\n"
5866                 "%uzero       = OpConstant %u32 0\n"
5867                 "%one         = OpConstant %i32 1\n"
5868                 "%constf1     = OpConstant %f32 1.0\n"
5869                 "%four        = OpConstant %u32 4\n"
5870
5871                 "%main        = OpFunction %void None %voidf\n"
5872                 "%entry       = OpLabel\n"
5873                 "%i           = OpVariable %u32ptr Function\n"
5874                 "               OpStore %i %uzero\n"
5875
5876                 "%idval       = OpLoad %uvec3 %id\n"
5877                 "%x           = OpCompositeExtract %u32 %idval 0\n"
5878                 "%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
5879                 "%inval       = OpLoad %f32 %inloc\n"
5880                 "%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
5881                 "               OpStore %outloc %inval\n"
5882                 "               OpBranch %loop_entry\n"
5883
5884                 "%loop_entry  = OpLabel\n"
5885                 "%i_val       = OpLoad %u32 %i\n"
5886                 "%cmp_lt      = OpULessThan %bool %i_val %four\n"
5887                 "               OpLoopMerge %loop_merge %loop_body ${CONTROL}\n"
5888                 "               OpBranchConditional %cmp_lt %loop_body %loop_merge\n"
5889                 "%loop_body   = OpLabel\n"
5890                 "%outval      = OpLoad %f32 %outloc\n"
5891                 "%addf1       = OpFAdd %f32 %outval %constf1\n"
5892                 "               OpStore %outloc %addf1\n"
5893                 "%new_i       = OpIAdd %u32 %i_val %one\n"
5894                 "               OpStore %i %new_i\n"
5895                 "               OpBranch %loop_entry\n"
5896                 "%loop_merge  = OpLabel\n"
5897                 "               OpReturn\n"
5898                 "               OpFunctionEnd\n");
5899
5900         cases.push_back(CaseParameter("none",                           "None"));
5901         cases.push_back(CaseParameter("unroll",                         "Unroll"));
5902         cases.push_back(CaseParameter("dont_unroll",            "DontUnroll"));
5903
5904         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
5905
5906         for (size_t ndx = 0; ndx < numElements; ++ndx)
5907                 outputFloats[ndx] = inputFloats[ndx] + 4.f;
5908
5909         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5910         {
5911                 map<string, string>             specializations;
5912                 ComputeShaderSpec               spec;
5913
5914                 specializations["CONTROL"] = cases[caseNdx].param;
5915                 spec.assembly = shaderTemplate.specialize(specializations);
5916                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5917                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5918                 spec.numWorkGroups = IVec3(numElements, 1, 1);
5919
5920                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5921         }
5922
5923         group->addChild(new SpvAsmLoopControlDependencyLengthCase(testCtx, "dependency_length", "dependency_length"));
5924         group->addChild(new SpvAsmLoopControlDependencyInfiniteCase(testCtx, "dependency_infinite", "dependency_infinite"));
5925
5926         return group.release();
5927 }
5928
5929 // Assembly code used for testing selection control is based on GLSL source code:
5930 // #version 430
5931 //
5932 // layout(std140, set = 0, binding = 0) readonly buffer Input {
5933 //   float elements[];
5934 // } input_data;
5935 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
5936 //   float elements[];
5937 // } output_data;
5938 //
5939 // void main() {
5940 //   uint x = gl_GlobalInvocationID.x;
5941 //   float val = input_data.elements[x];
5942 //   if (val > 10.f)
5943 //     output_data.elements[x] = val + 1.f;
5944 //   else
5945 //     output_data.elements[x] = val - 1.f;
5946 // }
5947 tcu::TestCaseGroup* createSelectionControlGroup (tcu::TestContext& testCtx)
5948 {
5949         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "selection_control", "Tests selection control cases"));
5950         vector<CaseParameter>                   cases;
5951         de::Random                                              rnd                             (deStringHash(group->getName()));
5952         const int                                               numElements             = 100;
5953         vector<float>                                   inputFloats             (numElements, 0);
5954         vector<float>                                   outputFloats    (numElements, 0);
5955         const StringTemplate                    shaderTemplate  (
5956                 string(getComputeAsmShaderPreamble()) +
5957
5958                 "OpSource GLSL 430\n"
5959                 "OpName %main \"main\"\n"
5960                 "OpName %id \"gl_GlobalInvocationID\"\n"
5961
5962                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5963
5964                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5965
5966                 "%id       = OpVariable %uvec3ptr Input\n"
5967                 "%zero     = OpConstant %i32 0\n"
5968                 "%constf1  = OpConstant %f32 1.0\n"
5969                 "%constf10 = OpConstant %f32 10.0\n"
5970
5971                 "%main     = OpFunction %void None %voidf\n"
5972                 "%entry    = OpLabel\n"
5973                 "%idval    = OpLoad %uvec3 %id\n"
5974                 "%x        = OpCompositeExtract %u32 %idval 0\n"
5975                 "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
5976                 "%inval    = OpLoad %f32 %inloc\n"
5977                 "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
5978                 "%cmp_gt   = OpFOrdGreaterThan %bool %inval %constf10\n"
5979
5980                 "            OpSelectionMerge %if_end ${CONTROL}\n"
5981                 "            OpBranchConditional %cmp_gt %if_true %if_false\n"
5982                 "%if_true  = OpLabel\n"
5983                 "%addf1    = OpFAdd %f32 %inval %constf1\n"
5984                 "            OpStore %outloc %addf1\n"
5985                 "            OpBranch %if_end\n"
5986                 "%if_false = OpLabel\n"
5987                 "%subf1    = OpFSub %f32 %inval %constf1\n"
5988                 "            OpStore %outloc %subf1\n"
5989                 "            OpBranch %if_end\n"
5990                 "%if_end   = OpLabel\n"
5991                 "            OpReturn\n"
5992                 "            OpFunctionEnd\n");
5993
5994         cases.push_back(CaseParameter("none",                                   "None"));
5995         cases.push_back(CaseParameter("flatten",                                "Flatten"));
5996         cases.push_back(CaseParameter("dont_flatten",                   "DontFlatten"));
5997         cases.push_back(CaseParameter("flatten_dont_flatten",   "DontFlatten|Flatten"));
5998
5999         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6000
6001         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6002         floorAll(inputFloats);
6003
6004         for (size_t ndx = 0; ndx < numElements; ++ndx)
6005                 outputFloats[ndx] = inputFloats[ndx] + (inputFloats[ndx] > 10.f ? 1.f : -1.f);
6006
6007         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6008         {
6009                 map<string, string>             specializations;
6010                 ComputeShaderSpec               spec;
6011
6012                 specializations["CONTROL"] = cases[caseNdx].param;
6013                 spec.assembly = shaderTemplate.specialize(specializations);
6014                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6015                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6016                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6017
6018                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6019         }
6020
6021         return group.release();
6022 }
6023
6024 void getOpNameAbuseCases (vector<CaseParameter> &abuseCases)
6025 {
6026         // Generate a long name.
6027         std::string longname;
6028         longname.resize(65535, 'k'); // max string literal, spir-v 2.17
6029
6030         // Some bad names, abusing utf-8 encoding. This may also cause problems
6031         // with the logs.
6032         // 1. Various illegal code points in utf-8
6033         std::string utf8illegal =
6034                 "Illegal bytes in UTF-8: "
6035                 "\xc0 \xc1 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff"
6036                 "illegal surrogates: \xed\xad\xbf \xed\xbe\x80";
6037
6038         // 2. Zero encoded as overlong, not exactly legal but often supported to differentiate from terminating zero
6039         std::string utf8nul = "UTF-8 encoded nul \xC0\x80 (should not end name)";
6040
6041         // 3. Some overlong encodings
6042         std::string utf8overlong =
6043                 "UTF-8 overlong \xF0\x82\x82\xAC \xfc\x83\xbf\xbf\xbf\xbf \xf8\x87\xbf\xbf\xbf "
6044                 "\xf0\x8f\xbf\xbf";
6045
6046         // 4. Internet "zalgo" meme "bleeding text"
6047         std::string utf8zalgo =
6048                 "\x56\xcc\xb5\xcc\x85\xcc\x94\xcc\x88\xcd\x8a\xcc\x91\xcc\x88\xcd\x91\xcc\x83\xcd\x82"
6049                 "\xcc\x83\xcd\x90\xcc\x8a\xcc\x92\xcc\x92\xcd\x8b\xcc\x94\xcd\x9d\xcc\x98\xcc\xab\xcc"
6050                 "\xae\xcc\xa9\xcc\xad\xcc\x97\xcc\xb0\x75\xcc\xb6\xcc\xbe\xcc\x80\xcc\x82\xcc\x84\xcd"
6051                 "\x84\xcc\x90\xcd\x86\xcc\x9a\xcd\x84\xcc\x9b\xcd\x86\xcd\x92\xcc\x9a\xcd\x99\xcd\x99"
6052                 "\xcc\xbb\xcc\x98\xcd\x8e\xcd\x88\xcd\x9a\xcc\xa6\xcc\x9c\xcc\xab\xcc\x99\xcd\x94\xcd"
6053                 "\x99\xcd\x95\xcc\xa5\xcc\xab\xcd\x89\x6c\xcc\xb8\xcc\x8e\xcc\x8b\xcc\x8b\xcc\x9a\xcc"
6054                 "\x8e\xcd\x9d\xcc\x80\xcc\xa1\xcc\xad\xcd\x9c\xcc\xba\xcc\x96\xcc\xb3\xcc\xa2\xcd\x8e"
6055                 "\xcc\xa2\xcd\x96\x6b\xcc\xb8\xcc\x84\xcd\x81\xcc\xbf\xcc\x8d\xcc\x89\xcc\x85\xcc\x92"
6056                 "\xcc\x84\xcc\x90\xcd\x81\xcc\x93\xcd\x90\xcd\x92\xcd\x9d\xcc\x84\xcd\x98\xcd\x9d\xcd"
6057                 "\xa0\xcd\x91\xcc\x94\xcc\xb9\xcd\x93\xcc\xa5\xcd\x87\xcc\xad\xcc\xa7\xcd\x96\xcd\x99"
6058                 "\xcc\x9d\xcc\xbc\xcd\x96\xcd\x93\xcc\x9d\xcc\x99\xcc\xa8\xcc\xb1\xcd\x85\xcc\xba\xcc"
6059                 "\xa7\x61\xcc\xb8\xcc\x8e\xcc\x81\xcd\x90\xcd\x84\xcd\x8c\xcc\x8c\xcc\x85\xcd\x86\xcc"
6060                 "\x84\xcd\x84\xcc\x90\xcc\x84\xcc\x8d\xcd\x99\xcd\x8d\xcc\xb0\xcc\xa3\xcc\xa6\xcd\x89"
6061                 "\xcd\x8d\xcd\x87\xcc\x98\xcd\x8d\xcc\xa4\xcd\x9a\xcd\x8e\xcc\xab\xcc\xb9\xcc\xac\xcc"
6062                 "\xa2\xcd\x87\xcc\xa0\xcc\xb3\xcd\x89\xcc\xb9\xcc\xa7\xcc\xa6\xcd\x89\xcd\x95\x6e\xcc"
6063                 "\xb8\xcd\x8a\xcc\x8a\xcd\x82\xcc\x9b\xcd\x81\xcd\x90\xcc\x85\xcc\x9b\xcd\x80\xcd\x91"
6064                 "\xcd\x9b\xcc\x81\xcd\x81\xcc\x9a\xcc\xb3\xcd\x9c\xcc\x9e\xcc\x9d\xcd\x99\xcc\xa2\xcd"
6065                 "\x93\xcd\x96\xcc\x97\xff";
6066
6067         // General name abuses
6068         abuseCases.push_back(CaseParameter("_has_very_long_name", longname));
6069         abuseCases.push_back(CaseParameter("_utf8_illegal", utf8illegal));
6070         abuseCases.push_back(CaseParameter("_utf8_nul", utf8nul));
6071         abuseCases.push_back(CaseParameter("_utf8_overlong", utf8overlong));
6072         abuseCases.push_back(CaseParameter("_utf8_zalgo", utf8zalgo));
6073
6074         // GL keywords
6075         abuseCases.push_back(CaseParameter("_is_gl_Position", "gl_Position"));
6076         abuseCases.push_back(CaseParameter("_is_gl_InstanceID", "gl_InstanceID"));
6077         abuseCases.push_back(CaseParameter("_is_gl_PrimitiveID", "gl_PrimitiveID"));
6078         abuseCases.push_back(CaseParameter("_is_gl_TessCoord", "gl_TessCoord"));
6079         abuseCases.push_back(CaseParameter("_is_gl_PerVertex", "gl_PerVertex"));
6080         abuseCases.push_back(CaseParameter("_is_gl_InvocationID", "gl_InvocationID"));
6081         abuseCases.push_back(CaseParameter("_is_gl_PointSize", "gl_PointSize"));
6082         abuseCases.push_back(CaseParameter("_is_gl_PointCoord", "gl_PointCoord"));
6083         abuseCases.push_back(CaseParameter("_is_gl_Layer", "gl_Layer"));
6084         abuseCases.push_back(CaseParameter("_is_gl_FragDepth", "gl_FragDepth"));
6085         abuseCases.push_back(CaseParameter("_is_gl_NumWorkGroups", "gl_NumWorkGroups"));
6086         abuseCases.push_back(CaseParameter("_is_gl_WorkGroupID", "gl_WorkGroupID"));
6087         abuseCases.push_back(CaseParameter("_is_gl_LocalInvocationID", "gl_LocalInvocationID"));
6088         abuseCases.push_back(CaseParameter("_is_gl_GlobalInvocationID", "gl_GlobalInvocationID"));
6089         abuseCases.push_back(CaseParameter("_is_gl_MaxVertexAttribs", "gl_MaxVertexAttribs"));
6090         abuseCases.push_back(CaseParameter("_is_gl_MaxViewports", "gl_MaxViewports"));
6091         abuseCases.push_back(CaseParameter("_is_gl_MaxComputeWorkGroupCount", "gl_MaxComputeWorkGroupCount"));
6092         abuseCases.push_back(CaseParameter("_is_mat3", "mat3"));
6093         abuseCases.push_back(CaseParameter("_is_volatile", "volatile"));
6094         abuseCases.push_back(CaseParameter("_is_inout", "inout"));
6095         abuseCases.push_back(CaseParameter("_is_isampler3d", "isampler3d"));
6096 }
6097
6098 tcu::TestCaseGroup* createOpNameGroup (tcu::TestContext& testCtx)
6099 {
6100         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opname", "Tests OpName cases"));
6101         de::MovePtr<tcu::TestCaseGroup> entryMainGroup  (new tcu::TestCaseGroup(testCtx, "entry_main", "OpName tests with entry main"));
6102         de::MovePtr<tcu::TestCaseGroup> entryNotGroup   (new tcu::TestCaseGroup(testCtx, "entry_rdc", "OpName tests with entry rdc"));
6103         de::MovePtr<tcu::TestCaseGroup> abuseGroup              (new tcu::TestCaseGroup(testCtx, "abuse", "OpName abuse tests"));
6104         vector<CaseParameter>                   cases;
6105         vector<CaseParameter>                   abuseCases;
6106         vector<string>                                  testFunc;
6107         de::Random                                              rnd                             (deStringHash(group->getName()));
6108         const int                                               numElements             = 128;
6109         vector<float>                                   inputFloats             (numElements, 0);
6110         vector<float>                                   outputFloats    (numElements, 0);
6111
6112         getOpNameAbuseCases(abuseCases);
6113
6114         fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6115
6116         for(size_t ndx = 0; ndx < numElements; ++ndx)
6117                 outputFloats[ndx] = -inputFloats[ndx];
6118
6119         const string commonShaderHeader =
6120                 "OpCapability Shader\n"
6121                 "OpMemoryModel Logical GLSL450\n"
6122                 "OpEntryPoint GLCompute %main \"main\" %id\n"
6123                 "OpExecutionMode %main LocalSize 1 1 1\n";
6124
6125         const string commonShaderFooter =
6126                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6127
6128                 + string(getComputeAsmInputOutputBufferTraits())
6129                 + string(getComputeAsmCommonTypes())
6130                 + string(getComputeAsmInputOutputBuffer()) +
6131
6132                 "%id        = OpVariable %uvec3ptr Input\n"
6133                 "%zero      = OpConstant %i32 0\n"
6134
6135                 "%func      = OpFunction %void None %voidf\n"
6136                 "%5         = OpLabel\n"
6137                 "             OpReturn\n"
6138                 "             OpFunctionEnd\n"
6139
6140                 "%main      = OpFunction %void None %voidf\n"
6141                 "%entry     = OpLabel\n"
6142                 "%7         = OpFunctionCall %void %func\n"
6143
6144                 "%idval     = OpLoad %uvec3 %id\n"
6145                 "%x         = OpCompositeExtract %u32 %idval 0\n"
6146
6147                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6148                 "%inval     = OpLoad %f32 %inloc\n"
6149                 "%neg       = OpFNegate %f32 %inval\n"
6150                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6151                 "             OpStore %outloc %neg\n"
6152
6153                 "             OpReturn\n"
6154                 "             OpFunctionEnd\n";
6155
6156         const StringTemplate shaderTemplate (
6157                 "OpCapability Shader\n"
6158                 "OpMemoryModel Logical GLSL450\n"
6159                 "OpEntryPoint GLCompute %main \"${ENTRY}\" %id\n"
6160                 "OpExecutionMode %main LocalSize 1 1 1\n"
6161                 "OpName %${ID} \"${NAME}\"\n" +
6162                 commonShaderFooter);
6163
6164         const std::string multipleNames =
6165                 commonShaderHeader +
6166                 "OpName %main \"to_be\"\n"
6167                 "OpName %id   \"or_not\"\n"
6168                 "OpName %main \"to_be\"\n"
6169                 "OpName %main \"makes_no\"\n"
6170                 "OpName %func \"difference\"\n"
6171                 "OpName %5    \"to_me\"\n" +
6172                 commonShaderFooter;
6173
6174         {
6175                 ComputeShaderSpec       spec;
6176
6177                 spec.assembly           = multipleNames;
6178                 spec.numWorkGroups      = IVec3(numElements, 1, 1);
6179                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6180                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6181
6182                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "main_has_multiple_names", "multiple_names", spec));
6183         }
6184
6185         const std::string everythingNamed =
6186                 commonShaderHeader +
6187                 "OpName %main   \"name1\"\n"
6188                 "OpName %id     \"name2\"\n"
6189                 "OpName %zero   \"name3\"\n"
6190                 "OpName %entry  \"name4\"\n"
6191                 "OpName %func   \"name5\"\n"
6192                 "OpName %5      \"name6\"\n"
6193                 "OpName %7      \"name7\"\n"
6194                 "OpName %idval  \"name8\"\n"
6195                 "OpName %inloc  \"name9\"\n"
6196                 "OpName %inval  \"name10\"\n"
6197                 "OpName %neg    \"name11\"\n"
6198                 "OpName %outloc \"name12\"\n"+
6199                 commonShaderFooter;
6200         {
6201                 ComputeShaderSpec       spec;
6202
6203                 spec.assembly           = everythingNamed;
6204                 spec.numWorkGroups      = IVec3(numElements, 1, 1);
6205                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6206                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6207
6208                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named", "everything_named", spec));
6209         }
6210
6211         const std::string everythingNamedTheSame =
6212                 commonShaderHeader +
6213                 "OpName %main   \"the_same\"\n"
6214                 "OpName %id     \"the_same\"\n"
6215                 "OpName %zero   \"the_same\"\n"
6216                 "OpName %entry  \"the_same\"\n"
6217                 "OpName %func   \"the_same\"\n"
6218                 "OpName %5      \"the_same\"\n"
6219                 "OpName %7      \"the_same\"\n"
6220                 "OpName %idval  \"the_same\"\n"
6221                 "OpName %inloc  \"the_same\"\n"
6222                 "OpName %inval  \"the_same\"\n"
6223                 "OpName %neg    \"the_same\"\n"
6224                 "OpName %outloc \"the_same\"\n"+
6225                 commonShaderFooter;
6226         {
6227                 ComputeShaderSpec       spec;
6228
6229                 spec.assembly           = everythingNamedTheSame;
6230                 spec.numWorkGroups      = IVec3(numElements, 1, 1);
6231                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6232                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6233
6234                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", "everything_named_the_same", spec));
6235         }
6236
6237         // main_is_...
6238         for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6239         {
6240                 map<string, string>     specializations;
6241                 ComputeShaderSpec       spec;
6242
6243                 specializations["ENTRY"]        = "main";
6244                 specializations["ID"]           = "main";
6245                 specializations["NAME"]         = abuseCases[ndx].param;
6246                 spec.assembly                           = shaderTemplate.specialize(specializations);
6247                 spec.numWorkGroups                      = IVec3(numElements, 1, 1);
6248                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6249                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6250
6251                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("main") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6252         }
6253
6254         // x_is_....
6255         for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6256         {
6257                 map<string, string>     specializations;
6258                 ComputeShaderSpec       spec;
6259
6260                 specializations["ENTRY"]        = "main";
6261                 specializations["ID"]           = "x";
6262                 specializations["NAME"]         = abuseCases[ndx].param;
6263                 spec.assembly                           = shaderTemplate.specialize(specializations);
6264                 spec.numWorkGroups                      = IVec3(numElements, 1, 1);
6265                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6266                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6267
6268                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("x") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6269         }
6270
6271         cases.push_back(CaseParameter("_is_main", "main"));
6272         cases.push_back(CaseParameter("_is_not_main", "not_main"));
6273         testFunc.push_back("main");
6274         testFunc.push_back("func");
6275
6276         for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6277         {
6278                 for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6279                 {
6280                         map<string, string>     specializations;
6281                         ComputeShaderSpec       spec;
6282
6283                         specializations["ENTRY"]        = "main";
6284                         specializations["ID"]           = testFunc[fNdx];
6285                         specializations["NAME"]         = cases[ndx].param;
6286                         spec.assembly                           = shaderTemplate.specialize(specializations);
6287                         spec.numWorkGroups                      = IVec3(numElements, 1, 1);
6288                         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6289                         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6290
6291                         entryMainGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), cases[ndx].name, spec));
6292                 }
6293         }
6294
6295         cases.push_back(CaseParameter("_is_entry", "rdc"));
6296
6297         for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6298         {
6299                 for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6300                 {
6301                         map<string, string>     specializations;
6302                         ComputeShaderSpec       spec;
6303
6304                         specializations["ENTRY"]        = "rdc";
6305                         specializations["ID"]           = testFunc[fNdx];
6306                         specializations["NAME"]         = cases[ndx].param;
6307                         spec.assembly                           = shaderTemplate.specialize(specializations);
6308                         spec.numWorkGroups                      = IVec3(numElements, 1, 1);
6309                         spec.entryPoint                         = "rdc";
6310                         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6311                         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6312
6313                         entryNotGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), cases[ndx].name, spec));
6314                 }
6315         }
6316
6317         group->addChild(entryMainGroup.release());
6318         group->addChild(entryNotGroup.release());
6319         group->addChild(abuseGroup.release());
6320
6321         return group.release();
6322 }
6323
6324 tcu::TestCaseGroup* createOpMemberNameGroup (tcu::TestContext& testCtx)
6325 {
6326         de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opmembername", "Tests OpMemberName cases"));
6327         de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "abuse", "OpMemberName abuse tests"));
6328         vector<CaseParameter>                   abuseCases;
6329         vector<string>                                  testFunc;
6330         de::Random                                              rnd(deStringHash(group->getName()));
6331         const int                                               numElements = 128;
6332         vector<float>                                   inputFloats(numElements, 0);
6333         vector<float>                                   outputFloats(numElements, 0);
6334
6335         getOpNameAbuseCases(abuseCases);
6336
6337         fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6338
6339         for (size_t ndx = 0; ndx < numElements; ++ndx)
6340                 outputFloats[ndx] = -inputFloats[ndx];
6341
6342         const string commonShaderHeader =
6343                 "OpCapability Shader\n"
6344                 "OpMemoryModel Logical GLSL450\n"
6345                 "OpEntryPoint GLCompute %main \"main\" %id\n"
6346                 "OpExecutionMode %main LocalSize 1 1 1\n";
6347
6348         const string commonShaderFooter =
6349                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6350
6351                 + string(getComputeAsmInputOutputBufferTraits())
6352                 + string(getComputeAsmCommonTypes())
6353                 + string(getComputeAsmInputOutputBuffer()) +
6354
6355                 "%u3str     = OpTypeStruct %u32 %u32 %u32\n"
6356
6357                 "%id        = OpVariable %uvec3ptr Input\n"
6358                 "%zero      = OpConstant %i32 0\n"
6359
6360                 "%main      = OpFunction %void None %voidf\n"
6361                 "%entry     = OpLabel\n"
6362
6363                 "%idval     = OpLoad %uvec3 %id\n"
6364                 "%x0        = OpCompositeExtract %u32 %idval 0\n"
6365
6366                 "%idstr     = OpCompositeConstruct %u3str %x0 %x0 %x0\n"
6367                 "%x         = OpCompositeExtract %u32 %idstr 0\n"
6368
6369                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6370                 "%inval     = OpLoad %f32 %inloc\n"
6371                 "%neg       = OpFNegate %f32 %inval\n"
6372                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6373                 "             OpStore %outloc %neg\n"
6374
6375                 "             OpReturn\n"
6376                 "             OpFunctionEnd\n";
6377
6378         const StringTemplate shaderTemplate(
6379                 commonShaderHeader +
6380                 "OpMemberName %u3str 0 \"${NAME}\"\n" +
6381                 commonShaderFooter);
6382
6383         const std::string multipleNames =
6384                 commonShaderHeader +
6385                 "OpMemberName %u3str 0 \"to_be\"\n"
6386                 "OpMemberName %u3str 1 \"or_not\"\n"
6387                 "OpMemberName %u3str 0 \"to_be\"\n"
6388                 "OpMemberName %u3str 2 \"makes_no\"\n"
6389                 "OpMemberName %u3str 0 \"difference\"\n"
6390                 "OpMemberName %u3str 0 \"to_me\"\n" +
6391                 commonShaderFooter;
6392         {
6393                 ComputeShaderSpec       spec;
6394
6395                 spec.assembly = multipleNames;
6396                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6397                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6398                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6399
6400                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "u3str_x_has_multiple_names", "multiple_names", spec));
6401         }
6402
6403         const std::string everythingNamedTheSame =
6404                 commonShaderHeader +
6405                 "OpMemberName %u3str 0 \"the_same\"\n"
6406                 "OpMemberName %u3str 1 \"the_same\"\n"
6407                 "OpMemberName %u3str 2 \"the_same\"\n" +
6408                 commonShaderFooter;
6409
6410         {
6411                 ComputeShaderSpec       spec;
6412
6413                 spec.assembly = everythingNamedTheSame;
6414                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6415                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6416                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6417
6418                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", "everything_named_the_same", spec));
6419         }
6420
6421         // u3str_x_is_....
6422         for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6423         {
6424                 map<string, string>     specializations;
6425                 ComputeShaderSpec       spec;
6426
6427                 specializations["NAME"] = abuseCases[ndx].param;
6428                 spec.assembly = shaderTemplate.specialize(specializations);
6429                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6430                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6431                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6432
6433                 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("u3str_x") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6434         }
6435
6436         group->addChild(abuseGroup.release());
6437
6438         return group.release();
6439 }
6440
6441 // Assembly code used for testing function control is based on GLSL source code:
6442 //
6443 // #version 430
6444 //
6445 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6446 //   float elements[];
6447 // } input_data;
6448 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6449 //   float elements[];
6450 // } output_data;
6451 //
6452 // float const10() { return 10.f; }
6453 //
6454 // void main() {
6455 //   uint x = gl_GlobalInvocationID.x;
6456 //   output_data.elements[x] = input_data.elements[x] + const10();
6457 // }
6458 tcu::TestCaseGroup* createFunctionControlGroup (tcu::TestContext& testCtx)
6459 {
6460         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "function_control", "Tests function control cases"));
6461         vector<CaseParameter>                   cases;
6462         de::Random                                              rnd                             (deStringHash(group->getName()));
6463         const int                                               numElements             = 100;
6464         vector<float>                                   inputFloats             (numElements, 0);
6465         vector<float>                                   outputFloats    (numElements, 0);
6466         const StringTemplate                    shaderTemplate  (
6467                 string(getComputeAsmShaderPreamble()) +
6468
6469                 "OpSource GLSL 430\n"
6470                 "OpName %main \"main\"\n"
6471                 "OpName %func_const10 \"const10(\"\n"
6472                 "OpName %id \"gl_GlobalInvocationID\"\n"
6473
6474                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6475
6476                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6477
6478                 "%f32f = OpTypeFunction %f32\n"
6479                 "%id = OpVariable %uvec3ptr Input\n"
6480                 "%zero = OpConstant %i32 0\n"
6481                 "%constf10 = OpConstant %f32 10.0\n"
6482
6483                 "%main         = OpFunction %void None %voidf\n"
6484                 "%entry        = OpLabel\n"
6485                 "%idval        = OpLoad %uvec3 %id\n"
6486                 "%x            = OpCompositeExtract %u32 %idval 0\n"
6487                 "%inloc        = OpAccessChain %f32ptr %indata %zero %x\n"
6488                 "%inval        = OpLoad %f32 %inloc\n"
6489                 "%ret_10       = OpFunctionCall %f32 %func_const10\n"
6490                 "%fadd         = OpFAdd %f32 %inval %ret_10\n"
6491                 "%outloc       = OpAccessChain %f32ptr %outdata %zero %x\n"
6492                 "                OpStore %outloc %fadd\n"
6493                 "                OpReturn\n"
6494                 "                OpFunctionEnd\n"
6495
6496                 "%func_const10 = OpFunction %f32 ${CONTROL} %f32f\n"
6497                 "%label        = OpLabel\n"
6498                 "                OpReturnValue %constf10\n"
6499                 "                OpFunctionEnd\n");
6500
6501         cases.push_back(CaseParameter("none",                                           "None"));
6502         cases.push_back(CaseParameter("inline",                                         "Inline"));
6503         cases.push_back(CaseParameter("dont_inline",                            "DontInline"));
6504         cases.push_back(CaseParameter("pure",                                           "Pure"));
6505         cases.push_back(CaseParameter("const",                                          "Const"));
6506         cases.push_back(CaseParameter("inline_pure",                            "Inline|Pure"));
6507         cases.push_back(CaseParameter("const_dont_inline",                      "Const|DontInline"));
6508         cases.push_back(CaseParameter("inline_dont_inline",                     "Inline|DontInline"));
6509         cases.push_back(CaseParameter("pure_inline_dont_inline",        "Pure|Inline|DontInline"));
6510
6511         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6512
6513         // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6514         floorAll(inputFloats);
6515
6516         for (size_t ndx = 0; ndx < numElements; ++ndx)
6517                 outputFloats[ndx] = inputFloats[ndx] + 10.f;
6518
6519         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6520         {
6521                 map<string, string>             specializations;
6522                 ComputeShaderSpec               spec;
6523
6524                 specializations["CONTROL"] = cases[caseNdx].param;
6525                 spec.assembly = shaderTemplate.specialize(specializations);
6526                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6527                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6528                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6529
6530                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6531         }
6532
6533         return group.release();
6534 }
6535
6536 tcu::TestCaseGroup* createMemoryAccessGroup (tcu::TestContext& testCtx)
6537 {
6538         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "memory_access", "Tests memory access cases"));
6539         vector<CaseParameter>                   cases;
6540         de::Random                                              rnd                             (deStringHash(group->getName()));
6541         const int                                               numElements             = 100;
6542         vector<float>                                   inputFloats             (numElements, 0);
6543         vector<float>                                   outputFloats    (numElements, 0);
6544         const StringTemplate                    shaderTemplate  (
6545                 string(getComputeAsmShaderPreamble()) +
6546
6547                 "OpSource GLSL 430\n"
6548                 "OpName %main           \"main\"\n"
6549                 "OpName %id             \"gl_GlobalInvocationID\"\n"
6550
6551                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6552
6553                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6554
6555                 "%f32ptr_f  = OpTypePointer Function %f32\n"
6556
6557                 "%id        = OpVariable %uvec3ptr Input\n"
6558                 "%zero      = OpConstant %i32 0\n"
6559                 "%four      = OpConstant %i32 4\n"
6560
6561                 "%main      = OpFunction %void None %voidf\n"
6562                 "%label     = OpLabel\n"
6563                 "%copy      = OpVariable %f32ptr_f Function\n"
6564                 "%idval     = OpLoad %uvec3 %id ${ACCESS}\n"
6565                 "%x         = OpCompositeExtract %u32 %idval 0\n"
6566                 "%inloc     = OpAccessChain %f32ptr %indata  %zero %x\n"
6567                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6568                 "             OpCopyMemory %copy %inloc ${ACCESS}\n"
6569                 "%val1      = OpLoad %f32 %copy\n"
6570                 "%val2      = OpLoad %f32 %inloc\n"
6571                 "%add       = OpFAdd %f32 %val1 %val2\n"
6572                 "             OpStore %outloc %add ${ACCESS}\n"
6573                 "             OpReturn\n"
6574                 "             OpFunctionEnd\n");
6575
6576         cases.push_back(CaseParameter("null",                                   ""));
6577         cases.push_back(CaseParameter("none",                                   "None"));
6578         cases.push_back(CaseParameter("volatile",                               "Volatile"));
6579         cases.push_back(CaseParameter("aligned",                                "Aligned 4"));
6580         cases.push_back(CaseParameter("nontemporal",                    "Nontemporal"));
6581         cases.push_back(CaseParameter("aligned_nontemporal",    "Aligned|Nontemporal 4"));
6582         cases.push_back(CaseParameter("aligned_volatile",               "Volatile|Aligned 4"));
6583
6584         fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6585
6586         for (size_t ndx = 0; ndx < numElements; ++ndx)
6587                 outputFloats[ndx] = inputFloats[ndx] + inputFloats[ndx];
6588
6589         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6590         {
6591                 map<string, string>             specializations;
6592                 ComputeShaderSpec               spec;
6593
6594                 specializations["ACCESS"] = cases[caseNdx].param;
6595                 spec.assembly = shaderTemplate.specialize(specializations);
6596                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6597                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6598                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6599
6600                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6601         }
6602
6603         return group.release();
6604 }
6605
6606 // Checks that we can get undefined values for various types, without exercising a computation with it.
6607 tcu::TestCaseGroup* createOpUndefGroup (tcu::TestContext& testCtx)
6608 {
6609         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opundef", "Tests the OpUndef instruction"));
6610         vector<CaseParameter>                   cases;
6611         de::Random                                              rnd                             (deStringHash(group->getName()));
6612         const int                                               numElements             = 100;
6613         vector<float>                                   positiveFloats  (numElements, 0);
6614         vector<float>                                   negativeFloats  (numElements, 0);
6615         const StringTemplate                    shaderTemplate  (
6616                 string(getComputeAsmShaderPreamble()) +
6617
6618                 "OpSource GLSL 430\n"
6619                 "OpName %main           \"main\"\n"
6620                 "OpName %id             \"gl_GlobalInvocationID\"\n"
6621
6622                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6623
6624                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6625                 "%uvec2     = OpTypeVector %u32 2\n"
6626                 "%fvec4     = OpTypeVector %f32 4\n"
6627                 "%fmat33    = OpTypeMatrix %fvec3 3\n"
6628                 "%image     = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
6629                 "%sampler   = OpTypeSampler\n"
6630                 "%simage    = OpTypeSampledImage %image\n"
6631                 "%const100  = OpConstant %u32 100\n"
6632                 "%uarr100   = OpTypeArray %i32 %const100\n"
6633                 "%struct    = OpTypeStruct %f32 %i32 %u32\n"
6634                 "%pointer   = OpTypePointer Function %i32\n"
6635                 + string(getComputeAsmInputOutputBuffer()) +
6636
6637                 "%id        = OpVariable %uvec3ptr Input\n"
6638                 "%zero      = OpConstant %i32 0\n"
6639
6640                 "%main      = OpFunction %void None %voidf\n"
6641                 "%label     = OpLabel\n"
6642
6643                 "%undef     = OpUndef ${TYPE}\n"
6644
6645                 "%idval     = OpLoad %uvec3 %id\n"
6646                 "%x         = OpCompositeExtract %u32 %idval 0\n"
6647
6648                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6649                 "%inval     = OpLoad %f32 %inloc\n"
6650                 "%neg       = OpFNegate %f32 %inval\n"
6651                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6652                 "             OpStore %outloc %neg\n"
6653                 "             OpReturn\n"
6654                 "             OpFunctionEnd\n");
6655
6656         cases.push_back(CaseParameter("bool",                   "%bool"));
6657         cases.push_back(CaseParameter("sint32",                 "%i32"));
6658         cases.push_back(CaseParameter("uint32",                 "%u32"));
6659         cases.push_back(CaseParameter("float32",                "%f32"));
6660         cases.push_back(CaseParameter("vec4float32",    "%fvec4"));
6661         cases.push_back(CaseParameter("vec2uint32",             "%uvec2"));
6662         cases.push_back(CaseParameter("matrix",                 "%fmat33"));
6663         cases.push_back(CaseParameter("image",                  "%image"));
6664         cases.push_back(CaseParameter("sampler",                "%sampler"));
6665         cases.push_back(CaseParameter("sampledimage",   "%simage"));
6666         cases.push_back(CaseParameter("array",                  "%uarr100"));
6667         cases.push_back(CaseParameter("runtimearray",   "%f32arr"));
6668         cases.push_back(CaseParameter("struct",                 "%struct"));
6669         cases.push_back(CaseParameter("pointer",                "%pointer"));
6670
6671         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
6672
6673         for (size_t ndx = 0; ndx < numElements; ++ndx)
6674                 negativeFloats[ndx] = -positiveFloats[ndx];
6675
6676         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6677         {
6678                 map<string, string>             specializations;
6679                 ComputeShaderSpec               spec;
6680
6681                 specializations["TYPE"] = cases[caseNdx].param;
6682                 spec.assembly = shaderTemplate.specialize(specializations);
6683                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
6684                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
6685                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6686
6687                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6688         }
6689
6690                 return group.release();
6691 }
6692
6693 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
6694 tcu::TestCaseGroup* createFloat16OpConstantCompositeGroup (tcu::TestContext& testCtx)
6695 {
6696         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
6697         vector<CaseParameter>                   cases;
6698         de::Random                                              rnd                             (deStringHash(group->getName()));
6699         const int                                               numElements             = 100;
6700         vector<float>                                   positiveFloats  (numElements, 0);
6701         vector<float>                                   negativeFloats  (numElements, 0);
6702         const StringTemplate                    shaderTemplate  (
6703                 "OpCapability Shader\n"
6704                 "OpCapability Float16\n"
6705                 "OpMemoryModel Logical GLSL450\n"
6706                 "OpEntryPoint GLCompute %main \"main\" %id\n"
6707                 "OpExecutionMode %main LocalSize 1 1 1\n"
6708                 "OpSource GLSL 430\n"
6709                 "OpName %main           \"main\"\n"
6710                 "OpName %id             \"gl_GlobalInvocationID\"\n"
6711
6712                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6713
6714                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6715
6716                 "%id        = OpVariable %uvec3ptr Input\n"
6717                 "%zero      = OpConstant %i32 0\n"
6718                 "%f16       = OpTypeFloat 16\n"
6719                 "%c_f16_0   = OpConstant %f16 0.0\n"
6720                 "%c_f16_0_5 = OpConstant %f16 0.5\n"
6721                 "%c_f16_1   = OpConstant %f16 1.0\n"
6722                 "%v2f16     = OpTypeVector %f16 2\n"
6723                 "%v3f16     = OpTypeVector %f16 3\n"
6724                 "%v4f16     = OpTypeVector %f16 4\n"
6725
6726                 "${CONSTANT}\n"
6727
6728                 "%main      = OpFunction %void None %voidf\n"
6729                 "%label     = OpLabel\n"
6730                 "%idval     = OpLoad %uvec3 %id\n"
6731                 "%x         = OpCompositeExtract %u32 %idval 0\n"
6732                 "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6733                 "%inval     = OpLoad %f32 %inloc\n"
6734                 "%neg       = OpFNegate %f32 %inval\n"
6735                 "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6736                 "             OpStore %outloc %neg\n"
6737                 "             OpReturn\n"
6738                 "             OpFunctionEnd\n");
6739
6740
6741         cases.push_back(CaseParameter("vector",                 "%const = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"));
6742         cases.push_back(CaseParameter("matrix",                 "%m3v3f16 = OpTypeMatrix %v3f16 3\n"
6743                                                                                                         "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
6744                                                                                                         "%mat = OpConstantComposite %m3v3f16 %vec %vec %vec"));
6745         cases.push_back(CaseParameter("struct",                 "%m2v3f16 = OpTypeMatrix %v3f16 2\n"
6746                                                                                                         "%struct = OpTypeStruct %i32 %f16 %v3f16 %m2v3f16\n"
6747                                                                                                         "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
6748                                                                                                         "%mat = OpConstantComposite %m2v3f16 %vec %vec\n"
6749                                                                                                         "%const = OpConstantComposite %struct %zero %c_f16_0_5 %vec %mat\n"));
6750         cases.push_back(CaseParameter("nested_struct",  "%st1 = OpTypeStruct %i32 %f16\n"
6751                                                                                                         "%st2 = OpTypeStruct %i32 %i32\n"
6752                                                                                                         "%struct = OpTypeStruct %st1 %st2\n"
6753                                                                                                         "%st1val = OpConstantComposite %st1 %zero %c_f16_0_5\n"
6754                                                                                                         "%st2val = OpConstantComposite %st2 %zero %zero\n"
6755                                                                                                         "%const = OpConstantComposite %struct %st1val %st2val"));
6756
6757         fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
6758
6759         for (size_t ndx = 0; ndx < numElements; ++ndx)
6760                 negativeFloats[ndx] = -positiveFloats[ndx];
6761
6762         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6763         {
6764                 map<string, string>             specializations;
6765                 ComputeShaderSpec               spec;
6766
6767                 specializations["CONSTANT"] = cases[caseNdx].param;
6768                 spec.assembly = shaderTemplate.specialize(specializations);
6769                 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
6770                 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
6771                 spec.numWorkGroups = IVec3(numElements, 1, 1);
6772
6773                 spec.extensions.push_back("VK_KHR_shader_float16_int8");
6774
6775                 spec.requestedVulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
6776
6777                 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6778         }
6779
6780         return group.release();
6781 }
6782
6783 const vector<deFloat16> squarize(const vector<deFloat16>& inData, const deUint32 argNo)
6784 {
6785         const size_t            inDataLength    = inData.size();
6786         vector<deFloat16>       result;
6787
6788         result.reserve(inDataLength * inDataLength);
6789
6790         if (argNo == 0)
6791         {
6792                 for (size_t numIdx = 0; numIdx < inDataLength; ++numIdx)
6793                         result.insert(result.end(), inData.begin(), inData.end());
6794         }
6795
6796         if (argNo == 1)
6797         {
6798                 for (size_t numIdx = 0; numIdx < inDataLength; ++numIdx)
6799                 {
6800                         const vector<deFloat16> tmp(inDataLength, inData[numIdx]);
6801
6802                         result.insert(result.end(), tmp.begin(), tmp.end());
6803                 }
6804         }
6805
6806         return result;
6807 }
6808
6809 const vector<deFloat16> squarizeVector(const vector<deFloat16>& inData, const deUint32 argNo)
6810 {
6811         vector<deFloat16>       vec;
6812         vector<deFloat16>       result;
6813
6814         // Create vectors. vec will contain each possible pair from inData
6815         {
6816                 const size_t    inDataLength    = inData.size();
6817
6818                 DE_ASSERT(inDataLength <= 64);
6819
6820                 vec.reserve(2 * inDataLength * inDataLength);
6821
6822                 for (size_t numIdxX = 0; numIdxX < inDataLength; ++numIdxX)
6823                 for (size_t numIdxY = 0; numIdxY < inDataLength; ++numIdxY)
6824                 {
6825                         vec.push_back(inData[numIdxX]);
6826                         vec.push_back(inData[numIdxY]);
6827                 }
6828         }
6829
6830         // Create vector pairs. result will contain each possible pair from vec
6831         {
6832                 const size_t    coordsPerVector = 2;
6833                 const size_t    vectorsCount    = vec.size() / coordsPerVector;
6834
6835                 result.reserve(coordsPerVector * vectorsCount * vectorsCount);
6836
6837                 if (argNo == 0)
6838                 {
6839                         for (size_t numIdxX = 0; numIdxX < vectorsCount; ++numIdxX)
6840                         for (size_t numIdxY = 0; numIdxY < vectorsCount; ++numIdxY)
6841                         {
6842                                 for (size_t coordNdx = 0; coordNdx < coordsPerVector; ++coordNdx)
6843                                         result.push_back(vec[coordsPerVector * numIdxY + coordNdx]);
6844                         }
6845                 }
6846
6847                 if (argNo == 1)
6848                 {
6849                         for (size_t numIdxX = 0; numIdxX < vectorsCount; ++numIdxX)
6850                         for (size_t numIdxY = 0; numIdxY < vectorsCount; ++numIdxY)
6851                         {
6852                                 for (size_t coordNdx = 0; coordNdx < coordsPerVector; ++coordNdx)
6853                                         result.push_back(vec[coordsPerVector * numIdxX + coordNdx]);
6854                         }
6855                 }
6856         }
6857
6858         return result;
6859 }
6860
6861 struct fp16isNan                        { bool operator()(const tcu::Float16 in1, const tcu::Float16)           { return in1.isNaN(); } };
6862 struct fp16isInf                        { bool operator()(const tcu::Float16 in1, const tcu::Float16)           { return in1.isInf(); } };
6863 struct fp16isEqual                      { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() == in2.asFloat(); } };
6864 struct fp16isUnequal            { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() != in2.asFloat(); } };
6865 struct fp16isLess                       { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() <  in2.asFloat(); } };
6866 struct fp16isGreater            { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() >  in2.asFloat(); } };
6867 struct fp16isLessOrEqual        { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() <= in2.asFloat(); } };
6868 struct fp16isGreaterOrEqual     { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)       { return in1.asFloat() >= in2.asFloat(); } };
6869
6870 template <class TestedLogicalFunction, bool onlyTestFunc, bool unationModeAnd, bool nanSupported>
6871 bool compareFP16Logical (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
6872 {
6873         if (inputs.size() != 2 || outputAllocs.size() != 1)
6874                 return false;
6875
6876         vector<deUint8> input1Bytes;
6877         vector<deUint8> input2Bytes;
6878
6879         inputs[0].getBytes(input1Bytes);
6880         inputs[1].getBytes(input2Bytes);
6881
6882         const deUint32                  denormModesCount                        = 2;
6883         const deFloat16                 float16one                                      = tcu::Float16(1.0f).bits();
6884         const deFloat16                 float16zero                                     = tcu::Float16(0.0f).bits();
6885         const tcu::Float16              zero                                            = tcu::Float16::zero(1);
6886         const deFloat16* const  outputAsFP16                            = static_cast<deFloat16*>(outputAllocs[0]->getHostPtr());
6887         const deFloat16* const  input1AsFP16                            = reinterpret_cast<deFloat16* const>(&input1Bytes.front());
6888         const deFloat16* const  input2AsFP16                            = reinterpret_cast<deFloat16* const>(&input2Bytes.front());
6889         deUint32                                successfulRuns                          = denormModesCount;
6890         std::string                             results[denormModesCount];
6891         TestedLogicalFunction   testedLogicalFunction;
6892
6893         for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
6894         {
6895                 const bool flushToZero = (denormMode == 1);
6896
6897                 for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deFloat16); ++idx)
6898                 {
6899                         const tcu::Float16      f1pre                   = tcu::Float16(input1AsFP16[idx]);
6900                         const tcu::Float16      f2pre                   = tcu::Float16(input2AsFP16[idx]);
6901                         const tcu::Float16      f1                              = (flushToZero && f1pre.isDenorm()) ? zero : f1pre;
6902                         const tcu::Float16      f2                              = (flushToZero && f2pre.isDenorm()) ? zero : f2pre;
6903                         deFloat16                       expectedOutput  = float16zero;
6904
6905                         if (onlyTestFunc)
6906                         {
6907                                 if (testedLogicalFunction(f1, f2))
6908                                         expectedOutput = float16one;
6909                         }
6910                         else
6911                         {
6912                                 const bool      f1nan   = f1.isNaN();
6913                                 const bool      f2nan   = f2.isNaN();
6914
6915                                 // Skip NaN floats if not supported by implementation
6916                                 if (!nanSupported && (f1nan || f2nan))
6917                                         continue;
6918
6919                                 if (unationModeAnd)
6920                                 {
6921                                         const bool      ordered         = !f1nan && !f2nan;
6922
6923                                         if (ordered && testedLogicalFunction(f1, f2))
6924                                                 expectedOutput = float16one;
6925                                 }
6926                                 else
6927                                 {
6928                                         const bool      unordered       = f1nan || f2nan;
6929
6930                                         if (unordered || testedLogicalFunction(f1, f2))
6931                                                 expectedOutput = float16one;
6932                                 }
6933                         }
6934
6935                         if (outputAsFP16[idx] != expectedOutput)
6936                         {
6937                                 std::ostringstream str;
6938
6939                                 str << "ERROR: Sub-case #" << idx
6940                                         << " flushToZero:" << flushToZero
6941                                         << std::hex
6942                                         << " failed, inputs: 0x" << f1.bits()
6943                                         << ";0x" << f2.bits()
6944                                         << " output: 0x" << outputAsFP16[idx]
6945                                         << " expected output: 0x" << expectedOutput;
6946
6947                                 results[denormMode] = str.str();
6948
6949                                 successfulRuns--;
6950
6951                                 break;
6952                         }
6953                 }
6954         }
6955
6956         if (successfulRuns == 0)
6957                 for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
6958                         log << TestLog::Message << results[denormMode] << TestLog::EndMessage;
6959
6960         return successfulRuns > 0;
6961 }
6962
6963 } // anonymous
6964
6965 tcu::TestCaseGroup* createOpSourceTests (tcu::TestContext& testCtx)
6966 {
6967         struct NameCodePair { string name, code; };
6968         RGBA                                                    defaultColors[4];
6969         de::MovePtr<tcu::TestCaseGroup> opSourceTests                   (new tcu::TestCaseGroup(testCtx, "opsource", "OpSource instruction"));
6970         const std::string                               opsourceGLSLWithFile    = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile ";
6971         map<string, string>                             fragments                               = passthruFragments();
6972         const NameCodePair                              tests[]                                 =
6973         {
6974                 {"unknown", "OpSource Unknown 321"},
6975                 {"essl", "OpSource ESSL 310"},
6976                 {"glsl", "OpSource GLSL 450"},
6977                 {"opencl_cpp", "OpSource OpenCL_CPP 120"},
6978                 {"opencl_c", "OpSource OpenCL_C 120"},
6979                 {"multiple", "OpSource GLSL 450\nOpSource GLSL 450"},
6980                 {"file", opsourceGLSLWithFile},
6981                 {"source", opsourceGLSLWithFile + "\"void main(){}\""},
6982                 // Longest possible source string: SPIR-V limits instructions to 65535
6983                 // words, of which the first 4 are opsourceGLSLWithFile; the rest will
6984                 // contain 65530 UTF8 characters (one word each) plus one last word
6985                 // containing 3 ASCII characters and \0.
6986                 {"longsource", opsourceGLSLWithFile + '"' + makeLongUTF8String(65530) + "ccc" + '"'}
6987         };
6988
6989         getDefaultColors(defaultColors);
6990         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
6991         {
6992                 fragments["debug"] = tests[testNdx].code;
6993                 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
6994         }
6995
6996         return opSourceTests.release();
6997 }
6998
6999 tcu::TestCaseGroup* createOpSourceContinuedTests (tcu::TestContext& testCtx)
7000 {
7001         struct NameCodePair { string name, code; };
7002         RGBA                                                            defaultColors[4];
7003         de::MovePtr<tcu::TestCaseGroup>         opSourceTests           (new tcu::TestCaseGroup(testCtx, "opsourcecontinued", "OpSourceContinued instruction"));
7004         map<string, string>                                     fragments                       = passthruFragments();
7005         const std::string                                       opsource                        = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile \"void main(){}\"\n";
7006         const NameCodePair                                      tests[]                         =
7007         {
7008                 {"empty", opsource + "OpSourceContinued \"\""},
7009                 {"short", opsource + "OpSourceContinued \"abcde\""},
7010                 {"multiple", opsource + "OpSourceContinued \"abcde\"\nOpSourceContinued \"fghij\""},
7011                 // Longest possible source string: SPIR-V limits instructions to 65535
7012                 // words, of which the first one is OpSourceContinued/length; the rest
7013                 // will contain 65533 UTF8 characters (one word each) plus one last word
7014                 // containing 3 ASCII characters and \0.
7015                 {"long", opsource + "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\""}
7016         };
7017
7018         getDefaultColors(defaultColors);
7019         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7020         {
7021                 fragments["debug"] = tests[testNdx].code;
7022                 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7023         }
7024
7025         return opSourceTests.release();
7026 }
7027 tcu::TestCaseGroup* createOpNoLineTests(tcu::TestContext& testCtx)
7028 {
7029         RGBA                                                             defaultColors[4];
7030         de::MovePtr<tcu::TestCaseGroup>          opLineTests             (new tcu::TestCaseGroup(testCtx, "opnoline", "OpNoLine instruction"));
7031         map<string, string>                                      fragments;
7032         getDefaultColors(defaultColors);
7033         fragments["debug"]                      =
7034                 "%name = OpString \"name\"\n";
7035
7036         fragments["pre_main"]   =
7037                 "OpNoLine\n"
7038                 "OpNoLine\n"
7039                 "OpLine %name 1 1\n"
7040                 "OpNoLine\n"
7041                 "OpLine %name 1 1\n"
7042                 "OpLine %name 1 1\n"
7043                 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7044                 "OpNoLine\n"
7045                 "OpLine %name 1 1\n"
7046                 "OpNoLine\n"
7047                 "OpLine %name 1 1\n"
7048                 "OpLine %name 1 1\n"
7049                 "%second_param1 = OpFunctionParameter %v4f32\n"
7050                 "OpNoLine\n"
7051                 "OpNoLine\n"
7052                 "%label_secondfunction = OpLabel\n"
7053                 "OpNoLine\n"
7054                 "OpReturnValue %second_param1\n"
7055                 "OpFunctionEnd\n"
7056                 "OpNoLine\n"
7057                 "OpNoLine\n";
7058
7059         fragments["testfun"]            =
7060                 // A %test_code function that returns its argument unchanged.
7061                 "OpNoLine\n"
7062                 "OpNoLine\n"
7063                 "OpLine %name 1 1\n"
7064                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7065                 "OpNoLine\n"
7066                 "%param1 = OpFunctionParameter %v4f32\n"
7067                 "OpNoLine\n"
7068                 "OpNoLine\n"
7069                 "%label_testfun = OpLabel\n"
7070                 "OpNoLine\n"
7071                 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7072                 "OpReturnValue %val1\n"
7073                 "OpFunctionEnd\n"
7074                 "OpLine %name 1 1\n"
7075                 "OpNoLine\n";
7076
7077         createTestsForAllStages("opnoline", defaultColors, defaultColors, fragments, opLineTests.get());
7078
7079         return opLineTests.release();
7080 }
7081
7082 tcu::TestCaseGroup* createOpModuleProcessedTests(tcu::TestContext& testCtx)
7083 {
7084         RGBA                                                            defaultColors[4];
7085         de::MovePtr<tcu::TestCaseGroup>         opModuleProcessedTests                  (new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "OpModuleProcessed instruction"));
7086         map<string, string>                                     fragments;
7087         std::vector<std::string>                        noExtensions;
7088         GraphicsResources                                       resources;
7089
7090         getDefaultColors(defaultColors);
7091         resources.verifyBinary = veryfiBinaryShader;
7092         resources.spirvVersion = SPIRV_VERSION_1_3;
7093
7094         fragments["moduleprocessed"]                                                    =
7095                 "OpModuleProcessed \"VULKAN CTS\"\n"
7096                 "OpModuleProcessed \"Negative values\"\n"
7097                 "OpModuleProcessed \"Date: 2017/09/21\"\n";
7098
7099         fragments["pre_main"]   =
7100                 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7101                 "%second_param1 = OpFunctionParameter %v4f32\n"
7102                 "%label_secondfunction = OpLabel\n"
7103                 "OpReturnValue %second_param1\n"
7104                 "OpFunctionEnd\n";
7105
7106         fragments["testfun"]            =
7107                 // A %test_code function that returns its argument unchanged.
7108                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7109                 "%param1 = OpFunctionParameter %v4f32\n"
7110                 "%label_testfun = OpLabel\n"
7111                 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7112                 "OpReturnValue %val1\n"
7113                 "OpFunctionEnd\n";
7114
7115         createTestsForAllStages ("opmoduleprocessed", defaultColors, defaultColors, fragments, resources, noExtensions, opModuleProcessedTests.get());
7116
7117         return opModuleProcessedTests.release();
7118 }
7119
7120
7121 tcu::TestCaseGroup* createOpLineTests(tcu::TestContext& testCtx)
7122 {
7123         RGBA                                                                                                    defaultColors[4];
7124         de::MovePtr<tcu::TestCaseGroup>                                                 opLineTests                     (new tcu::TestCaseGroup(testCtx, "opline", "OpLine instruction"));
7125         map<string, string>                                                                             fragments;
7126         std::vector<std::pair<std::string, std::string> >               problemStrings;
7127
7128         problemStrings.push_back(std::make_pair<std::string, std::string>("empty_name", ""));
7129         problemStrings.push_back(std::make_pair<std::string, std::string>("short_name", "short_name"));
7130         problemStrings.push_back(std::make_pair<std::string, std::string>("long_name", makeLongUTF8String(65530) + "ccc"));
7131         getDefaultColors(defaultColors);
7132
7133         fragments["debug"]                      =
7134                 "%other_name = OpString \"other_name\"\n";
7135
7136         fragments["pre_main"]   =
7137                 "OpLine %file_name 32 0\n"
7138                 "OpLine %file_name 32 32\n"
7139                 "OpLine %file_name 32 40\n"
7140                 "OpLine %other_name 32 40\n"
7141                 "OpLine %other_name 0 100\n"
7142                 "OpLine %other_name 0 4294967295\n"
7143                 "OpLine %other_name 4294967295 0\n"
7144                 "OpLine %other_name 32 40\n"
7145                 "OpLine %file_name 0 0\n"
7146                 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7147                 "OpLine %file_name 1 0\n"
7148                 "%second_param1 = OpFunctionParameter %v4f32\n"
7149                 "OpLine %file_name 1 3\n"
7150                 "OpLine %file_name 1 2\n"
7151                 "%label_secondfunction = OpLabel\n"
7152                 "OpLine %file_name 0 2\n"
7153                 "OpReturnValue %second_param1\n"
7154                 "OpFunctionEnd\n"
7155                 "OpLine %file_name 0 2\n"
7156                 "OpLine %file_name 0 2\n";
7157
7158         fragments["testfun"]            =
7159                 // A %test_code function that returns its argument unchanged.
7160                 "OpLine %file_name 1 0\n"
7161                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7162                 "OpLine %file_name 16 330\n"
7163                 "%param1 = OpFunctionParameter %v4f32\n"
7164                 "OpLine %file_name 14 442\n"
7165                 "%label_testfun = OpLabel\n"
7166                 "OpLine %file_name 11 1024\n"
7167                 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7168                 "OpLine %file_name 2 97\n"
7169                 "OpReturnValue %val1\n"
7170                 "OpFunctionEnd\n"
7171                 "OpLine %file_name 5 32\n";
7172
7173         for (size_t i = 0; i < problemStrings.size(); ++i)
7174         {
7175                 map<string, string> testFragments = fragments;
7176                 testFragments["debug"] += "%file_name = OpString \"" + problemStrings[i].second + "\"\n";
7177                 createTestsForAllStages(string("opline") + "_" + problemStrings[i].first, defaultColors, defaultColors, testFragments, opLineTests.get());
7178         }
7179
7180         return opLineTests.release();
7181 }
7182
7183 tcu::TestCaseGroup* createOpConstantNullTests(tcu::TestContext& testCtx)
7184 {
7185         de::MovePtr<tcu::TestCaseGroup> opConstantNullTests             (new tcu::TestCaseGroup(testCtx, "opconstantnull", "OpConstantNull instruction"));
7186         RGBA                                                    colors[4];
7187
7188
7189         const char                                              functionStart[] =
7190                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7191                 "%param1 = OpFunctionParameter %v4f32\n"
7192                 "%lbl    = OpLabel\n";
7193
7194         const char                                              functionEnd[]   =
7195                 "OpReturnValue %transformed_param\n"
7196                 "OpFunctionEnd\n";
7197
7198         struct NameConstantsCode
7199         {
7200                 string name;
7201                 string constants;
7202                 string code;
7203         };
7204
7205         NameConstantsCode tests[] =
7206         {
7207                 {
7208                         "vec4",
7209                         "%cnull = OpConstantNull %v4f32\n",
7210                         "%transformed_param = OpFAdd %v4f32 %param1 %cnull\n"
7211                 },
7212                 {
7213                         "float",
7214                         "%cnull = OpConstantNull %f32\n",
7215                         "%vp = OpVariable %fp_v4f32 Function\n"
7216                         "%v  = OpLoad %v4f32 %vp\n"
7217                         "%v0 = OpVectorInsertDynamic %v4f32 %v %cnull %c_i32_0\n"
7218                         "%v1 = OpVectorInsertDynamic %v4f32 %v0 %cnull %c_i32_1\n"
7219                         "%v2 = OpVectorInsertDynamic %v4f32 %v1 %cnull %c_i32_2\n"
7220                         "%v3 = OpVectorInsertDynamic %v4f32 %v2 %cnull %c_i32_3\n"
7221                         "%transformed_param = OpFAdd %v4f32 %param1 %v3\n"
7222                 },
7223                 {
7224                         "bool",
7225                         "%cnull             = OpConstantNull %bool\n",
7226                         "%v                 = OpVariable %fp_v4f32 Function\n"
7227                         "                     OpStore %v %param1\n"
7228                         "                     OpSelectionMerge %false_label None\n"
7229                         "                     OpBranchConditional %cnull %true_label %false_label\n"
7230                         "%true_label        = OpLabel\n"
7231                         "                     OpStore %v %c_v4f32_0_5_0_5_0_5_0_5\n"
7232                         "                     OpBranch %false_label\n"
7233                         "%false_label       = OpLabel\n"
7234                         "%transformed_param = OpLoad %v4f32 %v\n"
7235                 },
7236                 {
7237                         "i32",
7238                         "%cnull             = OpConstantNull %i32\n",
7239                         "%v                 = OpVariable %fp_v4f32 Function %c_v4f32_0_5_0_5_0_5_0_5\n"
7240                         "%b                 = OpIEqual %bool %cnull %c_i32_0\n"
7241                         "                     OpSelectionMerge %false_label None\n"
7242                         "                     OpBranchConditional %b %true_label %false_label\n"
7243                         "%true_label        = OpLabel\n"
7244                         "                     OpStore %v %param1\n"
7245                         "                     OpBranch %false_label\n"
7246                         "%false_label       = OpLabel\n"
7247                         "%transformed_param = OpLoad %v4f32 %v\n"
7248                 },
7249                 {
7250                         "struct",
7251                         "%stype             = OpTypeStruct %f32 %v4f32\n"
7252                         "%fp_stype          = OpTypePointer Function %stype\n"
7253                         "%cnull             = OpConstantNull %stype\n",
7254                         "%v                 = OpVariable %fp_stype Function %cnull\n"
7255                         "%f                 = OpAccessChain %fp_v4f32 %v %c_i32_1\n"
7256                         "%f_val             = OpLoad %v4f32 %f\n"
7257                         "%transformed_param = OpFAdd %v4f32 %param1 %f_val\n"
7258                 },
7259                 {
7260                         "array",
7261                         "%a4_v4f32          = OpTypeArray %v4f32 %c_u32_4\n"
7262                         "%fp_a4_v4f32       = OpTypePointer Function %a4_v4f32\n"
7263                         "%cnull             = OpConstantNull %a4_v4f32\n",
7264                         "%v                 = OpVariable %fp_a4_v4f32 Function %cnull\n"
7265                         "%f                 = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7266                         "%f1                = OpAccessChain %fp_v4f32 %v %c_u32_1\n"
7267                         "%f2                = OpAccessChain %fp_v4f32 %v %c_u32_2\n"
7268                         "%f3                = OpAccessChain %fp_v4f32 %v %c_u32_3\n"
7269                         "%f_val             = OpLoad %v4f32 %f\n"
7270                         "%f1_val            = OpLoad %v4f32 %f1\n"
7271                         "%f2_val            = OpLoad %v4f32 %f2\n"
7272                         "%f3_val            = OpLoad %v4f32 %f3\n"
7273                         "%t0                = OpFAdd %v4f32 %param1 %f_val\n"
7274                         "%t1                = OpFAdd %v4f32 %t0 %f1_val\n"
7275                         "%t2                = OpFAdd %v4f32 %t1 %f2_val\n"
7276                         "%transformed_param = OpFAdd %v4f32 %t2 %f3_val\n"
7277                 },
7278                 {
7279                         "matrix",
7280                         "%mat4x4_f32        = OpTypeMatrix %v4f32 4\n"
7281                         "%cnull             = OpConstantNull %mat4x4_f32\n",
7282                         // Our null matrix * any vector should result in a zero vector.
7283                         "%v                 = OpVectorTimesMatrix %v4f32 %param1 %cnull\n"
7284                         "%transformed_param = OpFAdd %v4f32 %param1 %v\n"
7285                 }
7286         };
7287
7288         getHalfColorsFullAlpha(colors);
7289
7290         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7291         {
7292                 map<string, string> fragments;
7293                 fragments["pre_main"] = tests[testNdx].constants;
7294                 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7295                 createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, opConstantNullTests.get());
7296         }
7297         return opConstantNullTests.release();
7298 }
7299 tcu::TestCaseGroup* createOpConstantCompositeTests(tcu::TestContext& testCtx)
7300 {
7301         de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests                (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "OpConstantComposite instruction"));
7302         RGBA                                                    inputColors[4];
7303         RGBA                                                    outputColors[4];
7304
7305
7306         const char                                              functionStart[]  =
7307                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7308                 "%param1 = OpFunctionParameter %v4f32\n"
7309                 "%lbl    = OpLabel\n";
7310
7311         const char                                              functionEnd[]           =
7312                 "OpReturnValue %transformed_param\n"
7313                 "OpFunctionEnd\n";
7314
7315         struct NameConstantsCode
7316         {
7317                 string name;
7318                 string constants;
7319                 string code;
7320         };
7321
7322         NameConstantsCode tests[] =
7323         {
7324                 {
7325                         "vec4",
7326
7327                         "%cval              = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0\n",
7328                         "%transformed_param = OpFAdd %v4f32 %param1 %cval\n"
7329                 },
7330                 {
7331                         "struct",
7332
7333                         "%stype             = OpTypeStruct %v4f32 %f32\n"
7334                         "%fp_stype          = OpTypePointer Function %stype\n"
7335                         "%f32_n_1           = OpConstant %f32 -1.0\n"
7336                         "%f32_1_5           = OpConstant %f32 !0x3fc00000\n" // +1.5
7337                         "%cvec              = OpConstantComposite %v4f32 %f32_1_5 %f32_1_5 %f32_1_5 %c_f32_1\n"
7338                         "%cval              = OpConstantComposite %stype %cvec %f32_n_1\n",
7339
7340                         "%v                 = OpVariable %fp_stype Function %cval\n"
7341                         "%vec_ptr           = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7342                         "%f32_ptr           = OpAccessChain %fp_f32 %v %c_u32_1\n"
7343                         "%vec_val           = OpLoad %v4f32 %vec_ptr\n"
7344                         "%f32_val           = OpLoad %f32 %f32_ptr\n"
7345                         "%tmp1              = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_1 %f32_val\n" // vec4(-1)
7346                         "%tmp2              = OpFAdd %v4f32 %tmp1 %param1\n" // param1 + vec4(-1)
7347                         "%transformed_param = OpFAdd %v4f32 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
7348                 },
7349                 {
7350                         // [1|0|0|0.5] [x] = x + 0.5
7351                         // [0|1|0|0.5] [y] = y + 0.5
7352                         // [0|0|1|0.5] [z] = z + 0.5
7353                         // [0|0|0|1  ] [1] = 1
7354                         "matrix",
7355
7356                         "%mat4x4_f32          = OpTypeMatrix %v4f32 4\n"
7357                         "%v4f32_1_0_0_0       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_0\n"
7358                         "%v4f32_0_1_0_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_1 %c_f32_0 %c_f32_0\n"
7359                         "%v4f32_0_0_1_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_1 %c_f32_0\n"
7360                         "%v4f32_0_5_0_5_0_5_1 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_1\n"
7361                         "%cval                = OpConstantComposite %mat4x4_f32 %v4f32_1_0_0_0 %v4f32_0_1_0_0 %v4f32_0_0_1_0 %v4f32_0_5_0_5_0_5_1\n",
7362
7363                         "%transformed_param   = OpMatrixTimesVector %v4f32 %cval %param1\n"
7364                 },
7365                 {
7366                         "array",
7367
7368                         "%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7369                         "%fp_a4f32            = OpTypePointer Function %a4f32\n"
7370                         "%f32_n_1             = OpConstant %f32 -1.0\n"
7371                         "%f32_1_5             = OpConstant %f32 !0x3fc00000\n" // +1.5
7372                         "%carr                = OpConstantComposite %a4f32 %c_f32_0 %f32_n_1 %f32_1_5 %c_f32_0\n",
7373
7374                         "%v                   = OpVariable %fp_a4f32 Function %carr\n"
7375                         "%f                   = OpAccessChain %fp_f32 %v %c_u32_0\n"
7376                         "%f1                  = OpAccessChain %fp_f32 %v %c_u32_1\n"
7377                         "%f2                  = OpAccessChain %fp_f32 %v %c_u32_2\n"
7378                         "%f3                  = OpAccessChain %fp_f32 %v %c_u32_3\n"
7379                         "%f_val               = OpLoad %f32 %f\n"
7380                         "%f1_val              = OpLoad %f32 %f1\n"
7381                         "%f2_val              = OpLoad %f32 %f2\n"
7382                         "%f3_val              = OpLoad %f32 %f3\n"
7383                         "%ftot1               = OpFAdd %f32 %f_val %f1_val\n"
7384                         "%ftot2               = OpFAdd %f32 %ftot1 %f2_val\n"
7385                         "%ftot3               = OpFAdd %f32 %ftot2 %f3_val\n"  // 0 - 1 + 1.5 + 0
7386                         "%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %ftot3\n"
7387                         "%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
7388                 },
7389                 {
7390                         //
7391                         // [
7392                         //   {
7393                         //      0.0,
7394                         //      [ 1.0, 1.0, 1.0, 1.0]
7395                         //   },
7396                         //   {
7397                         //      1.0,
7398                         //      [ 0.0, 0.5, 0.0, 0.0]
7399                         //   }, //     ^^^
7400                         //   {
7401                         //      0.0,
7402                         //      [ 1.0, 1.0, 1.0, 1.0]
7403                         //   }
7404                         // ]
7405                         "array_of_struct_of_array",
7406
7407                         "%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7408                         "%fp_a4f32            = OpTypePointer Function %a4f32\n"
7409                         "%stype               = OpTypeStruct %f32 %a4f32\n"
7410                         "%a3stype             = OpTypeArray %stype %c_u32_3\n"
7411                         "%fp_a3stype          = OpTypePointer Function %a3stype\n"
7412                         "%ca4f32_0            = OpConstantComposite %a4f32 %c_f32_0 %c_f32_0_5 %c_f32_0 %c_f32_0\n"
7413                         "%ca4f32_1            = OpConstantComposite %a4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
7414                         "%cstype1             = OpConstantComposite %stype %c_f32_0 %ca4f32_1\n"
7415                         "%cstype2             = OpConstantComposite %stype %c_f32_1 %ca4f32_0\n"
7416                         "%carr                = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
7417
7418                         "%v                   = OpVariable %fp_a3stype Function %carr\n"
7419                         "%f                   = OpAccessChain %fp_f32 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
7420                         "%f_l                 = OpLoad %f32 %f\n"
7421                         "%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %f_l\n"
7422                         "%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
7423                 }
7424         };
7425
7426         getHalfColorsFullAlpha(inputColors);
7427         outputColors[0] = RGBA(255, 255, 255, 255);
7428         outputColors[1] = RGBA(255, 127, 127, 255);
7429         outputColors[2] = RGBA(127, 255, 127, 255);
7430         outputColors[3] = RGBA(127, 127, 255, 255);
7431
7432         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7433         {
7434                 map<string, string> fragments;
7435                 fragments["pre_main"] = tests[testNdx].constants;
7436                 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7437                 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, opConstantCompositeTests.get());
7438         }
7439         return opConstantCompositeTests.release();
7440 }
7441
7442 tcu::TestCaseGroup* createSelectionBlockOrderTests(tcu::TestContext& testCtx)
7443 {
7444         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, "selection_block_order", "Out-of-order blocks for selection"));
7445         RGBA                                                    inputColors[4];
7446         RGBA                                                    outputColors[4];
7447         map<string, string>                             fragments;
7448
7449         // vec4 test_code(vec4 param) {
7450         //   vec4 result = param;
7451         //   for (int i = 0; i < 4; ++i) {
7452         //     if (i == 0) result[i] = 0.;
7453         //     else        result[i] = 1. - result[i];
7454         //   }
7455         //   return result;
7456         // }
7457         const char                                              function[]                      =
7458                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7459                 "%param1    = OpFunctionParameter %v4f32\n"
7460                 "%lbl       = OpLabel\n"
7461                 "%iptr      = OpVariable %fp_i32 Function\n"
7462                 "%result    = OpVariable %fp_v4f32 Function\n"
7463                 "             OpStore %iptr %c_i32_0\n"
7464                 "             OpStore %result %param1\n"
7465                 "             OpBranch %loop\n"
7466
7467                 // Loop entry block.
7468                 "%loop      = OpLabel\n"
7469                 "%ival      = OpLoad %i32 %iptr\n"
7470                 "%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
7471                 "             OpLoopMerge %exit %if_entry None\n"
7472                 "             OpBranchConditional %lt_4 %if_entry %exit\n"
7473
7474                 // Merge block for loop.
7475                 "%exit      = OpLabel\n"
7476                 "%ret       = OpLoad %v4f32 %result\n"
7477                 "             OpReturnValue %ret\n"
7478
7479                 // If-statement entry block.
7480                 "%if_entry  = OpLabel\n"
7481                 "%loc       = OpAccessChain %fp_f32 %result %ival\n"
7482                 "%eq_0      = OpIEqual %bool %ival %c_i32_0\n"
7483                 "             OpSelectionMerge %if_exit None\n"
7484                 "             OpBranchConditional %eq_0 %if_true %if_false\n"
7485
7486                 // False branch for if-statement.
7487                 "%if_false  = OpLabel\n"
7488                 "%val       = OpLoad %f32 %loc\n"
7489                 "%sub       = OpFSub %f32 %c_f32_1 %val\n"
7490                 "             OpStore %loc %sub\n"
7491                 "             OpBranch %if_exit\n"
7492
7493                 // Merge block for if-statement.
7494                 "%if_exit   = OpLabel\n"
7495                 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
7496                 "             OpStore %iptr %ival_next\n"
7497                 "             OpBranch %loop\n"
7498
7499                 // True branch for if-statement.
7500                 "%if_true   = OpLabel\n"
7501                 "             OpStore %loc %c_f32_0\n"
7502                 "             OpBranch %if_exit\n"
7503
7504                 "             OpFunctionEnd\n";
7505
7506         fragments["testfun"]    = function;
7507
7508         inputColors[0]                  = RGBA(127, 127, 127, 0);
7509         inputColors[1]                  = RGBA(127, 0,   0,   0);
7510         inputColors[2]                  = RGBA(0,   127, 0,   0);
7511         inputColors[3]                  = RGBA(0,   0,   127, 0);
7512
7513         outputColors[0]                 = RGBA(0, 128, 128, 255);
7514         outputColors[1]                 = RGBA(0, 255, 255, 255);
7515         outputColors[2]                 = RGBA(0, 128, 255, 255);
7516         outputColors[3]                 = RGBA(0, 255, 128, 255);
7517
7518         createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
7519
7520         return group.release();
7521 }
7522
7523 tcu::TestCaseGroup* createSwitchBlockOrderTests(tcu::TestContext& testCtx)
7524 {
7525         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, "switch_block_order", "Out-of-order blocks for switch"));
7526         RGBA                                                    inputColors[4];
7527         RGBA                                                    outputColors[4];
7528         map<string, string>                             fragments;
7529
7530         const char                                              typesAndConstants[]     =
7531                 "%c_f32_p2  = OpConstant %f32 0.2\n"
7532                 "%c_f32_p4  = OpConstant %f32 0.4\n"
7533                 "%c_f32_p6  = OpConstant %f32 0.6\n"
7534                 "%c_f32_p8  = OpConstant %f32 0.8\n";
7535
7536         // vec4 test_code(vec4 param) {
7537         //   vec4 result = param;
7538         //   for (int i = 0; i < 4; ++i) {
7539         //     switch (i) {
7540         //       case 0: result[i] += .2; break;
7541         //       case 1: result[i] += .6; break;
7542         //       case 2: result[i] += .4; break;
7543         //       case 3: result[i] += .8; break;
7544         //       default: break; // unreachable
7545         //     }
7546         //   }
7547         //   return result;
7548         // }
7549         const char                                              function[]                      =
7550                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7551                 "%param1    = OpFunctionParameter %v4f32\n"
7552                 "%lbl       = OpLabel\n"
7553                 "%iptr      = OpVariable %fp_i32 Function\n"
7554                 "%result    = OpVariable %fp_v4f32 Function\n"
7555                 "             OpStore %iptr %c_i32_0\n"
7556                 "             OpStore %result %param1\n"
7557                 "             OpBranch %loop\n"
7558
7559                 // Loop entry block.
7560                 "%loop      = OpLabel\n"
7561                 "%ival      = OpLoad %i32 %iptr\n"
7562                 "%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
7563                 "             OpLoopMerge %exit %switch_exit None\n"
7564                 "             OpBranchConditional %lt_4 %switch_entry %exit\n"
7565
7566                 // Merge block for loop.
7567                 "%exit      = OpLabel\n"
7568                 "%ret       = OpLoad %v4f32 %result\n"
7569                 "             OpReturnValue %ret\n"
7570
7571                 // Switch-statement entry block.
7572                 "%switch_entry   = OpLabel\n"
7573                 "%loc            = OpAccessChain %fp_f32 %result %ival\n"
7574                 "%val            = OpLoad %f32 %loc\n"
7575                 "                  OpSelectionMerge %switch_exit None\n"
7576                 "                  OpSwitch %ival %switch_default 0 %case0 1 %case1 2 %case2 3 %case3\n"
7577
7578                 "%case2          = OpLabel\n"
7579                 "%addp4          = OpFAdd %f32 %val %c_f32_p4\n"
7580                 "                  OpStore %loc %addp4\n"
7581                 "                  OpBranch %switch_exit\n"
7582
7583                 "%switch_default = OpLabel\n"
7584                 "                  OpUnreachable\n"
7585
7586                 "%case3          = OpLabel\n"
7587                 "%addp8          = OpFAdd %f32 %val %c_f32_p8\n"
7588                 "                  OpStore %loc %addp8\n"
7589                 "                  OpBranch %switch_exit\n"
7590
7591                 "%case0          = OpLabel\n"
7592                 "%addp2          = OpFAdd %f32 %val %c_f32_p2\n"
7593                 "                  OpStore %loc %addp2\n"
7594                 "                  OpBranch %switch_exit\n"
7595
7596                 // Merge block for switch-statement.
7597                 "%switch_exit    = OpLabel\n"
7598                 "%ival_next      = OpIAdd %i32 %ival %c_i32_1\n"
7599                 "                  OpStore %iptr %ival_next\n"
7600                 "                  OpBranch %loop\n"
7601
7602                 "%case1          = OpLabel\n"
7603                 "%addp6          = OpFAdd %f32 %val %c_f32_p6\n"
7604                 "                  OpStore %loc %addp6\n"
7605                 "                  OpBranch %switch_exit\n"
7606
7607                 "                  OpFunctionEnd\n";
7608
7609         fragments["pre_main"]   = typesAndConstants;
7610         fragments["testfun"]    = function;
7611
7612         inputColors[0]                  = RGBA(127, 27,  127, 51);
7613         inputColors[1]                  = RGBA(127, 0,   0,   51);
7614         inputColors[2]                  = RGBA(0,   27,  0,   51);
7615         inputColors[3]                  = RGBA(0,   0,   127, 51);
7616
7617         outputColors[0]                 = RGBA(178, 180, 229, 255);
7618         outputColors[1]                 = RGBA(178, 153, 102, 255);
7619         outputColors[2]                 = RGBA(51,  180, 102, 255);
7620         outputColors[3]                 = RGBA(51,  153, 229, 255);
7621
7622         createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
7623
7624         return group.release();
7625 }
7626
7627 tcu::TestCaseGroup* createDecorationGroupTests(tcu::TestContext& testCtx)
7628 {
7629         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, "decoration_group", "Decoration group tests"));
7630         RGBA                                                    inputColors[4];
7631         RGBA                                                    outputColors[4];
7632         map<string, string>                             fragments;
7633
7634         const char                                              decorations[]           =
7635                 "OpDecorate %array_group         ArrayStride 4\n"
7636                 "OpDecorate %struct_member_group Offset 0\n"
7637                 "%array_group         = OpDecorationGroup\n"
7638                 "%struct_member_group = OpDecorationGroup\n"
7639
7640                 "OpDecorate %group1 RelaxedPrecision\n"
7641                 "OpDecorate %group3 RelaxedPrecision\n"
7642                 "OpDecorate %group3 Invariant\n"
7643                 "OpDecorate %group3 Restrict\n"
7644                 "%group0 = OpDecorationGroup\n"
7645                 "%group1 = OpDecorationGroup\n"
7646                 "%group3 = OpDecorationGroup\n";
7647
7648         const char                                              typesAndConstants[]     =
7649                 "%a3f32     = OpTypeArray %f32 %c_u32_3\n"
7650                 "%struct1   = OpTypeStruct %a3f32\n"
7651                 "%struct2   = OpTypeStruct %a3f32\n"
7652                 "%fp_struct1 = OpTypePointer Function %struct1\n"
7653                 "%fp_struct2 = OpTypePointer Function %struct2\n"
7654                 "%c_f32_2    = OpConstant %f32 2.\n"
7655                 "%c_f32_n2   = OpConstant %f32 -2.\n"
7656
7657                 "%c_a3f32_1 = OpConstantComposite %a3f32 %c_f32_1 %c_f32_2 %c_f32_1\n"
7658                 "%c_a3f32_2 = OpConstantComposite %a3f32 %c_f32_n1 %c_f32_n2 %c_f32_n1\n"
7659                 "%c_struct1 = OpConstantComposite %struct1 %c_a3f32_1\n"
7660                 "%c_struct2 = OpConstantComposite %struct2 %c_a3f32_2\n";
7661
7662         const char                                              function[]                      =
7663                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7664                 "%param     = OpFunctionParameter %v4f32\n"
7665                 "%entry     = OpLabel\n"
7666                 "%result    = OpVariable %fp_v4f32 Function\n"
7667                 "%v_struct1 = OpVariable %fp_struct1 Function\n"
7668                 "%v_struct2 = OpVariable %fp_struct2 Function\n"
7669                 "             OpStore %result %param\n"
7670                 "             OpStore %v_struct1 %c_struct1\n"
7671                 "             OpStore %v_struct2 %c_struct2\n"
7672                 "%ptr1      = OpAccessChain %fp_f32 %v_struct1 %c_i32_0 %c_i32_2\n"
7673                 "%val1      = OpLoad %f32 %ptr1\n"
7674                 "%ptr2      = OpAccessChain %fp_f32 %v_struct2 %c_i32_0 %c_i32_2\n"
7675                 "%val2      = OpLoad %f32 %ptr2\n"
7676                 "%addvalues = OpFAdd %f32 %val1 %val2\n"
7677                 "%ptr       = OpAccessChain %fp_f32 %result %c_i32_1\n"
7678                 "%val       = OpLoad %f32 %ptr\n"
7679                 "%addresult = OpFAdd %f32 %addvalues %val\n"
7680                 "             OpStore %ptr %addresult\n"
7681                 "%ret       = OpLoad %v4f32 %result\n"
7682                 "             OpReturnValue %ret\n"
7683                 "             OpFunctionEnd\n";
7684
7685         struct CaseNameDecoration
7686         {
7687                 string name;
7688                 string decoration;
7689         };
7690
7691         CaseNameDecoration tests[] =
7692         {
7693                 {
7694                         "same_decoration_group_on_multiple_types",
7695                         "OpGroupMemberDecorate %struct_member_group %struct1 0 %struct2 0\n"
7696                 },
7697                 {
7698                         "empty_decoration_group",
7699                         "OpGroupDecorate %group0      %a3f32\n"
7700                         "OpGroupDecorate %group0      %result\n"
7701                 },
7702                 {
7703                         "one_element_decoration_group",
7704                         "OpGroupDecorate %array_group %a3f32\n"
7705                 },
7706                 {
7707                         "multiple_elements_decoration_group",
7708                         "OpGroupDecorate %group3      %v_struct1\n"
7709                 },
7710                 {
7711                         "multiple_decoration_groups_on_same_variable",
7712                         "OpGroupDecorate %group0      %v_struct2\n"
7713                         "OpGroupDecorate %group1      %v_struct2\n"
7714                         "OpGroupDecorate %group3      %v_struct2\n"
7715                 },
7716                 {
7717                         "same_decoration_group_multiple_times",
7718                         "OpGroupDecorate %group1      %addvalues\n"
7719                         "OpGroupDecorate %group1      %addvalues\n"
7720                         "OpGroupDecorate %group1      %addvalues\n"
7721                 },
7722
7723         };
7724
7725         getHalfColorsFullAlpha(inputColors);
7726         getHalfColorsFullAlpha(outputColors);
7727
7728         for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
7729         {
7730                 fragments["decoration"] = decorations + tests[idx].decoration;
7731                 fragments["pre_main"]   = typesAndConstants;
7732                 fragments["testfun"]    = function;
7733
7734                 createTestsForAllStages(tests[idx].name, inputColors, outputColors, fragments, group.get());
7735         }
7736
7737         return group.release();
7738 }
7739
7740 struct SpecConstantTwoIntGraphicsCase
7741 {
7742         const char*             caseName;
7743         const char*             scDefinition0;
7744         const char*             scDefinition1;
7745         const char*             scResultType;
7746         const char*             scOperation;
7747         deInt32                 scActualValue0;
7748         deInt32                 scActualValue1;
7749         const char*             resultOperation;
7750         RGBA                    expectedColors[4];
7751         deInt32                 scActualValueLength;
7752
7753                                         SpecConstantTwoIntGraphicsCase (const char*             name,
7754                                                                                                         const char*             definition0,
7755                                                                                                         const char*             definition1,
7756                                                                                                         const char*             resultType,
7757                                                                                                         const char*             operation,
7758                                                                                                         const deInt32   value0,
7759                                                                                                         const deInt32   value1,
7760                                                                                                         const char*             resultOp,
7761                                                                                                         const RGBA              (&output)[4],
7762                                                                                                         const deInt32   valueLength = sizeof(deInt32))
7763                                                 : caseName                              (name)
7764                                                 , scDefinition0                 (definition0)
7765                                                 , scDefinition1                 (definition1)
7766                                                 , scResultType                  (resultType)
7767                                                 , scOperation                   (operation)
7768                                                 , scActualValue0                (value0)
7769                                                 , scActualValue1                (value1)
7770                                                 , resultOperation               (resultOp)
7771                                                 , scActualValueLength   (valueLength)
7772         {
7773                 expectedColors[0] = output[0];
7774                 expectedColors[1] = output[1];
7775                 expectedColors[2] = output[2];
7776                 expectedColors[3] = output[3];
7777         }
7778 };
7779
7780 tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
7781 {
7782         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
7783         vector<SpecConstantTwoIntGraphicsCase>  cases;
7784         RGBA                                                    inputColors[4];
7785         RGBA                                                    outputColors0[4];
7786         RGBA                                                    outputColors1[4];
7787         RGBA                                                    outputColors2[4];
7788
7789         const deInt32                                   m1AsFloat16                     = 0xbc00; // -1(fp16) == 1 01111 0000000000 == 1011 1100 0000 0000
7790
7791         const char      decorations1[]                  =
7792                 "OpDecorate %sc_0  SpecId 0\n"
7793                 "OpDecorate %sc_1  SpecId 1\n";
7794
7795         const char      typesAndConstants1[]    =
7796                 "${OPTYPE_DEFINITIONS:opt}"
7797                 "%sc_0      = OpSpecConstant${SC_DEF0}\n"
7798                 "%sc_1      = OpSpecConstant${SC_DEF1}\n"
7799                 "%sc_op     = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
7800
7801         const char      function1[]                             =
7802                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7803                 "%param     = OpFunctionParameter %v4f32\n"
7804                 "%label     = OpLabel\n"
7805                 "%result    = OpVariable %fp_v4f32 Function\n"
7806                 "${TYPE_CONVERT:opt}"
7807                 "             OpStore %result %param\n"
7808                 "%gen       = ${GEN_RESULT}\n"
7809                 "%index     = OpIAdd %i32 %gen %c_i32_1\n"
7810                 "%loc       = OpAccessChain %fp_f32 %result %index\n"
7811                 "%val       = OpLoad %f32 %loc\n"
7812                 "%add       = OpFAdd %f32 %val %c_f32_0_5\n"
7813                 "             OpStore %loc %add\n"
7814                 "%ret       = OpLoad %v4f32 %result\n"
7815                 "             OpReturnValue %ret\n"
7816                 "             OpFunctionEnd\n";
7817
7818         inputColors[0] = RGBA(127, 127, 127, 255);
7819         inputColors[1] = RGBA(127, 0,   0,   255);
7820         inputColors[2] = RGBA(0,   127, 0,   255);
7821         inputColors[3] = RGBA(0,   0,   127, 255);
7822
7823         // Derived from inputColors[x] by adding 128 to inputColors[x][0].
7824         outputColors0[0] = RGBA(255, 127, 127, 255);
7825         outputColors0[1] = RGBA(255, 0,   0,   255);
7826         outputColors0[2] = RGBA(128, 127, 0,   255);
7827         outputColors0[3] = RGBA(128, 0,   127, 255);
7828
7829         // Derived from inputColors[x] by adding 128 to inputColors[x][1].
7830         outputColors1[0] = RGBA(127, 255, 127, 255);
7831         outputColors1[1] = RGBA(127, 128, 0,   255);
7832         outputColors1[2] = RGBA(0,   255, 0,   255);
7833         outputColors1[3] = RGBA(0,   128, 127, 255);
7834
7835         // Derived from inputColors[x] by adding 128 to inputColors[x][2].
7836         outputColors2[0] = RGBA(127, 127, 255, 255);
7837         outputColors2[1] = RGBA(127, 0,   128, 255);
7838         outputColors2[2] = RGBA(0,   127, 128, 255);
7839         outputColors2[3] = RGBA(0,   0,   255, 255);
7840
7841         const char addZeroToSc[]                = "OpIAdd %i32 %c_i32_0 %sc_op";
7842         const char addZeroToSc32[]              = "OpIAdd %i32 %c_i32_0 %sc_op32";
7843         const char selectTrueUsingSc[]  = "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
7844         const char selectFalseUsingSc[] = "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
7845
7846         cases.push_back(SpecConstantTwoIntGraphicsCase("iadd",                                  " %i32 0",              " %i32 0",              "%i32",         "IAdd                 %sc_0 %sc_1",                             19,             -20,    addZeroToSc,            outputColors0));
7847         cases.push_back(SpecConstantTwoIntGraphicsCase("isub",                                  " %i32 0",              " %i32 0",              "%i32",         "ISub                 %sc_0 %sc_1",                             19,             20,             addZeroToSc,            outputColors0));
7848         cases.push_back(SpecConstantTwoIntGraphicsCase("imul",                                  " %i32 0",              " %i32 0",              "%i32",         "IMul                 %sc_0 %sc_1",                             -1,             -1,             addZeroToSc,            outputColors2));
7849         cases.push_back(SpecConstantTwoIntGraphicsCase("sdiv",                                  " %i32 0",              " %i32 0",              "%i32",         "SDiv                 %sc_0 %sc_1",                             -126,   126,    addZeroToSc,            outputColors0));
7850         cases.push_back(SpecConstantTwoIntGraphicsCase("udiv",                                  " %i32 0",              " %i32 0",              "%i32",         "UDiv                 %sc_0 %sc_1",                             126,    126,    addZeroToSc,            outputColors2));
7851         cases.push_back(SpecConstantTwoIntGraphicsCase("srem",                                  " %i32 0",              " %i32 0",              "%i32",         "SRem                 %sc_0 %sc_1",                             3,              2,              addZeroToSc,            outputColors2));
7852         cases.push_back(SpecConstantTwoIntGraphicsCase("smod",                                  " %i32 0",              " %i32 0",              "%i32",         "SMod                 %sc_0 %sc_1",                             3,              2,              addZeroToSc,            outputColors2));
7853         cases.push_back(SpecConstantTwoIntGraphicsCase("umod",                                  " %i32 0",              " %i32 0",              "%i32",         "UMod                 %sc_0 %sc_1",                             1001,   500,    addZeroToSc,            outputColors2));
7854         cases.push_back(SpecConstantTwoIntGraphicsCase("bitwiseand",                    " %i32 0",              " %i32 0",              "%i32",         "BitwiseAnd           %sc_0 %sc_1",                             0x33,   0x0d,   addZeroToSc,            outputColors2));
7855         cases.push_back(SpecConstantTwoIntGraphicsCase("bitwiseor",                             " %i32 0",              " %i32 0",              "%i32",         "BitwiseOr            %sc_0 %sc_1",                             0,              1,              addZeroToSc,            outputColors2));
7856         cases.push_back(SpecConstantTwoIntGraphicsCase("bitwisexor",                    " %i32 0",              " %i32 0",              "%i32",         "BitwiseXor           %sc_0 %sc_1",                             0x2e,   0x2f,   addZeroToSc,            outputColors2));
7857         cases.push_back(SpecConstantTwoIntGraphicsCase("shiftrightlogical",             " %i32 0",              " %i32 0",              "%i32",         "ShiftRightLogical    %sc_0 %sc_1",                             2,              1,              addZeroToSc,            outputColors2));
7858         cases.push_back(SpecConstantTwoIntGraphicsCase("shiftrightarithmetic",  " %i32 0",              " %i32 0",              "%i32",         "ShiftRightArithmetic %sc_0 %sc_1",                             -4,             2,              addZeroToSc,            outputColors0));
7859         cases.push_back(SpecConstantTwoIntGraphicsCase("shiftleftlogical",              " %i32 0",              " %i32 0",              "%i32",         "ShiftLeftLogical     %sc_0 %sc_1",                             1,              0,              addZeroToSc,            outputColors2));
7860         cases.push_back(SpecConstantTwoIntGraphicsCase("slessthan",                             " %i32 0",              " %i32 0",              "%bool",        "SLessThan            %sc_0 %sc_1",                             -20,    -10,    selectTrueUsingSc,      outputColors2));
7861         cases.push_back(SpecConstantTwoIntGraphicsCase("ulessthan",                             " %i32 0",              " %i32 0",              "%bool",        "ULessThan            %sc_0 %sc_1",                             10,             20,             selectTrueUsingSc,      outputColors2));
7862         cases.push_back(SpecConstantTwoIntGraphicsCase("sgreaterthan",                  " %i32 0",              " %i32 0",              "%bool",        "SGreaterThan         %sc_0 %sc_1",                             -1000,  50,             selectFalseUsingSc,     outputColors2));
7863         cases.push_back(SpecConstantTwoIntGraphicsCase("ugreaterthan",                  " %i32 0",              " %i32 0",              "%bool",        "UGreaterThan         %sc_0 %sc_1",                             10,             5,              selectTrueUsingSc,      outputColors2));
7864         cases.push_back(SpecConstantTwoIntGraphicsCase("slessthanequal",                " %i32 0",              " %i32 0",              "%bool",        "SLessThanEqual       %sc_0 %sc_1",                             -10,    -10,    selectTrueUsingSc,      outputColors2));
7865         cases.push_back(SpecConstantTwoIntGraphicsCase("ulessthanequal",                " %i32 0",              " %i32 0",              "%bool",        "ULessThanEqual       %sc_0 %sc_1",                             50,             100,    selectTrueUsingSc,      outputColors2));
7866         cases.push_back(SpecConstantTwoIntGraphicsCase("sgreaterthanequal",             " %i32 0",              " %i32 0",              "%bool",        "SGreaterThanEqual    %sc_0 %sc_1",                             -1000,  50,             selectFalseUsingSc,     outputColors2));
7867         cases.push_back(SpecConstantTwoIntGraphicsCase("ugreaterthanequal",             " %i32 0",              " %i32 0",              "%bool",        "UGreaterThanEqual    %sc_0 %sc_1",                             10,             10,             selectTrueUsingSc,      outputColors2));
7868         cases.push_back(SpecConstantTwoIntGraphicsCase("iequal",                                " %i32 0",              " %i32 0",              "%bool",        "IEqual               %sc_0 %sc_1",                             42,             24,             selectFalseUsingSc,     outputColors2));
7869         cases.push_back(SpecConstantTwoIntGraphicsCase("inotequal",                             " %i32 0",              " %i32 0",              "%bool",        "INotEqual            %sc_0 %sc_1",                             42,             24,             selectTrueUsingSc,      outputColors2));
7870         cases.push_back(SpecConstantTwoIntGraphicsCase("logicaland",                    "True %bool",   "True %bool",   "%bool",        "LogicalAnd           %sc_0 %sc_1",                             0,              1,              selectFalseUsingSc,     outputColors2));
7871         cases.push_back(SpecConstantTwoIntGraphicsCase("logicalor",                             "False %bool",  "False %bool",  "%bool",        "LogicalOr            %sc_0 %sc_1",                             1,              0,              selectTrueUsingSc,      outputColors2));
7872         cases.push_back(SpecConstantTwoIntGraphicsCase("logicalequal",                  "True %bool",   "True %bool",   "%bool",        "LogicalEqual         %sc_0 %sc_1",                             0,              1,              selectFalseUsingSc,     outputColors2));
7873         cases.push_back(SpecConstantTwoIntGraphicsCase("logicalnotequal",               "False %bool",  "False %bool",  "%bool",        "LogicalNotEqual      %sc_0 %sc_1",                             1,              0,              selectTrueUsingSc,      outputColors2));
7874         cases.push_back(SpecConstantTwoIntGraphicsCase("snegate",                               " %i32 0",              " %i32 0",              "%i32",         "SNegate              %sc_0",                                   -1,             0,              addZeroToSc,            outputColors2));
7875         cases.push_back(SpecConstantTwoIntGraphicsCase("not",                                   " %i32 0",              " %i32 0",              "%i32",         "Not                  %sc_0",                                   -2,             0,              addZeroToSc,            outputColors2));
7876         cases.push_back(SpecConstantTwoIntGraphicsCase("logicalnot",                    "False %bool",  "False %bool",  "%bool",        "LogicalNot           %sc_0",                                   1,              0,              selectFalseUsingSc,     outputColors2));
7877         cases.push_back(SpecConstantTwoIntGraphicsCase("select",                                "False %bool",  " %i32 0",              "%i32",         "Select               %sc_0 %sc_1 %c_i32_0",    1,              1,              addZeroToSc,            outputColors2));
7878         cases.push_back(SpecConstantTwoIntGraphicsCase("sconvert",                              " %i32 0",              " %i32 0",              "%i16",         "SConvert             %sc_0",                                   -1,             0,              addZeroToSc32,          outputColors0));
7879         // -1082130432 stored as 32-bit two's complement is the binary representation of -1 as IEEE-754 Float
7880         cases.push_back(SpecConstantTwoIntGraphicsCase("fconvert",                              " %f32 0",              " %f32 0",              "%f64",         "FConvert             %sc_0",                                   -1082130432, 0, addZeroToSc32,          outputColors0));
7881         cases.push_back(SpecConstantTwoIntGraphicsCase("fconvert16",                    " %f16 0",              " %f16 0",              "%f32",         "FConvert             %sc_0",                                   m1AsFloat16, 0, addZeroToSc32,          outputColors0, sizeof(deFloat16)));
7882         // \todo[2015-12-1 antiagainst] OpQuantizeToF16
7883
7884         for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7885         {
7886                 map<string, string>                     specializations;
7887                 map<string, string>                     fragments;
7888                 SpecConstants                           specConstants;
7889                 PushConstants                           noPushConstants;
7890                 GraphicsResources                       noResources;
7891                 GraphicsInterfaces                      noInterfaces;
7892                 vector<string>                          extensions;
7893                 VulkanFeatures                          requiredFeatures;
7894
7895                 // Special SPIR-V code for SConvert-case
7896                 if (strcmp(cases[caseNdx].caseName, "sconvert") == 0)
7897                 {
7898                         requiredFeatures.coreFeatures.shaderInt16 = VK_TRUE;
7899                         fragments["capability"]                                 = "OpCapability Int16\n";                                       // Adds 16-bit integer capability
7900                         specializations["OPTYPE_DEFINITIONS"]   = "%i16 = OpTypeInt 16 1\n";                            // Adds 16-bit integer type
7901                         specializations["TYPE_CONVERT"]                 = "%sc_op32 = OpSConvert %i32 %sc_op\n";        // Converts 16-bit integer to 32-bit integer
7902                 }
7903
7904                 // Special SPIR-V code for FConvert-case
7905                 if (strcmp(cases[caseNdx].caseName, "fconvert") == 0)
7906                 {
7907                         requiredFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
7908                         fragments["capability"]                                 = "OpCapability Float64\n";                                     // Adds 64-bit float capability
7909                         specializations["OPTYPE_DEFINITIONS"]   = "%f64 = OpTypeFloat 64\n";                            // Adds 64-bit float type
7910                         specializations["TYPE_CONVERT"]                 = "%sc_op32 = OpConvertFToS %i32 %sc_op\n";     // Converts 64-bit float to 32-bit integer
7911                 }
7912
7913                 // Special SPIR-V code for FConvert-case for 16-bit floats
7914                 if (strcmp(cases[caseNdx].caseName, "fconvert16") == 0)
7915                 {
7916                         extensions.push_back("VK_KHR_shader_float16_int8");
7917                         requiredFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
7918                         fragments["capability"]                                 = "OpCapability Float16\n";                                     // Adds 16-bit float capability
7919                         specializations["OPTYPE_DEFINITIONS"]   = "%f16 = OpTypeFloat 16\n";                            // Adds 16-bit float type
7920                         specializations["TYPE_CONVERT"]                 = "%sc_op32 = OpConvertFToS %i32 %sc_op\n";     // Converts 16-bit float to 32-bit integer
7921                 }
7922
7923                 specializations["SC_DEF0"]                      = cases[caseNdx].scDefinition0;
7924                 specializations["SC_DEF1"]                      = cases[caseNdx].scDefinition1;
7925                 specializations["SC_RESULT_TYPE"]       = cases[caseNdx].scResultType;
7926                 specializations["SC_OP"]                        = cases[caseNdx].scOperation;
7927                 specializations["GEN_RESULT"]           = cases[caseNdx].resultOperation;
7928
7929                 fragments["decoration"]                         = tcu::StringTemplate(decorations1).specialize(specializations);
7930                 fragments["pre_main"]                           = tcu::StringTemplate(typesAndConstants1).specialize(specializations);
7931                 fragments["testfun"]                            = tcu::StringTemplate(function1).specialize(specializations);
7932
7933                 specConstants.append(&cases[caseNdx].scActualValue0, cases[caseNdx].scActualValueLength);
7934                 specConstants.append(&cases[caseNdx].scActualValue1, cases[caseNdx].scActualValueLength);
7935
7936                 createTestsForAllStages(
7937                         cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants,
7938                         noPushConstants, noResources, noInterfaces, extensions, requiredFeatures, group.get());
7939         }
7940
7941         const char      decorations2[]                  =
7942                 "OpDecorate %sc_0  SpecId 0\n"
7943                 "OpDecorate %sc_1  SpecId 1\n"
7944                 "OpDecorate %sc_2  SpecId 2\n";
7945
7946         const char      typesAndConstants2[]    =
7947                 "%vec3_0      = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
7948                 "%vec3_undef  = OpUndef %v3i32\n"
7949
7950                 "%sc_0        = OpSpecConstant %i32 0\n"
7951                 "%sc_1        = OpSpecConstant %i32 0\n"
7952                 "%sc_2        = OpSpecConstant %i32 0\n"
7953                 "%sc_vec3_0   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_0        %vec3_0      0\n"                                                 // (sc_0, 0,    0)
7954                 "%sc_vec3_1   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_1        %vec3_0      1\n"                                                 // (0,    sc_1, 0)
7955                 "%sc_vec3_2   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_2        %vec3_0      2\n"                                                 // (0,    0,    sc_2)
7956                 "%sc_vec3_0_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0   %vec3_undef  0          0xFFFFFFFF 2\n"   // (sc_0, ???,  0)
7957                 "%sc_vec3_1_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1          0\n"   // (???,  sc_1, 0)
7958                 "%sc_vec3_2_s = OpSpecConstantOp %v3i32 VectorShuffle    %vec3_undef  %sc_vec3_2   5          0xFFFFFFFF 5\n"   // (sc_2, ???,  sc_2)
7959                 "%sc_vec3_01  = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n"                                             // (0,    sc_0, sc_1)
7960                 "%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n"                                             // (sc_2, sc_0, sc_1)
7961                 "%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"                                                 // sc_2
7962                 "%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"                                                 // sc_0
7963                 "%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"                                                 // sc_1
7964                 "%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"                                                              // (sc_2 - sc_0)
7965                 "%sc_final    = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n";                                                             // (sc_2 - sc_0) * sc_1
7966
7967         const char      function2[]                             =
7968                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7969                 "%param     = OpFunctionParameter %v4f32\n"
7970                 "%label     = OpLabel\n"
7971                 "%result    = OpVariable %fp_v4f32 Function\n"
7972                 "             OpStore %result %param\n"
7973                 "%loc       = OpAccessChain %fp_f32 %result %sc_final\n"
7974                 "%val       = OpLoad %f32 %loc\n"
7975                 "%add       = OpFAdd %f32 %val %c_f32_0_5\n"
7976                 "             OpStore %loc %add\n"
7977                 "%ret       = OpLoad %v4f32 %result\n"
7978                 "             OpReturnValue %ret\n"
7979                 "             OpFunctionEnd\n";
7980
7981         map<string, string>     fragments;
7982         SpecConstants           specConstants;
7983
7984         fragments["decoration"] = decorations2;
7985         fragments["pre_main"]   = typesAndConstants2;
7986         fragments["testfun"]    = function2;
7987
7988         specConstants.append<deInt32>(56789);
7989         specConstants.append<deInt32>(-2);
7990         specConstants.append<deInt32>(56788);
7991
7992         createTestsForAllStages("vector_related", inputColors, outputColors2, fragments, specConstants, group.get());
7993
7994         return group.release();
7995 }
7996
7997 tcu::TestCaseGroup* createOpPhiTests(tcu::TestContext& testCtx)
7998 {
7999         de::MovePtr<tcu::TestCaseGroup> group                           (new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
8000         RGBA                                                    inputColors[4];
8001         RGBA                                                    outputColors1[4];
8002         RGBA                                                    outputColors2[4];
8003         RGBA                                                    outputColors3[4];
8004         RGBA                                                    outputColors4[4];
8005         map<string, string>                             fragments1;
8006         map<string, string>                             fragments2;
8007         map<string, string>                             fragments3;
8008         map<string, string>                             fragments4;
8009         std::vector<std::string>                extensions4;
8010         GraphicsResources                               resources4;
8011         VulkanFeatures                                  vulkanFeatures4;
8012
8013         const char      typesAndConstants1[]    =
8014                 "%c_f32_p2  = OpConstant %f32 0.2\n"
8015                 "%c_f32_p4  = OpConstant %f32 0.4\n"
8016                 "%c_f32_p5  = OpConstant %f32 0.5\n"
8017                 "%c_f32_p8  = OpConstant %f32 0.8\n";
8018
8019         // vec4 test_code(vec4 param) {
8020         //   vec4 result = param;
8021         //   for (int i = 0; i < 4; ++i) {
8022         //     float operand;
8023         //     switch (i) {
8024         //       case 0: operand = .2; break;
8025         //       case 1: operand = .5; break;
8026         //       case 2: operand = .4; break;
8027         //       case 3: operand = .0; break;
8028         //       default: break; // unreachable
8029         //     }
8030         //     result[i] += operand;
8031         //   }
8032         //   return result;
8033         // }
8034         const char      function1[]                             =
8035                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8036                 "%param1    = OpFunctionParameter %v4f32\n"
8037                 "%lbl       = OpLabel\n"
8038                 "%iptr      = OpVariable %fp_i32 Function\n"
8039                 "%result    = OpVariable %fp_v4f32 Function\n"
8040                 "             OpStore %iptr %c_i32_0\n"
8041                 "             OpStore %result %param1\n"
8042                 "             OpBranch %loop\n"
8043
8044                 "%loop      = OpLabel\n"
8045                 "%ival      = OpLoad %i32 %iptr\n"
8046                 "%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
8047                 "             OpLoopMerge %exit %phi None\n"
8048                 "             OpBranchConditional %lt_4 %entry %exit\n"
8049
8050                 "%entry     = OpLabel\n"
8051                 "%loc       = OpAccessChain %fp_f32 %result %ival\n"
8052                 "%val       = OpLoad %f32 %loc\n"
8053                 "             OpSelectionMerge %phi None\n"
8054                 "             OpSwitch %ival %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
8055
8056                 "%case0     = OpLabel\n"
8057                 "             OpBranch %phi\n"
8058                 "%case1     = OpLabel\n"
8059                 "             OpBranch %phi\n"
8060                 "%case2     = OpLabel\n"
8061                 "             OpBranch %phi\n"
8062                 "%case3     = OpLabel\n"
8063                 "             OpBranch %phi\n"
8064
8065                 "%default   = OpLabel\n"
8066                 "             OpUnreachable\n"
8067
8068                 "%phi       = OpLabel\n"
8069                 "%operand   = OpPhi %f32 %c_f32_p4 %case2 %c_f32_p5 %case1 %c_f32_p2 %case0 %c_f32_0 %case3\n" // not in the order of blocks
8070                 "%add       = OpFAdd %f32 %val %operand\n"
8071                 "             OpStore %loc %add\n"
8072                 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
8073                 "             OpStore %iptr %ival_next\n"
8074                 "             OpBranch %loop\n"
8075
8076                 "%exit      = OpLabel\n"
8077                 "%ret       = OpLoad %v4f32 %result\n"
8078                 "             OpReturnValue %ret\n"
8079
8080                 "             OpFunctionEnd\n";
8081
8082         fragments1["pre_main"]  = typesAndConstants1;
8083         fragments1["testfun"]   = function1;
8084
8085         getHalfColorsFullAlpha(inputColors);
8086
8087         outputColors1[0]                = RGBA(178, 255, 229, 255);
8088         outputColors1[1]                = RGBA(178, 127, 102, 255);
8089         outputColors1[2]                = RGBA(51,  255, 102, 255);
8090         outputColors1[3]                = RGBA(51,  127, 229, 255);
8091
8092         createTestsForAllStages("out_of_order", inputColors, outputColors1, fragments1, group.get());
8093
8094         const char      typesAndConstants2[]    =
8095                 "%c_f32_p2  = OpConstant %f32 0.2\n";
8096
8097         // Add .4 to the second element of the given parameter.
8098         const char      function2[]                             =
8099                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8100                 "%param     = OpFunctionParameter %v4f32\n"
8101                 "%entry     = OpLabel\n"
8102                 "%result    = OpVariable %fp_v4f32 Function\n"
8103                 "             OpStore %result %param\n"
8104                 "%loc       = OpAccessChain %fp_f32 %result %c_i32_1\n"
8105                 "%val       = OpLoad %f32 %loc\n"
8106                 "             OpBranch %phi\n"
8107
8108                 "%phi        = OpLabel\n"
8109                 "%step       = OpPhi %i32 %c_i32_0  %entry %step_next  %phi\n"
8110                 "%accum      = OpPhi %f32 %val      %entry %accum_next %phi\n"
8111                 "%step_next  = OpIAdd %i32 %step  %c_i32_1\n"
8112                 "%accum_next = OpFAdd %f32 %accum %c_f32_p2\n"
8113                 "%still_loop = OpSLessThan %bool %step %c_i32_2\n"
8114                 "              OpLoopMerge %exit %phi None\n"
8115                 "              OpBranchConditional %still_loop %phi %exit\n"
8116
8117                 "%exit       = OpLabel\n"
8118                 "              OpStore %loc %accum\n"
8119                 "%ret        = OpLoad %v4f32 %result\n"
8120                 "              OpReturnValue %ret\n"
8121
8122                 "              OpFunctionEnd\n";
8123
8124         fragments2["pre_main"]  = typesAndConstants2;
8125         fragments2["testfun"]   = function2;
8126
8127         outputColors2[0]                        = RGBA(127, 229, 127, 255);
8128         outputColors2[1]                        = RGBA(127, 102, 0,   255);
8129         outputColors2[2]                        = RGBA(0,   229, 0,   255);
8130         outputColors2[3]                        = RGBA(0,   102, 127, 255);
8131
8132         createTestsForAllStages("induction", inputColors, outputColors2, fragments2, group.get());
8133
8134         const char      typesAndConstants3[]    =
8135                 "%true      = OpConstantTrue %bool\n"
8136                 "%false     = OpConstantFalse %bool\n"
8137                 "%c_f32_p2  = OpConstant %f32 0.2\n";
8138
8139         // Swap the second and the third element of the given parameter.
8140         const char      function3[]                             =
8141                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8142                 "%param     = OpFunctionParameter %v4f32\n"
8143                 "%entry     = OpLabel\n"
8144                 "%result    = OpVariable %fp_v4f32 Function\n"
8145                 "             OpStore %result %param\n"
8146                 "%a_loc     = OpAccessChain %fp_f32 %result %c_i32_1\n"
8147                 "%a_init    = OpLoad %f32 %a_loc\n"
8148                 "%b_loc     = OpAccessChain %fp_f32 %result %c_i32_2\n"
8149                 "%b_init    = OpLoad %f32 %b_loc\n"
8150                 "             OpBranch %phi\n"
8151
8152                 "%phi        = OpLabel\n"
8153                 "%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
8154                 "%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
8155                 "%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
8156                 "              OpLoopMerge %exit %phi None\n"
8157                 "              OpBranchConditional %still_loop %phi %exit\n"
8158
8159                 "%exit       = OpLabel\n"
8160                 "              OpStore %a_loc %a_next\n"
8161                 "              OpStore %b_loc %b_next\n"
8162                 "%ret        = OpLoad %v4f32 %result\n"
8163                 "              OpReturnValue %ret\n"
8164
8165                 "              OpFunctionEnd\n";
8166
8167         fragments3["pre_main"]  = typesAndConstants3;
8168         fragments3["testfun"]   = function3;
8169
8170         outputColors3[0]                        = RGBA(127, 127, 127, 255);
8171         outputColors3[1]                        = RGBA(127, 0,   0,   255);
8172         outputColors3[2]                        = RGBA(0,   0,   127, 255);
8173         outputColors3[3]                        = RGBA(0,   127, 0,   255);
8174
8175         createTestsForAllStages("swap", inputColors, outputColors3, fragments3, group.get());
8176
8177         const char      typesAndConstants4[]    =
8178                 "%f16        = OpTypeFloat 16\n"
8179                 "%v4f16      = OpTypeVector %f16 4\n"
8180                 "%fp_f16     = OpTypePointer Function %f16\n"
8181                 "%fp_v4f16   = OpTypePointer Function %v4f16\n"
8182                 "%true       = OpConstantTrue %bool\n"
8183                 "%false      = OpConstantFalse %bool\n"
8184                 "%c_f32_p2   = OpConstant %f32 0.2\n";
8185
8186         // Swap the second and the third element of the given parameter.
8187         const char      function4[]                             =
8188                 "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8189                 "%param      = OpFunctionParameter %v4f32\n"
8190                 "%entry      = OpLabel\n"
8191                 "%result     = OpVariable %fp_v4f16 Function\n"
8192                 "%param16    = OpFConvert %v4f16 %param\n"
8193                 "              OpStore %result %param16\n"
8194                 "%a_loc      = OpAccessChain %fp_f16 %result %c_i32_1\n"
8195                 "%a_init     = OpLoad %f16 %a_loc\n"
8196                 "%b_loc      = OpAccessChain %fp_f16 %result %c_i32_2\n"
8197                 "%b_init     = OpLoad %f16 %b_loc\n"
8198                 "              OpBranch %phi\n"
8199
8200                 "%phi        = OpLabel\n"
8201                 "%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
8202                 "%a_next     = OpPhi %f16  %a_init %entry %b_next %phi\n"
8203                 "%b_next     = OpPhi %f16  %b_init %entry %a_next %phi\n"
8204                 "              OpLoopMerge %exit %phi None\n"
8205                 "              OpBranchConditional %still_loop %phi %exit\n"
8206
8207                 "%exit       = OpLabel\n"
8208                 "              OpStore %a_loc %a_next\n"
8209                 "              OpStore %b_loc %b_next\n"
8210                 "%ret16      = OpLoad %v4f16 %result\n"
8211                 "%ret        = OpFConvert %v4f32 %ret16\n"
8212                 "              OpReturnValue %ret\n"
8213
8214                 "              OpFunctionEnd\n";
8215
8216         fragments4["pre_main"]          = typesAndConstants4;
8217         fragments4["testfun"]           = function4;
8218         fragments4["capability"]        = "OpCapability StorageUniformBufferBlock16\nOpCapability Float16\n";
8219         fragments4["extension"]         = "OpExtension \"SPV_KHR_16bit_storage\"";
8220
8221         extensions4.push_back("VK_KHR_16bit_storage");
8222         extensions4.push_back("VK_KHR_shader_float16_int8");
8223
8224         vulkanFeatures4.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
8225         vulkanFeatures4.extFloat16Int8  = EXTFLOAT16INT8FEATURES_FLOAT16;
8226
8227         outputColors4[0]                        = RGBA(127, 127, 127, 255);
8228         outputColors4[1]                        = RGBA(127, 0,   0,   255);
8229         outputColors4[2]                        = RGBA(0,   0,   127, 255);
8230         outputColors4[3]                        = RGBA(0,   127, 0,   255);
8231
8232         createTestsForAllStages("swap16", inputColors, outputColors4, fragments4, resources4, extensions4, group.get(), vulkanFeatures4);
8233
8234         return group.release();
8235 }
8236
// Builds the "nocontraction" test group. It contains three cases ("multiplication", "addition",
// "both") that apply the NoContraction decoration to %mul, to %add, or to both, and registers each
// of them through createTestsForAllStages().
// The red channel of every input color is forced to 0 and the expected output is RGBA(0, 0, 0, 255)
// for all four colors, i.e. the test function is expected to select %c_vec4_0 (the %success path).
// Returns the raw pointer released from the local MovePtr.
8237 tcu::TestCaseGroup* createNoContractionTests(tcu::TestContext& testCtx)
8238 {
8239         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
8240         RGBA                                                    inputColors[4];
8241         RGBA                                                    outputColors[4];
8242
8243         // With NoContraction, (1 + 2^-23) * (1 - 2^-23) - 1 must be evaluated as a separate multiplication and addition.
8244         // The exact product is 1 - 2^-46, which needs more precision than a 32-bit float offers (it only has a 23-bit
8245         // fraction), so it is rounded either to 1 or to 1 - 2^-24 (0x1.fffffep-1). The final result is then 0 or -0x1p-24.
8246         // If the two operations were contracted instead, the result would be -2^-46, which is a normalized number exactly representable as a 32-bit float.
8247         const char                                              constantsAndTypes[]      =
8248                 "%c_vec4_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_1\n"
8249                 "%c_vec4_1       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
8250                 "%c_f32_1pl2_23  = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
8251                 "%c_f32_1mi2_23  = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
8252                 "%c_f32_n1pn24   = OpConstant %f32 -0x1p-24\n"; // -2^-24, the second accepted result
8253
             // Test function: %mul = (1 + 2^-23) * ((1 - 2^-23) + red), %add = %mul + %c_f32_n1.
             // %c_f32_n1 is not defined in this block (presumably -1.0 -- confirm against the shared preamble).
             // Both operands of %mul are read back from Function variables rather than used as constants directly.
8254         const char                                              function[]       =
8255                 "%test_code      = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8256                 "%param          = OpFunctionParameter %v4f32\n"
8257                 "%label          = OpLabel\n"
8258                 "%var1           = OpVariable %fp_f32 Function %c_f32_1pl2_23\n"
8259                 "%var2           = OpVariable %fp_f32 Function\n"
8260                 "%red            = OpCompositeExtract %f32 %param 0\n"
8261                 "%plus_red       = OpFAdd %f32 %c_f32_1mi2_23 %red\n"
8262                 "                  OpStore %var2 %plus_red\n"
8263                 "%val1           = OpLoad %f32 %var1\n"
8264                 "%val2           = OpLoad %f32 %var2\n"
8265                 "%mul            = OpFMul %f32 %val1 %val2\n"
8266                 "%add            = OpFAdd %f32 %mul %c_f32_n1\n"
8267                 "%is0            = OpFOrdEqual %bool %add %c_f32_0\n"
8268                 "%isn1n24         = OpFOrdEqual %bool %add %c_f32_n1pn24\n"
8269                 "%success        = OpLogicalOr %bool %is0 %isn1n24\n"
8270                 "%v4success      = OpCompositeConstruct %v4bool %success %success %success %success\n"
8271                 "%ret            = OpSelect %v4f32 %v4success %c_vec4_0 %c_vec4_1\n" // %c_vec4_0 on success, %c_vec4_1 otherwise
8272                 "                  OpReturnValue %ret\n"
8273                 "                  OpFunctionEnd\n";
8274
             // Pairs a test case name with the decoration text placed in the "decoration" fragment.
8275         struct CaseNameDecoration
8276         {
8277                 string name;
8278                 string decoration;
8279         };
8280
8281
8282         CaseNameDecoration tests[] = {
8283                 {"multiplication",      "OpDecorate %mul NoContraction"},
8284                 {"addition",            "OpDecorate %add NoContraction"},
8285                 {"both",                        "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"},
8286         };
8287
8288         getHalfColorsFullAlpha(inputColors);
8289
             // Zero the red channel so that %plus_red equals %c_f32_1mi2_23, and expect RGBA(0, 0, 0, 255) for every color.
8290         for (deUint8 idx = 0; idx < 4; ++idx)
8291         {
8292                 inputColors[idx].setRed(0);
8293                 outputColors[idx] = RGBA(0, 0, 0, 255);
8294         }
8295
             // One set of per-stage tests for each decoration variant; only the "decoration" fragment differs.
8296         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(CaseNameDecoration); ++testNdx)
8297         {
8298                 map<string, string> fragments;
8299
8300                 fragments["decoration"] = tests[testNdx].decoration;
8301                 fragments["pre_main"] = constantsAndTypes;
8302                 fragments["testfun"] = function;
8303
8304                 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, group.get());
8305         }
8306
8307         return group.release();
8308 }
8309
// Builds the "opmemoryaccess" test group. Each entry of tests[] supplies a memory access operand
// string that is substituted for ${access_type} on the OpLoad, OpStore and OpCopyMemory
// instructions of the test function, and one set of per-stage tests is registered per entry.
// The same color array is passed to createTestsForAllStages() as both the input and the expected output.
// Returns the raw pointer released from the local MovePtr.
8310 tcu::TestCaseGroup* createMemoryAccessTests(tcu::TestContext& testCtx)
8311 {
8312         de::MovePtr<tcu::TestCaseGroup> memoryAccessTests (new tcu::TestCaseGroup(testCtx, "opmemoryaccess", "Memory Semantics"));
8313         RGBA                                                    colors[4];
8314
             // Extra types for the test: a struct holding a vec4, a 2-element float array and a float,
             // plus Function-storage pointers to the array and to the struct.
8315         const char                                              constantsAndTypes[]      =
8316                 "%c_a2f32_1         = OpConstantComposite %a2f32 %c_f32_1 %c_f32_1\n"
8317                 "%fp_a2f32          = OpTypePointer Function %a2f32\n"
8318                 "%stype             = OpTypeStruct  %v4f32 %a2f32 %f32\n"
8319                 "%fp_stype          = OpTypePointer Function %stype\n";
8320
             // Test function template. It writes constants into %v1/%v2/%v3, moves them into the members of
             // struct %v with the access operand under test, copies %v to %vv and the float member to %vvv,
             // and finally scales %param1 by the two floats read back from the copies.
             // Both scalars trace back to %c_f32_1, so the result equals %param1 provided %c_f32_1 is 1.0
             // (it is defined outside this block -- confirm against the shared preamble).
8321         const char                                              function[]       =
8322                 "%test_code         = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8323                 "%param1            = OpFunctionParameter %v4f32\n"
8324                 "%lbl               = OpLabel\n"
8325                 "%v1                = OpVariable %fp_v4f32 Function\n"
8326                 "%v2                = OpVariable %fp_a2f32 Function\n"
8327                 "%v3                = OpVariable %fp_f32 Function\n"
8328                 "%v                 = OpVariable %fp_stype Function\n"
8329                 "%vv                = OpVariable %fp_stype Function\n"
8330                 "%vvv               = OpVariable %fp_f32 Function\n"
8331
8332                 "                     OpStore %v1 %c_v4f32_1_1_1_1\n"
8333                 "                     OpStore %v2 %c_a2f32_1\n"
8334                 "                     OpStore %v3 %c_f32_1\n"
8335
8336                 "%p_v4f32          = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
8337                 "%p_a2f32          = OpAccessChain %fp_a2f32 %v %c_u32_1\n"
8338                 "%p_f32            = OpAccessChain %fp_f32 %v %c_u32_2\n"
8339                 "%v1_v             = OpLoad %v4f32 %v1 ${access_type}\n"
8340                 "%v2_v             = OpLoad %a2f32 %v2 ${access_type}\n"
8341                 "%v3_v             = OpLoad %f32 %v3 ${access_type}\n"
8342
8343                 "                    OpStore %p_v4f32 %v1_v ${access_type}\n"
8344                 "                    OpStore %p_a2f32 %v2_v ${access_type}\n"
8345                 "                    OpStore %p_f32 %v3_v ${access_type}\n"
8346
8347                 "                    OpCopyMemory %vv %v ${access_type}\n" // whole-struct copy
8348                 "                    OpCopyMemory %vvv %p_f32 ${access_type}\n" // single float member copy
8349
8350                 "%p_f32_2          = OpAccessChain %fp_f32 %vv %c_u32_2\n"
8351                 "%v_f32_2          = OpLoad %f32 %p_f32_2\n"
8352                 "%v_f32_3          = OpLoad %f32 %vvv\n"
8353
8354                 "%ret1             = OpVectorTimesScalar %v4f32 %param1 %v_f32_2\n"
8355                 "%ret2             = OpVectorTimesScalar %v4f32 %ret1 %v_f32_3\n"
8356                 "                    OpReturnValue %ret2\n"
8357                 "                    OpFunctionEnd\n";
8358
             // Pairs a test case name with the text substituted for ${access_type}.
8359         struct NameMemoryAccess
8360         {
8361                 string name;
8362                 string accessType;
8363         };
8364
8365
8366         NameMemoryAccess tests[] =
8367         {
8368                 { "none", "" },
8369                 { "volatile", "Volatile" },
8370                 { "aligned",  "Aligned 1" },
8371                 { "volatile_aligned",  "Volatile|Aligned 1" },
8372                 { "nontemporal_aligned",  "Nontemporal|Aligned 1" },
8373                 { "volatile_nontemporal",  "Volatile|Nontemporal" },
8374                 { "volatile_nontermporal_aligned",  "Volatile|Nontemporal|Aligned 1" }, // NOTE(review): "nontermporal" looks like a typo for "nontemporal"; it is a test case name, so check what references it before renaming
8375         };
8376
8377         getHalfColorsFullAlpha(colors);
8378
8379         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameMemoryAccess); ++testNdx)
8380         {
8381                 map<string, string> fragments;
8382                 map<string, string> memoryAccess;
8383                 memoryAccess["access_type"] = tests[testNdx].accessType;
8384
8385                 fragments["pre_main"] = constantsAndTypes;
8386                 fragments["testfun"] = tcu::StringTemplate(function).specialize(memoryAccess); // fill in ${access_type}
8387                 createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, memoryAccessTests.get()); // expected output == input
8388         }
8389         return memoryAccessTests.release();
8390 }
// Builds the "opundef" test group.
// The first batch of cases only declares an OpUndef value of various types and returns the input
// unchanged. The remaining cases ("float32", "sint32", "uint32", "vec4float32", "matrix") feed the
// undefined value into a multiplication by zero and use the product in the computation of the result.
// Every case passes defaultColors to createTestsForAllStages() as both the input and the expected output.
// Returns the raw pointer released from the local MovePtr.
8391 tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
8392 {
8393         de::MovePtr<tcu::TestCaseGroup>         opUndefTests             (new tcu::TestCaseGroup(testCtx, "opundef", "Test OpUndef"));
8394         RGBA                                                            defaultColors[4];
8395         map<string, string>                                     fragments;
8396         getDefaultColors(defaultColors);
8397
8398         // First, simple cases that don't do anything with the OpUndef result.
             // name: test case name; decl: extra declarations placed in "pre_main" (may be empty);
             // type: the type id given to OpUndef.
8399         struct NameCodePair { string name, decl, type; };
8400         const NameCodePair tests[] =
8401         {
8402                 {"bool", "", "%bool"},
8403                 {"vec2uint32", "", "%v2u32"},
8404                 {"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown", "%type"},
8405                 {"sampler", "%type = OpTypeSampler", "%type"},
8406                 {"sampledimage", "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n" "%type = OpTypeSampledImage %img", "%type"},
8407                 {"pointer", "", "%fp_i32"},
8408                 {"runtimearray", "%type = OpTypeRuntimeArray %f32", "%type"},
8409                 {"array", "%c_u32_100 = OpConstant %u32 100\n" "%type = OpTypeArray %i32 %c_u32_100", "%type"},
8410                 {"struct", "%type = OpTypeStruct %f32 %i32 %u32", "%type"}};
8411         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
8412         {
8413                 fragments["undef_type"] = tests[testNdx].type; // consumed by ${undef_type} in the template below; the key stays in the map that is passed on
8414                 fragments["testfun"] = StringTemplate(
8415                         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8416                         "%param1 = OpFunctionParameter %v4f32\n"
8417                         "%label_testfun = OpLabel\n"
8418                         "%undef = OpUndef ${undef_type}\n"
8419                         "OpReturnValue %param1\n"
8420                         "OpFunctionEnd\n").specialize(fragments);
8421                 fragments["pre_main"] = tests[testNdx].decl;
8422                 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opUndefTests.get());
8423         }
8424         fragments.clear(); // drop "undef_type", "pre_main" and "testfun" left over from the loop
8425
             // float32: add (undef * 0) to component 0 of the input; a NaN product is replaced by 0 first.
8426         fragments["testfun"] =
8427                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8428                 "%param1 = OpFunctionParameter %v4f32\n"
8429                 "%label_testfun = OpLabel\n"
8430                 "%undef = OpUndef %f32\n"
8431                 "%zero = OpFMul %f32 %undef %c_f32_0\n"
8432                 "%is_nan = OpIsNan %bool %zero\n" // OpUndef may yield NaN, which would turn %zero into NaN.
8433                 "%actually_zero = OpSelect %f32 %is_nan %c_f32_0 %zero\n"
8434                 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8435                 "%b = OpFAdd %f32 %a %actually_zero\n"
8436                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
8437                 "OpReturnValue %ret\n"
8438                 "OpFunctionEnd\n";
8439
8440         createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
8441
             // sint32: use (undef * 0) as the dynamic index of the component that is extracted and written
             // back at index 0; the expected output equals the input only if that product is 0.
8442         fragments["testfun"] =
8443                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8444                 "%param1 = OpFunctionParameter %v4f32\n"
8445                 "%label_testfun = OpLabel\n"
8446                 "%undef = OpUndef %i32\n"
8447                 "%zero = OpIMul %i32 %undef %c_i32_0\n"
8448                 "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8449                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8450                 "OpReturnValue %ret\n"
8451                 "OpFunctionEnd\n";
8452
8453         createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
8454
             // uint32: same as sint32 but with an unsigned undef and result; note the zero operand is still %c_i32_0.
8455         fragments["testfun"] =
8456                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8457                 "%param1 = OpFunctionParameter %v4f32\n"
8458                 "%label_testfun = OpLabel\n"
8459                 "%undef = OpUndef %u32\n"
8460                 "%zero = OpIMul %u32 %undef %c_i32_0\n"
8461                 "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8462                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8463                 "OpReturnValue %ret\n"
8464                 "OpFunctionEnd\n";
8465
8466         createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
8467
             // vec4float32: scale an undefined vec4 by 0, replace any NaN component by 0, and add the
             // four components to the matching components of the input.
8468         fragments["testfun"] =
8469                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8470                 "%param1 = OpFunctionParameter %v4f32\n"
8471                 "%label_testfun = OpLabel\n"
8472                 "%undef = OpUndef %v4f32\n"
8473                 "%vzero = OpVectorTimesScalar %v4f32 %undef %c_f32_0\n"
8474                 "%zero_0 = OpVectorExtractDynamic %f32 %vzero %c_i32_0\n"
8475                 "%zero_1 = OpVectorExtractDynamic %f32 %vzero %c_i32_1\n"
8476                 "%zero_2 = OpVectorExtractDynamic %f32 %vzero %c_i32_2\n"
8477                 "%zero_3 = OpVectorExtractDynamic %f32 %vzero %c_i32_3\n"
8478                 "%is_nan_0 = OpIsNan %bool %zero_0\n"
8479                 "%is_nan_1 = OpIsNan %bool %zero_1\n"
8480                 "%is_nan_2 = OpIsNan %bool %zero_2\n"
8481                 "%is_nan_3 = OpIsNan %bool %zero_3\n"
8482                 "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8483                 "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8484                 "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8485                 "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8486                 "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8487                 "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8488                 "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8489                 "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8490                 "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8491                 "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8492                 "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8493                 "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8494                 "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8495                 "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8496                 "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8497                 "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8498                 "OpReturnValue %ret\n"
8499                 "OpFunctionEnd\n";
8500
8501         createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
8502
             // matrix: same scheme as vec4float32, but the undefined value is a 2x2 float matrix whose
             // type has to be declared in "pre_main" first.
8503         fragments["pre_main"] =
8504                 "%m2x2f32 = OpTypeMatrix %v2f32 2\n";
8505         fragments["testfun"] =
8506                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8507                 "%param1 = OpFunctionParameter %v4f32\n"
8508                 "%label_testfun = OpLabel\n"
8509                 "%undef = OpUndef %m2x2f32\n"
8510                 "%mzero = OpMatrixTimesScalar %m2x2f32 %undef %c_f32_0\n"
8511                 "%zero_0 = OpCompositeExtract %f32 %mzero 0 0\n"
8512                 "%zero_1 = OpCompositeExtract %f32 %mzero 0 1\n"
8513                 "%zero_2 = OpCompositeExtract %f32 %mzero 1 0\n"
8514                 "%zero_3 = OpCompositeExtract %f32 %mzero 1 1\n"
8515                 "%is_nan_0 = OpIsNan %bool %zero_0\n"
8516                 "%is_nan_1 = OpIsNan %bool %zero_1\n"
8517                 "%is_nan_2 = OpIsNan %bool %zero_2\n"
8518                 "%is_nan_3 = OpIsNan %bool %zero_3\n"
8519                 "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8520                 "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8521                 "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8522                 "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8523                 "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8524                 "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8525                 "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8526                 "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8527                 "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8528                 "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8529                 "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8530                 "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8531                 "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8532                 "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8533                 "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8534                 "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8535                 "OpReturnValue %ret\n"
8536                 "OpFunctionEnd\n";
8537
8538         createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
8539
8540         return opUndefTests.release();
8541 }
8542
8543 void createOpQuantizeSingleOptionTests(tcu::TestCaseGroup* testCtx)
8544 {
8545         const RGBA              inputColors[4]          =
8546         {
8547                 RGBA(0,         0,              0,              255),
8548                 RGBA(0,         0,              255,    255),
8549                 RGBA(0,         255,    0,              255),
8550                 RGBA(0,         255,    255,    255)
8551         };
8552
8553         const RGBA              expectedColors[4]       =
8554         {
8555                 RGBA(255,        0,              0,              255),
8556                 RGBA(255,        0,              0,              255),
8557                 RGBA(255,        0,              0,              255),
8558                 RGBA(255,        0,              0,              255)
8559         };
8560
8561         const struct SingleFP16Possibility
8562         {
8563                 const char* name;
8564                 const char* constant;  // Value to assign to %test_constant.
8565                 float           valueAsFloat;
8566                 const char* condition; // Must assign to %cond an expression that evaluates to true after %c = OpQuantizeToF16(%test_constant + 0).
8567         }                               tests[]                         =
8568         {
8569                 {
8570                         "negative",
8571                         "-0x1.3p1\n",
8572                         -constructNormalizedFloat(1, 0x300000),
8573                         "%cond = OpFOrdEqual %bool %c %test_constant\n"
8574                 }, // -19
8575                 {
8576                         "positive",
8577                         "0x1.0p7\n",
8578                         constructNormalizedFloat(7, 0x000000),
8579                         "%cond = OpFOrdEqual %bool %c %test_constant\n"
8580                 },  // +128
8581                 // SPIR-V requires that OpQuantizeToF16 flushes
8582                 // any numbers that would end up denormalized in F16 to zero.
8583                 {
8584                         "denorm",
8585                         "0x0.0006p-126\n",
8586                         std::ldexp(1.5f, -140),
8587                         "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
8588                 },  // denorm
8589                 {
8590                         "negative_denorm",
8591                         "-0x0.0006p-126\n",
8592                         -std::ldexp(1.5f, -140),
8593                         "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
8594                 }, // -denorm
8595                 {
8596                         "too_small",
8597                         "0x1.0p-16\n",
8598                         std::ldexp(1.0f, -16),
8599                         "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
8600                 },     // too small positive
8601                 {
8602                         "negative_too_small",
8603                         "-0x1.0p-32\n",
8604                         -std::ldexp(1.0f, -32),
8605                         "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
8606                 },      // too small negative
8607                 {
8608                         "negative_inf",
8609                         "-0x1.0p128\n",
8610                         -std::ldexp(1.0f, 128),
8611
8612                         "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
8613                         "%inf = OpIsInf %bool %c\n"
8614                         "%cond = OpLogicalAnd %bool %gz %inf\n"
8615                 },     // -inf to -inf
8616                 {
8617                         "inf",
8618                         "0x1.0p128\n",
8619                         std::ldexp(1.0f, 128),
8620
8621                         "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
8622                         "%inf = OpIsInf %bool %c\n"
8623                         "%cond = OpLogicalAnd %bool %gz %inf\n"
8624                 },     // +inf to +inf
8625                 {
8626                         "round_to_negative_inf",
8627                         "-0x1.0p32\n",
8628                         -std::ldexp(1.0f, 32),
8629
8630                         "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
8631                         "%inf = OpIsInf %bool %c\n"
8632                         "%cond = OpLogicalAnd %bool %gz %inf\n"
8633                 },     // round to -inf
8634                 {
8635                         "round_to_inf",
8636                         "0x1.0p16\n",
8637                         std::ldexp(1.0f, 16),
8638
8639                         "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
8640                         "%inf = OpIsInf %bool %c\n"
8641                         "%cond = OpLogicalAnd %bool %gz %inf\n"
8642                 },     // round to +inf
8643                 {
8644                         "nan",
8645                         "0x1.1p128\n",
8646                         std::numeric_limits<float>::quiet_NaN(),
8647
8648                         // Test for any NaN value, as NaNs are not preserved
8649                         "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
8650                         "%cond = OpIsNan %bool %direct_quant\n"
8651                 }, // nan
8652                 {
8653                         "negative_nan",
8654                         "-0x1.0001p128\n",
8655                         std::numeric_limits<float>::quiet_NaN(),
8656
8657                         // Test for any NaN value, as NaNs are not preserved
8658                         "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
8659                         "%cond = OpIsNan %bool %direct_quant\n"
8660                 } // -nan
8661         };
8662         const char*             constants                       =
8663                 "%test_constant = OpConstant %f32 ";  // The value will be test.constant.
8664
8665         StringTemplate  function                        (
8666                 "%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8667                 "%param1        = OpFunctionParameter %v4f32\n"
8668                 "%label_testfun = OpLabel\n"
8669                 "%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8670                 "%b             = OpFAdd %f32 %test_constant %a\n"
8671                 "%c             = OpQuantizeToF16 %f32 %b\n"
8672                 "${condition}\n"
8673                 "%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
8674                 "%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
8675                 "                 OpReturnValue %retval\n"
8676                 "OpFunctionEnd\n"
8677         );
8678
8679         const char*             specDecorations         = "OpDecorate %test_constant SpecId 0\n";
8680         const char*             specConstants           =
8681                         "%test_constant = OpSpecConstant %f32 0.\n"
8682                         "%c             = OpSpecConstantOp %f32 QuantizeToF16 %test_constant\n";
8683
8684         StringTemplate  specConstantFunction(
8685                 "%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8686                 "%param1        = OpFunctionParameter %v4f32\n"
8687                 "%label_testfun = OpLabel\n"
8688                 "${condition}\n"
8689                 "%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
8690                 "%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
8691                 "                 OpReturnValue %retval\n"
8692                 "OpFunctionEnd\n"
8693         );
8694
8695         for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
8696         {
8697                 map<string, string>                                                             codeSpecialization;
8698                 map<string, string>                                                             fragments;
8699                 codeSpecialization["condition"]                                 = tests[idx].condition;
8700                 fragments["testfun"]                                                    = function.specialize(codeSpecialization);
8701                 fragments["pre_main"]                                                   = string(constants) + tests[idx].constant + "\n";
8702                 createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
8703         }
8704
8705         for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
8706         {
8707                 map<string, string>                                                             codeSpecialization;
8708                 map<string, string>                                                             fragments;
8709                 SpecConstants                                                                   passConstants;
8710
8711                 codeSpecialization["condition"]                                 = tests[idx].condition;
8712                 fragments["testfun"]                                                    = specConstantFunction.specialize(codeSpecialization);
8713                 fragments["decoration"]                                                 = specDecorations;
8714                 fragments["pre_main"]                                                   = specConstants;
8715
8716                 passConstants.append<float>(tests[idx].valueAsFloat);
8717
8718                 createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
8719         }
8720 }
8721
8722 void createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup* testCtx)
8723 {
8724         RGBA inputColors[4] =  {
8725                 RGBA(0,         0,              0,              255),
8726                 RGBA(0,         0,              255,    255),
8727                 RGBA(0,         255,    0,              255),
8728                 RGBA(0,         255,    255,    255)
8729         };
8730
8731         RGBA expectedColors[4] =
8732         {
8733                 RGBA(255,        0,              0,              255),
8734                 RGBA(255,        0,              0,              255),
8735                 RGBA(255,        0,              0,              255),
8736                 RGBA(255,        0,              0,              255)
8737         };
8738
8739         struct DualFP16Possibility
8740         {
8741                 const char* name;
8742                 const char* input;
8743                 float           inputAsFloat;
8744                 const char* possibleOutput1;
8745                 const char* possibleOutput2;
8746         } tests[] = {
8747                 {
8748                         "positive_round_up_or_round_down",
8749                         "0x1.3003p8",
8750                         constructNormalizedFloat(8, 0x300300),
8751                         "0x1.304p8",
8752                         "0x1.3p8"
8753                 },
8754                 {
8755                         "negative_round_up_or_round_down",
8756                         "-0x1.6008p-7",
8757                         -constructNormalizedFloat(-7, 0x600800),
8758                         "-0x1.6p-7",
8759                         "-0x1.604p-7"
8760                 },
8761                 {
8762                         "carry_bit",
8763                         "0x1.01ep2",
8764                         constructNormalizedFloat(2, 0x01e000),
8765                         "0x1.01cp2",
8766                         "0x1.02p2"
8767                 },
8768                 {
8769                         "carry_to_exponent",
8770                         "0x1.ffep1",
8771                         constructNormalizedFloat(1, 0xffe000),
8772                         "0x1.ffcp1",
8773                         "0x1.0p2"
8774                 },
8775         };
8776         StringTemplate constants (
8777                 "%input_const = OpConstant %f32 ${input}\n"
8778                 "%possible_solution1 = OpConstant %f32 ${output1}\n"
8779                 "%possible_solution2 = OpConstant %f32 ${output2}\n"
8780                 );
8781
8782         StringTemplate specConstants (
8783                 "%input_const = OpSpecConstant %f32 0.\n"
8784                 "%possible_solution1 = OpConstant %f32 ${output1}\n"
8785                 "%possible_solution2 = OpConstant %f32 ${output2}\n"
8786         );
8787
8788         const char* specDecorations = "OpDecorate %input_const  SpecId 0\n";
8789
8790         const char* function  =
8791                 "%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8792                 "%param1        = OpFunctionParameter %v4f32\n"
8793                 "%label_testfun = OpLabel\n"
8794                 "%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8795                 // For the purposes of this test we assume that 0.f will always get
8796                 // faithfully passed through the pipeline stages.
8797                 "%b             = OpFAdd %f32 %input_const %a\n"
8798                 "%c             = OpQuantizeToF16 %f32 %b\n"
8799                 "%eq_1          = OpFOrdEqual %bool %c %possible_solution1\n"
8800                 "%eq_2          = OpFOrdEqual %bool %c %possible_solution2\n"
8801                 "%cond          = OpLogicalOr %bool %eq_1 %eq_2\n"
8802                 "%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
8803                 "%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1"
8804                 "                 OpReturnValue %retval\n"
8805                 "OpFunctionEnd\n";
8806
8807         for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
8808                 map<string, string>                                                                     fragments;
8809                 map<string, string>                                                                     constantSpecialization;
8810
8811                 constantSpecialization["input"]                                         = tests[idx].input;
8812                 constantSpecialization["output1"]                                       = tests[idx].possibleOutput1;
8813                 constantSpecialization["output2"]                                       = tests[idx].possibleOutput2;
8814                 fragments["testfun"]                                                            = function;
8815                 fragments["pre_main"]                                                           = constants.specialize(constantSpecialization);
8816                 createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
8817         }
8818
8819         for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
8820                 map<string, string>                                                                     fragments;
8821                 map<string, string>                                                                     constantSpecialization;
8822                 SpecConstants                                                                           passConstants;
8823
8824                 constantSpecialization["output1"]                                       = tests[idx].possibleOutput1;
8825                 constantSpecialization["output2"]                                       = tests[idx].possibleOutput2;
8826                 fragments["testfun"]                                                            = function;
8827                 fragments["decoration"]                                                         = specDecorations;
8828                 fragments["pre_main"]                                                           = specConstants.specialize(constantSpecialization);
8829
8830                 passConstants.append<float>(tests[idx].inputAsFloat);
8831
8832                 createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
8833         }
8834 }
8835
8836 tcu::TestCaseGroup* createOpQuantizeTests(tcu::TestContext& testCtx)
8837 {
8838         de::MovePtr<tcu::TestCaseGroup> opQuantizeTests (new tcu::TestCaseGroup(testCtx, "opquantize", "Test OpQuantizeToF16"));
8839         createOpQuantizeSingleOptionTests(opQuantizeTests.get());
8840         createOpQuantizeTwoPossibilityTests(opQuantizeTests.get());
8841         return opQuantizeTests.release();
8842 }
8843
// One selector per graphics shader stage. getShaderPermutation() fills each
// field with 0 or 1, and createModuleTests() uses the value as an index into
// its two-entry table of entry-point name suffixes.
// The field order matters: getShaderPermutation() initialises the struct
// with a positional aggregate initialiser.
struct ShaderPermutation
{
        deUint8 vertexPermutation;      // Selector for the vertex stage.
        deUint8 geometryPermutation;    // Selector for the geometry stage.
        deUint8 tesscPermutation;       // Selector for the tessellation control stage.
        deUint8 tessePermutation;       // Selector for the tessellation evaluation stage.
        deUint8 fragmentPermutation;    // Selector for the fragment stage.
};
8852
8853 ShaderPermutation getShaderPermutation(deUint8 inputValue)
8854 {
8855         ShaderPermutation       permutation =
8856         {
8857                 static_cast<deUint8>(inputValue & 0x10? 1u: 0u),
8858                 static_cast<deUint8>(inputValue & 0x08? 1u: 0u),
8859                 static_cast<deUint8>(inputValue & 0x04? 1u: 0u),
8860                 static_cast<deUint8>(inputValue & 0x02? 1u: 0u),
8861                 static_cast<deUint8>(inputValue & 0x01? 1u: 0u)
8862         };
8863         return permutation;
8864 }
8865
8866 tcu::TestCaseGroup* createModuleTests(tcu::TestContext& testCtx)
8867 {
8868         RGBA                                                            defaultColors[4];
8869         RGBA                                                            invertedColors[4];
8870         de::MovePtr<tcu::TestCaseGroup>         moduleTests                     (new tcu::TestCaseGroup(testCtx, "module", "Multiple entry points into shaders"));
8871
8872         getDefaultColors(defaultColors);
8873         getInvertedDefaultColors(invertedColors);
8874
8875         // Combined module tests
8876         {
8877                 // Shader stages: vertex and fragment
8878                 {
8879                         const ShaderElement combinedPipeline[]  =
8880                         {
8881                                 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
8882                                 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
8883                         };
8884
8885                         addFunctionCaseWithPrograms<InstanceContext>(
8886                                 moduleTests.get(), "same_module", "", createCombinedModule, runAndVerifyDefaultPipeline,
8887                                 createInstanceContext(combinedPipeline, map<string, string>()));
8888                 }
8889
8890                 // Shader stages: vertex, geometry and fragment
8891                 {
8892                         const ShaderElement combinedPipeline[]  =
8893                         {
8894                                 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
8895                                 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
8896                                 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
8897                         };
8898
8899                         addFunctionCaseWithPrograms<InstanceContext>(
8900                                 moduleTests.get(), "same_module_geom", "", createCombinedModule, runAndVerifyDefaultPipeline,
8901                                 createInstanceContext(combinedPipeline, map<string, string>()));
8902                 }
8903
8904                 // Shader stages: vertex, tessellation control, tessellation evaluation and fragment
8905                 {
8906                         const ShaderElement combinedPipeline[]  =
8907                         {
8908                                 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
8909                                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
8910                                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
8911                                 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
8912                         };
8913
8914                         addFunctionCaseWithPrograms<InstanceContext>(
8915                                 moduleTests.get(), "same_module_tessc_tesse", "", createCombinedModule, runAndVerifyDefaultPipeline,
8916                                 createInstanceContext(combinedPipeline, map<string, string>()));
8917                 }
8918
8919                 // Shader stages: vertex, tessellation control, tessellation evaluation, geometry and fragment
8920                 {
8921                         const ShaderElement combinedPipeline[]  =
8922                         {
8923                                 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
8924                                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
8925                                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
8926                                 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
8927                                 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
8928                         };
8929
8930                         addFunctionCaseWithPrograms<InstanceContext>(
8931                                 moduleTests.get(), "same_module_tessc_tesse_geom", "", createCombinedModule, runAndVerifyDefaultPipeline,
8932                                 createInstanceContext(combinedPipeline, map<string, string>()));
8933                 }
8934         }
8935
8936         const char* numbers[] =
8937         {
8938                 "1", "2"
8939         };
8940
8941         for (deInt8 idx = 0; idx < 32; ++idx)
8942         {
8943                 ShaderPermutation                       permutation             = getShaderPermutation(idx);
8944                 string                                          name                    = string("vert") + numbers[permutation.vertexPermutation] + "_geom" + numbers[permutation.geometryPermutation] + "_tessc" + numbers[permutation.tesscPermutation] + "_tesse" + numbers[permutation.tessePermutation] + "_frag" + numbers[permutation.fragmentPermutation];
8945                 const ShaderElement                     pipeline[]              =
8946                 {
8947                         ShaderElement("vert",   string("vert") +        numbers[permutation.vertexPermutation],         VK_SHADER_STAGE_VERTEX_BIT),
8948                         ShaderElement("geom",   string("geom") +        numbers[permutation.geometryPermutation],       VK_SHADER_STAGE_GEOMETRY_BIT),
8949                         ShaderElement("tessc",  string("tessc") +       numbers[permutation.tesscPermutation],          VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
8950                         ShaderElement("tesse",  string("tesse") +       numbers[permutation.tessePermutation],          VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
8951                         ShaderElement("frag",   string("frag") +        numbers[permutation.fragmentPermutation],       VK_SHADER_STAGE_FRAGMENT_BIT)
8952                 };
8953
8954                 // If there are an even number of swaps, then it should be no-op.
8955                 // If there are an odd number, the color should be flipped.
8956                 if ((permutation.vertexPermutation + permutation.geometryPermutation + permutation.tesscPermutation + permutation.tessePermutation + permutation.fragmentPermutation) % 2 == 0)
8957                 {
8958                         addFunctionCaseWithPrograms<InstanceContext>(
8959                                         moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
8960                                         createInstanceContext(pipeline, defaultColors, defaultColors, map<string, string>()));
8961                 }
8962                 else
8963                 {
8964                         addFunctionCaseWithPrograms<InstanceContext>(
8965                                         moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
8966                                         createInstanceContext(pipeline, defaultColors, invertedColors, map<string, string>()));
8967                 }
8968         }
8969         return moduleTests.release();
8970 }
8971
8972 std::string getUnusedVarTestNamePiece(const std::string& prefix, ShaderTask task)
8973 {
8974         switch (task)
8975         {
8976                 case SHADER_TASK_NONE:                  return "";
8977                 case SHADER_TASK_NORMAL:                return prefix + "_normal";
8978                 case SHADER_TASK_UNUSED_VAR:    return prefix + "_unused_var";
8979                 case SHADER_TASK_UNUSED_FUNC:   return prefix + "_unused_func";
8980                 default:                                                DE_ASSERT(DE_FALSE);
8981         }
8982         // unreachable
8983         return "";
8984 }
8985
8986 std::string getShaderTaskIndexName(ShaderTaskIndex index)
8987 {
8988         switch (index)
8989         {
8990         case SHADER_TASK_INDEX_VERTEX:                  return "vertex";
8991         case SHADER_TASK_INDEX_GEOMETRY:                return "geom";
8992         case SHADER_TASK_INDEX_TESS_CONTROL:    return "tessc";
8993         case SHADER_TASK_INDEX_TESS_EVAL:               return "tesse";
8994         case SHADER_TASK_INDEX_FRAGMENT:                return "frag";
8995         default:                                                                DE_ASSERT(DE_FALSE);
8996         }
8997         // unreachable
8998         return "";
8999 }
9000
9001 std::string getUnusedVarTestName(const ShaderTaskArray& shaderTasks, const VariableLocation& location)
9002 {
9003         std::string testName = location.toString();
9004
9005         for (size_t i = 0; i < DE_LENGTH_OF_ARRAY(shaderTasks); ++i)
9006         {
9007                 if (shaderTasks[i] != SHADER_TASK_NONE)
9008                 {
9009                         testName += "_" + getUnusedVarTestNamePiece(getShaderTaskIndexName((ShaderTaskIndex)i), shaderTasks[i]);
9010                 }
9011         }
9012
9013         return testName;
9014 }
9015
9016 tcu::TestCaseGroup* createUnusedVariableTests(tcu::TestContext& testCtx)
9017 {
9018         de::MovePtr<tcu::TestCaseGroup>         moduleTests                             (new tcu::TestCaseGroup(testCtx, "unused_variables", "Graphics shaders with unused variables"));
9019
9020         ShaderTaskArray                                         shaderCombinations[]    =
9021         {
9022                 // Vertex                                       Geometry                                        Tess. Control                           Tess. Evaluation                        Fragment
9023                 { SHADER_TASK_UNUSED_VAR,       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NORMAL      },
9024                 { SHADER_TASK_UNUSED_FUNC,      SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NORMAL      },
9025                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_UNUSED_VAR  },
9026                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_UNUSED_FUNC },
9027                 { SHADER_TASK_NORMAL,           SHADER_TASK_UNUSED_VAR,         SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NORMAL      },
9028                 { SHADER_TASK_NORMAL,           SHADER_TASK_UNUSED_FUNC,        SHADER_TASK_NONE,                       SHADER_TASK_NONE,                       SHADER_TASK_NORMAL      },
9029                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_UNUSED_VAR,         SHADER_TASK_NORMAL,                     SHADER_TASK_NORMAL      },
9030                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_UNUSED_FUNC,        SHADER_TASK_NORMAL,                     SHADER_TASK_NORMAL      },
9031                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_NORMAL,                     SHADER_TASK_UNUSED_VAR,         SHADER_TASK_NORMAL      },
9032                 { SHADER_TASK_NORMAL,           SHADER_TASK_NONE,                       SHADER_TASK_NORMAL,                     SHADER_TASK_UNUSED_FUNC,        SHADER_TASK_NORMAL      }
9033         };
9034
9035         const VariableLocation                          testLocations[] =
9036         {
9037                 // Set          Binding
9038                 { 0,            5                       },
9039                 { 5,            5                       },
9040         };
9041
9042         for (size_t combNdx = 0; combNdx < DE_LENGTH_OF_ARRAY(shaderCombinations); ++combNdx)
9043         {
9044                 for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
9045                 {
9046                         const ShaderTaskArray&  shaderTasks             = shaderCombinations[combNdx];
9047                         const VariableLocation& location                = testLocations[locationNdx];
9048                         std::string                             testName                = getUnusedVarTestName(shaderTasks, location);
9049
9050                         addFunctionCaseWithPrograms<UnusedVariableContext>(
9051                                 moduleTests.get(), testName, "", createUnusedVariableModules, runAndVerifyUnusedVariablePipeline,
9052                                 createUnusedVariableContext(shaderTasks, location));
9053                 }
9054         }
9055
9056         return moduleTests.release();
9057 }
9058
9059 tcu::TestCaseGroup* createLoopTests(tcu::TestContext& testCtx)
9060 {
9061         de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "loop", "Looping control flow"));
9062         RGBA defaultColors[4];
9063         getDefaultColors(defaultColors);
9064         map<string, string> fragments;
9065         fragments["pre_main"] =
9066                 "%c_f32_5 = OpConstant %f32 5.\n";
9067
9068         // A loop with a single block. The Continue Target is the loop block
9069         // itself. In SPIR-V terms, the "loop construct" contains no blocks at all
9070         // -- the "continue construct" forms the entire loop.
9071         fragments["testfun"] =
9072                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9073                 "%param1 = OpFunctionParameter %v4f32\n"
9074
9075                 "%entry = OpLabel\n"
9076                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9077                 "OpBranch %loop\n"
9078
9079                 ";adds and subtracts 1.0 to %val in alternate iterations\n"
9080                 "%loop = OpLabel\n"
9081                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9082                 "%delta = OpPhi %f32 %c_f32_1 %entry %minus_delta %loop\n"
9083                 "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9084                 "%val = OpFAdd %f32 %val1 %delta\n"
9085                 "%minus_delta = OpFSub %f32 %c_f32_0 %delta\n"
9086                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9087                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9088                 "OpLoopMerge %exit %loop None\n"
9089                 "OpBranchConditional %again %loop %exit\n"
9090
9091                 "%exit = OpLabel\n"
9092                 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9093                 "OpReturnValue %result\n"
9094
9095                 "OpFunctionEnd\n";
9096
9097         createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());
9098
9099         // Body comprised of multiple basic blocks.
9100         const StringTemplate multiBlock(
9101                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9102                 "%param1 = OpFunctionParameter %v4f32\n"
9103
9104                 "%entry = OpLabel\n"
9105                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9106                 "OpBranch %loop\n"
9107
9108                 ";adds and subtracts 1.0 to %val in alternate iterations\n"
9109                 "%loop = OpLabel\n"
9110                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %gather\n"
9111                 "%delta = OpPhi %f32 %c_f32_1 %entry %delta_next %gather\n"
9112                 "%val1 = OpPhi %f32 %val0 %entry %val %gather\n"
9113                 // There are several possibilities for the Continue Target below.  Each
9114                 // will be specialized into a separate test case.
9115                 "OpLoopMerge %exit ${continue_target} None\n"
9116                 "OpBranch %if\n"
9117
9118                 "%if = OpLabel\n"
9119                 ";delta_next = (delta > 0) ? -1 : 1;\n"
9120                 "%gt0 = OpFOrdGreaterThan %bool %delta %c_f32_0\n"
9121                 "OpSelectionMerge %gather DontFlatten\n"
9122                 "OpBranchConditional %gt0 %even %odd ;tells us if %count is even or odd\n"
9123
9124                 "%odd = OpLabel\n"
9125                 "OpBranch %gather\n"
9126
9127                 "%even = OpLabel\n"
9128                 "OpBranch %gather\n"
9129
9130                 "%gather = OpLabel\n"
9131                 "%delta_next = OpPhi %f32 %c_f32_n1 %even %c_f32_1 %odd\n"
9132                 "%val = OpFAdd %f32 %val1 %delta\n"
9133                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9134                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9135                 "OpBranchConditional %again %loop %exit\n"
9136
9137                 "%exit = OpLabel\n"
9138                 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9139                 "OpReturnValue %result\n"
9140
9141                 "OpFunctionEnd\n");
9142
9143         map<string, string> continue_target;
9144
9145         // The Continue Target is the loop block itself.
9146         continue_target["continue_target"] = "%loop";
9147         fragments["testfun"] = multiBlock.specialize(continue_target);
9148         createTestsForAllStages("multi_block_continue_construct", defaultColors, defaultColors, fragments, testGroup.get());
9149
9150         // The Continue Target is at the end of the loop.
9151         continue_target["continue_target"] = "%gather";
9152         fragments["testfun"] = multiBlock.specialize(continue_target);
9153         createTestsForAllStages("multi_block_loop_construct", defaultColors, defaultColors, fragments, testGroup.get());
9154
9155         // A loop with continue statement.
9156         fragments["testfun"] =
9157                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9158                 "%param1 = OpFunctionParameter %v4f32\n"
9159
9160                 "%entry = OpLabel\n"
9161                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9162                 "OpBranch %loop\n"
9163
9164                 ";adds 4, 3, and 1 to %val0 (skips 2)\n"
9165                 "%loop = OpLabel\n"
9166                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9167                 "%val1 = OpPhi %f32 %val0 %entry %val %continue\n"
9168                 "OpLoopMerge %exit %continue None\n"
9169                 "OpBranch %if\n"
9170
9171                 "%if = OpLabel\n"
9172                 ";skip if %count==2\n"
9173                 "%eq2 = OpIEqual %bool %count %c_i32_2\n"
9174                 "OpSelectionMerge %continue DontFlatten\n"
9175                 "OpBranchConditional %eq2 %continue %body\n"
9176
9177                 "%body = OpLabel\n"
9178                 "%fcount = OpConvertSToF %f32 %count\n"
9179                 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9180                 "OpBranch %continue\n"
9181
9182                 "%continue = OpLabel\n"
9183                 "%val = OpPhi %f32 %val2 %body %val1 %if\n"
9184                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9185                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9186                 "OpBranchConditional %again %loop %exit\n"
9187
9188                 "%exit = OpLabel\n"
9189                 "%same = OpFSub %f32 %val %c_f32_8\n"
9190                 "%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9191                 "OpReturnValue %result\n"
9192                 "OpFunctionEnd\n";
9193         createTestsForAllStages("continue", defaultColors, defaultColors, fragments, testGroup.get());
9194
9195         // A loop with break.
9196         fragments["testfun"] =
9197                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9198                 "%param1 = OpFunctionParameter %v4f32\n"
9199
9200                 "%entry = OpLabel\n"
9201                 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9202                 "%dot = OpDot %f32 %param1 %param1\n"
9203                 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9204                 "%zero = OpConvertFToU %u32 %div\n"
9205                 "%two = OpIAdd %i32 %zero %c_i32_2\n"
9206                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9207                 "OpBranch %loop\n"
9208
9209                 ";adds 4 and 3 to %val0 (exits early)\n"
9210                 "%loop = OpLabel\n"
9211                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9212                 "%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9213                 "OpLoopMerge %exit %continue None\n"
9214                 "OpBranch %if\n"
9215
9216                 "%if = OpLabel\n"
9217                 ";end loop if %count==%two\n"
9218                 "%above2 = OpSGreaterThan %bool %count %two\n"
9219                 "OpSelectionMerge %continue DontFlatten\n"
9220                 "OpBranchConditional %above2 %body %exit\n"
9221
9222                 "%body = OpLabel\n"
9223                 "%fcount = OpConvertSToF %f32 %count\n"
9224                 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9225                 "OpBranch %continue\n"
9226
9227                 "%continue = OpLabel\n"
9228                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9229                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9230                 "OpBranchConditional %again %loop %exit\n"
9231
9232                 "%exit = OpLabel\n"
9233                 "%val_post = OpPhi %f32 %val2 %continue %val1 %if\n"
9234                 "%same = OpFSub %f32 %val_post %c_f32_7\n"
9235                 "%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9236                 "OpReturnValue %result\n"
9237                 "OpFunctionEnd\n";
9238         createTestsForAllStages("break", defaultColors, defaultColors, fragments, testGroup.get());
9239
9240         // A loop with return.
9241         fragments["testfun"] =
9242                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9243                 "%param1 = OpFunctionParameter %v4f32\n"
9244
9245                 "%entry = OpLabel\n"
9246                 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9247                 "%dot = OpDot %f32 %param1 %param1\n"
9248                 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9249                 "%zero = OpConvertFToU %u32 %div\n"
9250                 "%two = OpIAdd %i32 %zero %c_i32_2\n"
9251                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9252                 "OpBranch %loop\n"
9253
9254                 ";returns early without modifying %param1\n"
9255                 "%loop = OpLabel\n"
9256                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9257                 "%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9258                 "OpLoopMerge %exit %continue None\n"
9259                 "OpBranch %if\n"
9260
9261                 "%if = OpLabel\n"
9262                 ";return if %count==%two\n"
9263                 "%above2 = OpSGreaterThan %bool %count %two\n"
9264                 "OpSelectionMerge %continue DontFlatten\n"
9265                 "OpBranchConditional %above2 %body %early_exit\n"
9266
9267                 "%early_exit = OpLabel\n"
9268                 "OpReturnValue %param1\n"
9269
9270                 "%body = OpLabel\n"
9271                 "%fcount = OpConvertSToF %f32 %count\n"
9272                 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9273                 "OpBranch %continue\n"
9274
9275                 "%continue = OpLabel\n"
9276                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9277                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9278                 "OpBranchConditional %again %loop %exit\n"
9279
9280                 "%exit = OpLabel\n"
9281                 ";should never get here, so return an incorrect result\n"
9282                 "%result = OpVectorInsertDynamic %v4f32 %param1 %val2 %c_i32_0\n"
9283                 "OpReturnValue %result\n"
9284                 "OpFunctionEnd\n";
9285         createTestsForAllStages("return", defaultColors, defaultColors, fragments, testGroup.get());
9286
9287         // Continue inside a switch block to break to enclosing loop's merge block.
9288         // Matches roughly the following GLSL code:
9289         // for (; keep_going; keep_going = false)
9290         // {
9291         //     switch (int(param1.x))
9292         //     {
9293         //         case 0: continue;
9294         //         case 1: continue;
9295         //         default: continue;
9296         //     }
9297         //     dead code: modify return value to invalid result.
9298         // }
9299         fragments["pre_main"] =
9300                 "%fp_bool = OpTypePointer Function %bool\n"
9301                 "%true = OpConstantTrue %bool\n"
9302                 "%false = OpConstantFalse %bool\n";
9303
9304         fragments["testfun"] =
9305                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9306                 "%param1 = OpFunctionParameter %v4f32\n"
9307
9308                 "%entry = OpLabel\n"
9309                 "%keep_going = OpVariable %fp_bool Function\n"
9310                 "%val_ptr = OpVariable %fp_f32 Function\n"
9311                 "%param1_x = OpCompositeExtract %f32 %param1 0\n"
9312                 "OpStore %keep_going %true\n"
9313                 "OpBranch %forloop_begin\n"
9314
9315                 "%forloop_begin = OpLabel\n"
9316                 "OpLoopMerge %forloop_merge %forloop_continue None\n"
9317                 "OpBranch %forloop\n"
9318
9319                 "%forloop = OpLabel\n"
9320                 "%for_condition = OpLoad %bool %keep_going\n"
9321                 "OpBranchConditional %for_condition %forloop_body %forloop_merge\n"
9322
9323                 "%forloop_body = OpLabel\n"
9324                 "OpStore %val_ptr %param1_x\n"
9325                 "%param1_x_int = OpConvertFToS %i32 %param1_x\n"
9326
9327                 "OpSelectionMerge %switch_merge None\n"
9328                 "OpSwitch %param1_x_int %default 0 %case_0 1 %case_1\n"
9329                 "%case_0 = OpLabel\n"
9330                 "OpBranch %forloop_continue\n"
9331                 "%case_1 = OpLabel\n"
9332                 "OpBranch %forloop_continue\n"
9333                 "%default = OpLabel\n"
9334                 "OpBranch %forloop_continue\n"
9335                 "%switch_merge = OpLabel\n"
9336                 ";should never get here, so change the return value to invalid result\n"
9337                 "OpStore %val_ptr %c_f32_1\n"
9338                 "OpBranch %forloop_continue\n"
9339
9340                 "%forloop_continue = OpLabel\n"
9341                 "OpStore %keep_going %false\n"
9342                 "OpBranch %forloop_begin\n"
9343                 "%forloop_merge = OpLabel\n"
9344
9345                 "%val = OpLoad %f32 %val_ptr\n"
9346                 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9347                 "OpReturnValue %result\n"
9348                 "OpFunctionEnd\n";
9349         createTestsForAllStages("switch_continue", defaultColors, defaultColors, fragments, testGroup.get());
9350
9351         return testGroup.release();
9352 }
9353
9354 // A collection of tests putting OpControlBarrier in places GLSL forbids but SPIR-V allows.
9355 tcu::TestCaseGroup* createBarrierTests(tcu::TestContext& testCtx)
9356 {
9357         de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "barrier", "OpControlBarrier"));
9358         map<string, string> fragments;
9359
9360         // A barrier inside a function body.
9361         fragments["pre_main"] =
9362                 "%Workgroup = OpConstant %i32 2\n"
9363                 "%WorkgroupAcquireRelease = OpConstant %i32 0x108\n";
9364         fragments["testfun"] =
9365                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9366                 "%param1 = OpFunctionParameter %v4f32\n"
9367                 "%label_testfun = OpLabel\n"
9368                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9369                 "OpReturnValue %param1\n"
9370                 "OpFunctionEnd\n";
9371         addTessCtrlTest(testGroup.get(), "in_function", fragments);
9372
9373         // Common setup code for the following tests.
9374         fragments["pre_main"] =
9375                 "%Workgroup = OpConstant %i32 2\n"
9376                 "%WorkgroupAcquireRelease = OpConstant %i32 0x108\n"
9377                 "%c_f32_5 = OpConstant %f32 5.\n";
9378         const string setupPercentZero =  // Begins %test_code function with code that sets %zero to 0u but cannot be optimized away.
9379                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9380                 "%param1 = OpFunctionParameter %v4f32\n"
9381                 "%entry = OpLabel\n"
9382                 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9383                 "%dot = OpDot %f32 %param1 %param1\n"
9384                 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9385                 "%zero = OpConvertFToU %u32 %div\n";
9386
9387         // Barriers inside OpSwitch branches.
9388         fragments["testfun"] =
9389                 setupPercentZero +
9390                 "OpSelectionMerge %switch_exit None\n"
9391                 "OpSwitch %zero %switch_default 0 %case0 1 %case1 ;should always go to %case0\n"
9392
9393                 "%case1 = OpLabel\n"
9394                 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9395                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9396                 "%wrong_branch_alert1 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9397                 "OpBranch %switch_exit\n"
9398
9399                 "%switch_default = OpLabel\n"
9400                 "%wrong_branch_alert2 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9401                 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9402                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9403                 "OpBranch %switch_exit\n"
9404
9405                 "%case0 = OpLabel\n"
9406                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9407                 "OpBranch %switch_exit\n"
9408
9409                 "%switch_exit = OpLabel\n"
9410                 "%ret = OpPhi %v4f32 %param1 %case0 %wrong_branch_alert1 %case1 %wrong_branch_alert2 %switch_default\n"
9411                 "OpReturnValue %ret\n"
9412                 "OpFunctionEnd\n";
9413         addTessCtrlTest(testGroup.get(), "in_switch", fragments);
9414
9415         // Barriers inside if-then-else.
9416         fragments["testfun"] =
9417                 setupPercentZero +
9418                 "%eq0 = OpIEqual %bool %zero %c_u32_0\n"
9419                 "OpSelectionMerge %exit DontFlatten\n"
9420                 "OpBranchConditional %eq0 %then %else\n"
9421
9422                 "%else = OpLabel\n"
9423                 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9424                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9425                 "%wrong_branch_alert = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9426                 "OpBranch %exit\n"
9427
9428                 "%then = OpLabel\n"
9429                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9430                 "OpBranch %exit\n"
9431                 "%exit = OpLabel\n"
9432                 "%ret = OpPhi %v4f32 %param1 %then %wrong_branch_alert %else\n"
9433                 "OpReturnValue %ret\n"
9434                 "OpFunctionEnd\n";
9435         addTessCtrlTest(testGroup.get(), "in_if", fragments);
9436
9437         // A barrier after control-flow reconvergence, tempting the compiler to attempt something like this:
9438         // http://lists.llvm.org/pipermail/llvm-dev/2009-October/026317.html.
9439         fragments["testfun"] =
9440                 setupPercentZero +
9441                 "%thread_id = OpLoad %i32 %BP_gl_InvocationID\n"
9442                 "%thread0 = OpIEqual %bool %thread_id %c_i32_0\n"
9443                 "OpSelectionMerge %exit DontFlatten\n"
9444                 "OpBranchConditional %thread0 %then %else\n"
9445
9446                 "%else = OpLabel\n"
9447                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9448                 "OpBranch %exit\n"
9449
9450                 "%then = OpLabel\n"
9451                 "%val1 = OpVectorExtractDynamic %f32 %param1 %zero\n"
9452                 "OpBranch %exit\n"
9453
9454                 "%exit = OpLabel\n"
9455                 "%val = OpPhi %f32 %val0 %else %val1 %then\n"
9456                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9457                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %val %zero\n"
9458                 "OpReturnValue %ret\n"
9459                 "OpFunctionEnd\n";
9460         addTessCtrlTest(testGroup.get(), "after_divergent_if", fragments);
9461
9462         // A barrier inside a loop.
9463         fragments["pre_main"] =
9464                 "%Workgroup = OpConstant %i32 2\n"
9465                 "%WorkgroupAcquireRelease = OpConstant %i32 0x108\n"
9466                 "%c_f32_10 = OpConstant %f32 10.\n";
9467         fragments["testfun"] =
9468                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9469                 "%param1 = OpFunctionParameter %v4f32\n"
9470                 "%entry = OpLabel\n"
9471                 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9472                 "OpBranch %loop\n"
9473
9474                 ";adds 4, 3, 2, and 1 to %val0\n"
9475                 "%loop = OpLabel\n"
9476                 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9477                 "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9478                 "OpControlBarrier %Workgroup %Workgroup %WorkgroupAcquireRelease\n"
9479                 "%fcount = OpConvertSToF %f32 %count\n"
9480                 "%val = OpFAdd %f32 %val1 %fcount\n"
9481                 "%count__ = OpISub %i32 %count %c_i32_1\n"
9482                 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9483                 "OpLoopMerge %exit %loop None\n"
9484                 "OpBranchConditional %again %loop %exit\n"
9485
9486                 "%exit = OpLabel\n"
9487                 "%same = OpFSub %f32 %val %c_f32_10\n"
9488                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9489                 "OpReturnValue %ret\n"
9490                 "OpFunctionEnd\n";
9491         addTessCtrlTest(testGroup.get(), "in_loop", fragments);
9492
9493         return testGroup.release();
9494 }
9495
9496 // Test for the OpFRem instruction.
9497 tcu::TestCaseGroup* createFRemTests(tcu::TestContext& testCtx)
9498 {
9499         de::MovePtr<tcu::TestCaseGroup>         testGroup(new tcu::TestCaseGroup(testCtx, "frem", "OpFRem"));
9500         map<string, string>                                     fragments;
9501         RGBA                                                            inputColors[4];
9502         RGBA                                                            outputColors[4];
9503
9504         fragments["pre_main"]                            =
9505                 "%c_f32_3 = OpConstant %f32 3.0\n"
9506                 "%c_f32_n3 = OpConstant %f32 -3.0\n"
9507                 "%c_f32_4 = OpConstant %f32 4.0\n"
9508                 "%c_f32_p75 = OpConstant %f32 0.75\n"
9509                 "%c_v4f32_p75_p75_p75_p75 = OpConstantComposite %v4f32 %c_f32_p75 %c_f32_p75 %c_f32_p75 %c_f32_p75 \n"
9510                 "%c_v4f32_4_4_4_4 = OpConstantComposite %v4f32 %c_f32_4 %c_f32_4 %c_f32_4 %c_f32_4\n"
9511                 "%c_v4f32_3_n3_3_n3 = OpConstantComposite %v4f32 %c_f32_3 %c_f32_n3 %c_f32_3 %c_f32_n3\n";
9512
9513         // The test does the following.
9514         // vec4 result = (param1 * 8.0) - 4.0;
9515         // return (frem(result.x,3) + 0.75, frem(result.y, -3) + 0.75, 0, 1)
9516         fragments["testfun"]                             =
9517                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9518                 "%param1 = OpFunctionParameter %v4f32\n"
9519                 "%label_testfun = OpLabel\n"
9520                 "%v_times_8 = OpVectorTimesScalar %v4f32 %param1 %c_f32_8\n"
9521                 "%minus_4 = OpFSub %v4f32 %v_times_8 %c_v4f32_4_4_4_4\n"
9522                 "%frem = OpFRem %v4f32 %minus_4 %c_v4f32_3_n3_3_n3\n"
9523                 "%added = OpFAdd %v4f32 %frem %c_v4f32_p75_p75_p75_p75\n"
9524                 "%xyz_1 = OpVectorInsertDynamic %v4f32 %added %c_f32_1 %c_i32_3\n"
9525                 "%xy_0_1 = OpVectorInsertDynamic %v4f32 %xyz_1 %c_f32_0 %c_i32_2\n"
9526                 "OpReturnValue %xy_0_1\n"
9527                 "OpFunctionEnd\n";
9528
9529
9530         inputColors[0]          = RGBA(16,      16,             0, 255);
9531         inputColors[1]          = RGBA(232, 232,        0, 255);
9532         inputColors[2]          = RGBA(232, 16,         0, 255);
9533         inputColors[3]          = RGBA(16,      232,    0, 255);
9534
9535         outputColors[0]         = RGBA(64,      64,             0, 255);
9536         outputColors[1]         = RGBA(255, 255,        0, 255);
9537         outputColors[2]         = RGBA(255, 64,         0, 255);
9538         outputColors[3]         = RGBA(64,      255,    0, 255);
9539
9540         createTestsForAllStages("frem", inputColors, outputColors, fragments, testGroup.get());
9541         return testGroup.release();
9542 }
9543
9544 // Test for the OpSRem instruction.
9545 tcu::TestCaseGroup* createOpSRemGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
9546 {
9547         de::MovePtr<tcu::TestCaseGroup>         testGroup(new tcu::TestCaseGroup(testCtx, "srem", "OpSRem"));
9548         map<string, string>                                     fragments;
9549
9550         fragments["pre_main"]                            =
9551                 "%c_f32_255 = OpConstant %f32 255.0\n"
9552                 "%c_i32_128 = OpConstant %i32 128\n"
9553                 "%c_i32_255 = OpConstant %i32 255\n"
9554                 "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
9555                 "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
9556                 "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
9557
9558         // The test does the following.
9559         // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
9560         // ivec4 result = ivec4(srem(ints.x, ints.y), srem(ints.y, ints.z), srem(ints.z, ints.x), 255);
9561         // return float(result + 128) / 255.0;
9562         fragments["testfun"]                             =
9563                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9564                 "%param1 = OpFunctionParameter %v4f32\n"
9565                 "%label_testfun = OpLabel\n"
9566                 "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
9567                 "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
9568                 "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
9569                 "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
9570                 "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
9571                 "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
9572                 "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
9573                 "%x_out = OpSRem %i32 %x_in %y_in\n"
9574                 "%y_out = OpSRem %i32 %y_in %z_in\n"
9575                 "%z_out = OpSRem %i32 %z_in %x_in\n"
9576                 "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
9577                 "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
9578                 "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
9579                 "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
9580                 "OpReturnValue %float_out\n"
9581                 "OpFunctionEnd\n";
9582
9583         const struct CaseParams
9584         {
9585                 const char*             name;
9586                 const char*             failMessageTemplate;    // customized status message
9587                 qpTestResult    failResult;                             // override status on failure
9588                 int                             operands[4][3];                 // four (x, y, z) vectors of operands
9589                 int                             results[4][3];                  // four (x, y, z) vectors of results
9590         } cases[] =
9591         {
9592                 {
9593                         "positive",
9594                         "${reason}",
9595                         QP_TEST_RESULT_FAIL,
9596                         { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } },                 // operands
9597                         { { 5, 12,  2 }, { 0, 5, 2 }, {  3, 8,  6 }, { 25, 60,   0 } },                 // results
9598                 },
9599                 {
9600                         "all",
9601                         "Inconsistent results, but within specification: ${reason}",
9602                         negFailResult,                                                                                                                  // negative operands, not required by the spec
9603                         { { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } },    // operands
9604                         { { 5, 12,  -2 }, {  0, -5, 2 }, {  3, 8,  -6 }, { 25, -60,   0 } },    // results
9605                 },
9606         };
9607         // If either operand is negative the result is undefined. Some implementations may still return correct values.
9608
9609         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
9610         {
9611                 const CaseParams&       params                  = cases[caseNdx];
9612                 RGBA                            inputColors[4];
9613                 RGBA                            outputColors[4];
9614
9615                 for (int i = 0; i < 4; ++i)
9616                 {
9617                         inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
9618                         outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
9619                 }
9620
9621                 createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
9622         }
9623
9624         return testGroup.release();
9625 }
9626
9627 // Test for the OpSMod instruction.
9628 tcu::TestCaseGroup* createOpSModGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
9629 {
9630         de::MovePtr<tcu::TestCaseGroup>         testGroup(new tcu::TestCaseGroup(testCtx, "smod", "OpSMod"));
9631         map<string, string>                                     fragments;
9632
9633         fragments["pre_main"]                            =
9634                 "%c_f32_255 = OpConstant %f32 255.0\n"
9635                 "%c_i32_128 = OpConstant %i32 128\n"
9636                 "%c_i32_255 = OpConstant %i32 255\n"
9637                 "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
9638                 "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
9639                 "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
9640
9641         // The test does the following.
9642         // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
9643         // ivec4 result = ivec4(smod(ints.x, ints.y), smod(ints.y, ints.z), smod(ints.z, ints.x), 255);
9644         // return float(result + 128) / 255.0;
9645         fragments["testfun"]                             =
9646                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9647                 "%param1 = OpFunctionParameter %v4f32\n"
9648                 "%label_testfun = OpLabel\n"
9649                 "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
9650                 "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
9651                 "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
9652                 "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
9653                 "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
9654                 "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
9655                 "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
9656                 "%x_out = OpSMod %i32 %x_in %y_in\n"
9657                 "%y_out = OpSMod %i32 %y_in %z_in\n"
9658                 "%z_out = OpSMod %i32 %z_in %x_in\n"
9659                 "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
9660                 "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
9661                 "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
9662                 "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
9663                 "OpReturnValue %float_out\n"
9664                 "OpFunctionEnd\n";
9665
9666         const struct CaseParams
9667         {
9668                 const char*             name;
9669                 const char*             failMessageTemplate;    // customized status message
9670                 qpTestResult    failResult;                             // override status on failure
9671                 int                             operands[4][3];                 // four (x, y, z) vectors of operands
9672                 int                             results[4][3];                  // four (x, y, z) vectors of results
9673         } cases[] =
9674         {
9675                 {
9676                         "positive",
9677                         "${reason}",
9678                         QP_TEST_RESULT_FAIL,
9679                         { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } },                         // operands
9680                         { { 5, 12,  2 }, { 0, 5, 2 }, {  3, 8,  6 }, { 25, 60,   0 } },                         // results
9681                 },
9682                 {
9683                         "all",
9684                         "Inconsistent results, but within specification: ${reason}",
9685                         negFailResult,                                                                                                                          // negative operands, not required by the spec
9686                         { { 5, 12, -17 }, { -5, -5,  7 }, { 75,   8, -81 }, {  25, -60, 100 } },        // operands
9687                         { { 5, -5,   3 }, {  0,  2, -3 }, {  3, -73,  69 }, { -35,  40,   0 } },        // results
9688                 },
9689         };
9690         // If either operand is negative the result is undefined. Some implementations may still return correct values.
9691
9692         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
9693         {
9694                 const CaseParams&       params                  = cases[caseNdx];
9695                 RGBA                            inputColors[4];
9696                 RGBA                            outputColors[4];
9697
9698                 for (int i = 0; i < 4; ++i)
9699                 {
9700                         inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
9701                         outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
9702                 }
9703
9704                 createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
9705         }
9706         return testGroup.release();
9707 }
9708
// Data types distinguished by the conversion helper functions.
// Enumerators are numbered sequentially, starting from zero.
enum ConversionDataType
{
	// Signed integers.
	DATA_TYPE_SIGNED_8 = 0,
	DATA_TYPE_SIGNED_16,
	DATA_TYPE_SIGNED_32,
	DATA_TYPE_SIGNED_64,

	// Unsigned integers.
	DATA_TYPE_UNSIGNED_8,
	DATA_TYPE_UNSIGNED_16,
	DATA_TYPE_UNSIGNED_32,
	DATA_TYPE_UNSIGNED_64,

	// Floating point.
	DATA_TYPE_FLOAT_16,
	DATA_TYPE_FLOAT_32,
	DATA_TYPE_FLOAT_64,

	// Two-component signed integer vectors.
	DATA_TYPE_VEC2_SIGNED_16,
	DATA_TYPE_VEC2_SIGNED_32
};
9725
9726 const string getBitWidthStr (ConversionDataType type)
9727 {
9728         switch (type)
9729         {
9730                 case DATA_TYPE_SIGNED_8:
9731                 case DATA_TYPE_UNSIGNED_8:
9732                         return "8";
9733
9734                 case DATA_TYPE_SIGNED_16:
9735                 case DATA_TYPE_UNSIGNED_16:
9736                 case DATA_TYPE_FLOAT_16:
9737                         return "16";
9738
9739                 case DATA_TYPE_SIGNED_32:
9740                 case DATA_TYPE_UNSIGNED_32:
9741                 case DATA_TYPE_FLOAT_32:
9742                 case DATA_TYPE_VEC2_SIGNED_16:
9743                         return "32";
9744
9745                 case DATA_TYPE_SIGNED_64:
9746                 case DATA_TYPE_UNSIGNED_64:
9747                 case DATA_TYPE_FLOAT_64:
9748                 case DATA_TYPE_VEC2_SIGNED_32:
9749                         return "64";
9750
9751                 default:
9752                         DE_ASSERT(false);
9753         }
9754         return "";
9755 }
9756
9757 const string getByteWidthStr (ConversionDataType type)
9758 {
9759         switch (type)
9760         {
9761                 case DATA_TYPE_SIGNED_8:
9762                 case DATA_TYPE_UNSIGNED_8:
9763                         return "1";
9764
9765                 case DATA_TYPE_SIGNED_16:
9766                 case DATA_TYPE_UNSIGNED_16:
9767                 case DATA_TYPE_FLOAT_16:
9768                         return "2";
9769
9770                 case DATA_TYPE_SIGNED_32:
9771                 case DATA_TYPE_UNSIGNED_32:
9772                 case DATA_TYPE_FLOAT_32:
9773                 case DATA_TYPE_VEC2_SIGNED_16:
9774                         return "4";
9775
9776                 case DATA_TYPE_SIGNED_64:
9777                 case DATA_TYPE_UNSIGNED_64:
9778                 case DATA_TYPE_FLOAT_64:
9779                 case DATA_TYPE_VEC2_SIGNED_32:
9780                         return "8";
9781
9782                 default:
9783                         DE_ASSERT(false);
9784         }
9785         return "";
9786 }
9787
9788 bool isSigned (ConversionDataType type)
9789 {
9790         switch (type)
9791         {
9792                 case DATA_TYPE_SIGNED_8:
9793                 case DATA_TYPE_SIGNED_16:
9794                 case DATA_TYPE_SIGNED_32:
9795                 case DATA_TYPE_SIGNED_64:
9796                 case DATA_TYPE_FLOAT_16:
9797                 case DATA_TYPE_FLOAT_32:
9798                 case DATA_TYPE_FLOAT_64:
9799                 case DATA_TYPE_VEC2_SIGNED_16:
9800                 case DATA_TYPE_VEC2_SIGNED_32:
9801                         return true;
9802
9803                 case DATA_TYPE_UNSIGNED_8:
9804                 case DATA_TYPE_UNSIGNED_16:
9805                 case DATA_TYPE_UNSIGNED_32:
9806                 case DATA_TYPE_UNSIGNED_64:
9807                         return false;
9808
9809                 default:
9810                         DE_ASSERT(false);
9811         }
9812         return false;
9813 }
9814
9815 bool isInt (ConversionDataType type)
9816 {
9817         switch (type)
9818         {
9819                 case DATA_TYPE_SIGNED_8:
9820                 case DATA_TYPE_SIGNED_16:
9821                 case DATA_TYPE_SIGNED_32:
9822                 case DATA_TYPE_SIGNED_64:
9823                 case DATA_TYPE_UNSIGNED_8:
9824                 case DATA_TYPE_UNSIGNED_16:
9825                 case DATA_TYPE_UNSIGNED_32:
9826                 case DATA_TYPE_UNSIGNED_64:
9827                         return true;
9828
9829                 case DATA_TYPE_FLOAT_16:
9830                 case DATA_TYPE_FLOAT_32:
9831                 case DATA_TYPE_FLOAT_64:
9832                 case DATA_TYPE_VEC2_SIGNED_16:
9833                 case DATA_TYPE_VEC2_SIGNED_32:
9834                         return false;
9835
9836                 default:
9837                         DE_ASSERT(false);
9838         }
9839         return false;
9840 }
9841
9842 bool isFloat (ConversionDataType type)
9843 {
9844         switch (type)
9845         {
9846                 case DATA_TYPE_SIGNED_8:
9847                 case DATA_TYPE_SIGNED_16:
9848                 case DATA_TYPE_SIGNED_32:
9849                 case DATA_TYPE_SIGNED_64:
9850                 case DATA_TYPE_UNSIGNED_8:
9851                 case DATA_TYPE_UNSIGNED_16:
9852                 case DATA_TYPE_UNSIGNED_32:
9853                 case DATA_TYPE_UNSIGNED_64:
9854                 case DATA_TYPE_VEC2_SIGNED_16:
9855                 case DATA_TYPE_VEC2_SIGNED_32:
9856                         return false;
9857
9858                 case DATA_TYPE_FLOAT_16:
9859                 case DATA_TYPE_FLOAT_32:
9860                 case DATA_TYPE_FLOAT_64:
9861                         return true;
9862
9863                 default:
9864                         DE_ASSERT(false);
9865         }
9866         return false;
9867 }
9868
9869 const string getTypeName (ConversionDataType type)
9870 {
9871         string prefix = isSigned(type) ? "" : "u";
9872
9873         if              (isInt(type))                                           return prefix + "int"   + getBitWidthStr(type);
9874         else if (isFloat(type))                                         return prefix + "float" + getBitWidthStr(type);
9875         else if (type == DATA_TYPE_VEC2_SIGNED_16)      return "i16vec2";
9876         else if (type == DATA_TYPE_VEC2_SIGNED_32)      return "i32vec2";
9877         else                                                                            DE_ASSERT(false);
9878
9879         return "";
9880 }
9881
9882 const string getTestName (ConversionDataType from, ConversionDataType to, const char* suffix)
9883 {
9884         const string fullSuffix(suffix == DE_NULL ? "" : string("_") + string(suffix));
9885
9886         return getTypeName(from) + "_to_" + getTypeName(to) + fullSuffix;
9887 }
9888
9889 const string getAsmTypeName (ConversionDataType type)
9890 {
9891         string prefix;
9892
9893         if              (isInt(type))                                           prefix = isSigned(type) ? "i" : "u";
9894         else if (isFloat(type))                                         prefix = "f";
9895         else if (type == DATA_TYPE_VEC2_SIGNED_16)      return "i16vec2";
9896         else if (type == DATA_TYPE_VEC2_SIGNED_32)      return "v2i32";
9897         else                                                                            DE_ASSERT(false);
9898
9899         return prefix + getBitWidthStr(type);
9900 }
9901
9902 template<typename T>
9903 BufferSp getSpecializedBuffer (deInt64 number)
9904 {
9905         return BufferSp(new Buffer<T>(vector<T>(1, (T)number)));
9906 }
9907
9908 BufferSp getBuffer (ConversionDataType type, deInt64 number)
9909 {
9910         switch (type)
9911         {
9912                 case DATA_TYPE_SIGNED_8:                return getSpecializedBuffer<deInt8>(number);
9913                 case DATA_TYPE_SIGNED_16:               return getSpecializedBuffer<deInt16>(number);
9914                 case DATA_TYPE_SIGNED_32:               return getSpecializedBuffer<deInt32>(number);
9915                 case DATA_TYPE_SIGNED_64:               return getSpecializedBuffer<deInt64>(number);
9916                 case DATA_TYPE_UNSIGNED_8:              return getSpecializedBuffer<deUint8>(number);
9917                 case DATA_TYPE_UNSIGNED_16:             return getSpecializedBuffer<deUint16>(number);
9918                 case DATA_TYPE_UNSIGNED_32:             return getSpecializedBuffer<deUint32>(number);
9919                 case DATA_TYPE_UNSIGNED_64:             return getSpecializedBuffer<deUint64>(number);
9920                 case DATA_TYPE_FLOAT_16:                return getSpecializedBuffer<deUint16>(number);
9921                 case DATA_TYPE_FLOAT_32:                return getSpecializedBuffer<deUint32>(number);
9922                 case DATA_TYPE_FLOAT_64:                return getSpecializedBuffer<deUint64>(number);
9923                 case DATA_TYPE_VEC2_SIGNED_16:  return getSpecializedBuffer<deUint32>(number);
9924                 case DATA_TYPE_VEC2_SIGNED_32:  return getSpecializedBuffer<deUint64>(number);
9925
9926                 default:                                                TCU_THROW(InternalError, "Unimplemented type passed");
9927         }
9928 }
9929
9930 bool usesInt8 (ConversionDataType from, ConversionDataType to)
9931 {
9932         return (from == DATA_TYPE_SIGNED_8 || to == DATA_TYPE_SIGNED_8 ||
9933                         from == DATA_TYPE_UNSIGNED_8 || to == DATA_TYPE_UNSIGNED_8);
9934 }
9935
9936 bool usesInt16 (ConversionDataType from, ConversionDataType to)
9937 {
9938         return (from == DATA_TYPE_SIGNED_16 || to == DATA_TYPE_SIGNED_16 ||
9939                         from == DATA_TYPE_UNSIGNED_16 || to == DATA_TYPE_UNSIGNED_16 ||
9940                         from == DATA_TYPE_VEC2_SIGNED_16 || to == DATA_TYPE_VEC2_SIGNED_16);
9941 }
9942
9943 bool usesInt32 (ConversionDataType from, ConversionDataType to)
9944 {
9945         return (from == DATA_TYPE_SIGNED_32 || to == DATA_TYPE_SIGNED_32 ||
9946                         from == DATA_TYPE_UNSIGNED_32 || to == DATA_TYPE_UNSIGNED_32 ||
9947                         from == DATA_TYPE_VEC2_SIGNED_32|| to == DATA_TYPE_VEC2_SIGNED_32);
9948 }
9949
9950 bool usesInt64 (ConversionDataType from, ConversionDataType to)
9951 {
9952         return (from == DATA_TYPE_SIGNED_64 || to == DATA_TYPE_SIGNED_64 ||
9953                         from == DATA_TYPE_UNSIGNED_64 || to == DATA_TYPE_UNSIGNED_64);
9954 }
9955
9956 bool usesFloat16 (ConversionDataType from, ConversionDataType to)
9957 {
9958         return (from == DATA_TYPE_FLOAT_16 || to == DATA_TYPE_FLOAT_16);
9959 }
9960
9961 bool usesFloat32 (ConversionDataType from, ConversionDataType to)
9962 {
9963         return (from == DATA_TYPE_FLOAT_32 || to == DATA_TYPE_FLOAT_32);
9964 }
9965
9966 bool usesFloat64 (ConversionDataType from, ConversionDataType to)
9967 {
9968         return (from == DATA_TYPE_FLOAT_64 || to == DATA_TYPE_FLOAT_64);
9969 }
9970
9971 void getVulkanFeaturesAndExtensions (ConversionDataType from, ConversionDataType to, VulkanFeatures& vulkanFeatures, vector<string>& extensions)
9972 {
9973         if (usesInt16(from, to) && !usesInt32(from, to))
9974                 vulkanFeatures.coreFeatures.shaderInt16 = DE_TRUE;
9975
9976         if (usesInt64(from, to))
9977                 vulkanFeatures.coreFeatures.shaderInt64 = DE_TRUE;
9978
9979         if (usesFloat64(from, to))
9980                 vulkanFeatures.coreFeatures.shaderFloat64 = DE_TRUE;
9981
9982         if (usesInt16(from, to) || usesFloat16(from, to))
9983         {
9984                 extensions.push_back("VK_KHR_16bit_storage");
9985                 vulkanFeatures.ext16BitStorage |= EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
9986         }
9987
9988         if (usesFloat16(from, to) || usesInt8(from, to))
9989         {
9990                 extensions.push_back("VK_KHR_shader_float16_int8");
9991
9992                 if (usesFloat16(from, to))
9993                 {
9994                         vulkanFeatures.extFloat16Int8 |= EXTFLOAT16INT8FEATURES_FLOAT16;
9995                 }
9996
9997                 if (usesInt8(from, to))
9998                 {
9999                         vulkanFeatures.extFloat16Int8 |= EXTFLOAT16INT8FEATURES_INT8;
10000
10001                         extensions.push_back("VK_KHR_8bit_storage");
10002                         vulkanFeatures.ext8BitStorage |= EXT8BITSTORAGEFEATURES_STORAGE_BUFFER;
10003                 }
10004         }
10005 }
10006
10007 struct ConvertCase
10008 {
10009         ConvertCase (const string& instruction, ConversionDataType from, ConversionDataType to, deInt64 number, bool separateOutput = false, deInt64 outputNumber = 0, const char* suffix = DE_NULL)
10010         : m_fromType            (from)
10011         , m_toType                      (to)
10012         , m_name                        (getTestName(from, to, suffix))
10013         , m_inputBuffer         (getBuffer(from, number))
10014         {
10015                 string caps;
10016                 string decl;
10017                 string exts;
10018
10019                 m_asmTypes["inputType"]         = getAsmTypeName(from);
10020                 m_asmTypes["outputType"]        = getAsmTypeName(to);
10021
10022                 if (separateOutput)
10023                         m_outputBuffer = getBuffer(to, outputNumber);
10024                 else
10025                         m_outputBuffer = getBuffer(to, number);
10026
10027                 if (usesInt8(from, to))
10028                 {
10029                         bool requiresInt8Capability = true;
10030                         if (instruction == "OpUConvert" || instruction == "OpSConvert")
10031                         {
10032                                 // Conversions between 8 and 32 bit are provided by SPV_KHR_8bit_storage. The rest requires explicit Int8
10033                                 if (usesInt32(from, to))
10034                                         requiresInt8Capability = false;
10035                         }
10036
10037                         caps += "OpCapability StorageBuffer8BitAccess\n";
10038                         if (requiresInt8Capability)
10039                                 caps += "OpCapability Int8\n";
10040
10041                         decl += "%i8         = OpTypeInt 8 1\n"
10042                                         "%u8         = OpTypeInt 8 0\n";
10043                         exts += "OpExtension \"SPV_KHR_8bit_storage\"\n";
10044                 }
10045
10046                 if (usesInt16(from, to))
10047                 {
10048                         bool requiresInt16Capability = true;
10049
10050                         if (instruction == "OpUConvert" || instruction == "OpSConvert" || instruction == "OpFConvert")
10051                         {
10052                                 // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10053                                 if (usesInt32(from, to) || usesFloat32(from, to))
10054                                         requiresInt16Capability = false;
10055                         }
10056
10057                         decl += "%i16        = OpTypeInt 16 1\n"
10058                                         "%u16        = OpTypeInt 16 0\n"
10059                                         "%i16vec2    = OpTypeVector %i16 2\n";
10060
10061                         // Conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10062                         if (requiresInt16Capability)
10063                                 caps += "OpCapability Int16\n";
10064                 }
10065
10066                 if (usesFloat16(from, to))
10067                 {
10068                         decl += "%f16        = OpTypeFloat 16\n";
10069
10070                         // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Float16
10071                         if (!usesFloat32(from, to))
10072                                 caps += "OpCapability Float16\n";
10073                 }
10074
10075                 if (usesInt16(from, to) || usesFloat16(from, to))
10076                 {
10077                         caps += "OpCapability StorageUniformBufferBlock16\n";
10078                         exts += "OpExtension \"SPV_KHR_16bit_storage\"\n";
10079                 }
10080
10081                 if (usesInt64(from, to))
10082                 {
10083                         caps += "OpCapability Int64\n";
10084                         decl += "%i64        = OpTypeInt 64 1\n"
10085                                         "%u64        = OpTypeInt 64 0\n";
10086                 }
10087
10088                 if (usesFloat64(from, to))
10089                 {
10090                         caps += "OpCapability Float64\n";
10091                         decl += "%f64        = OpTypeFloat 64\n";
10092                 }
10093
10094                 m_asmTypes["datatype_capabilities"]             = caps;
10095                 m_asmTypes["datatype_additional_decl"]  = decl;
10096                 m_asmTypes["datatype_extensions"]               = exts;
10097         }
10098
10099         ConversionDataType              m_fromType;
10100         ConversionDataType              m_toType;
10101         string                                  m_name;
10102         map<string, string>             m_asmTypes;
10103         BufferSp                                m_inputBuffer;
10104         BufferSp                                m_outputBuffer;
10105 };
10106
10107 const string getConvertCaseShaderStr (const string& instruction, const ConvertCase& convertCase)
10108 {
10109         map<string, string> params = convertCase.m_asmTypes;
10110
10111         params["instruction"]   = instruction;
10112         params["inDecorator"]   = getByteWidthStr(convertCase.m_fromType);
10113         params["outDecorator"]  = getByteWidthStr(convertCase.m_toType);
10114
10115         const StringTemplate shader (
10116                 "OpCapability Shader\n"
10117                 "${datatype_capabilities}"
10118                 "${datatype_extensions:opt}"
10119                 "OpMemoryModel Logical GLSL450\n"
10120                 "OpEntryPoint GLCompute %main \"main\"\n"
10121                 "OpExecutionMode %main LocalSize 1 1 1\n"
10122                 "OpSource GLSL 430\n"
10123                 "OpName %main           \"main\"\n"
10124                 // Decorators
10125                 "OpDecorate %indata DescriptorSet 0\n"
10126                 "OpDecorate %indata Binding 0\n"
10127                 "OpDecorate %outdata DescriptorSet 0\n"
10128                 "OpDecorate %outdata Binding 1\n"
10129                 "OpDecorate %in_buf BufferBlock\n"
10130                 "OpDecorate %out_buf BufferBlock\n"
10131                 "OpMemberDecorate %in_buf 0 Offset 0\n"
10132                 "OpMemberDecorate %out_buf 0 Offset 0\n"
10133                 // Base types
10134                 "%void       = OpTypeVoid\n"
10135                 "%voidf      = OpTypeFunction %void\n"
10136                 "%u32        = OpTypeInt 32 0\n"
10137                 "%i32        = OpTypeInt 32 1\n"
10138                 "%f32        = OpTypeFloat 32\n"
10139                 "%v2i32      = OpTypeVector %i32 2\n"
10140                 "${datatype_additional_decl}"
10141                 "%uvec3      = OpTypeVector %u32 3\n"
10142                 // Derived types
10143                 "%in_ptr     = OpTypePointer Uniform %${inputType}\n"
10144                 "%out_ptr    = OpTypePointer Uniform %${outputType}\n"
10145                 "%in_buf     = OpTypeStruct %${inputType}\n"
10146                 "%out_buf    = OpTypeStruct %${outputType}\n"
10147                 "%in_bufptr  = OpTypePointer Uniform %in_buf\n"
10148                 "%out_bufptr = OpTypePointer Uniform %out_buf\n"
10149                 "%indata     = OpVariable %in_bufptr Uniform\n"
10150                 "%outdata    = OpVariable %out_bufptr Uniform\n"
10151                 // Constants
10152                 "%zero       = OpConstant %i32 0\n"
10153                 // Main function
10154                 "%main       = OpFunction %void None %voidf\n"
10155                 "%label      = OpLabel\n"
10156                 "%inloc      = OpAccessChain %in_ptr %indata %zero\n"
10157                 "%outloc     = OpAccessChain %out_ptr %outdata %zero\n"
10158                 "%inval      = OpLoad %${inputType} %inloc\n"
10159                 "%conv       = ${instruction} %${outputType} %inval\n"
10160                 "              OpStore %outloc %conv\n"
10161                 "              OpReturn\n"
10162                 "              OpFunctionEnd\n"
10163         );
10164
10165         return shader.specialize(params);
10166 }
10167
10168 void createConvertCases (vector<ConvertCase>& testCases, const string& instruction)
10169 {
10170         if (instruction == "OpUConvert")
10171         {
10172                 // Convert unsigned int to unsigned int
10173                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_UNSIGNED_16,          42));
10174                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_UNSIGNED_32,          73));
10175                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_UNSIGNED_64,          121));
10176
10177                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_UNSIGNED_8,           33));
10178                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_UNSIGNED_32,          60653));
10179                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_UNSIGNED_64,          17991));
10180
10181                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_UNSIGNED_64,          904256275));
10182                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_UNSIGNED_16,          6275));
10183                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_UNSIGNED_8,           17));
10184
10185                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_UNSIGNED_32,          701256243));
10186                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_UNSIGNED_16,          4741));
10187                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_UNSIGNED_8,           65));
10188
10189                 // Zero extension for int->uint
10190                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_16,          56));
10191                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_32,          -47,                                                            true,   209));
10192                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_64,          -5,                                                                     true,   251));
10193                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_32,          14669));
10194                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_64,          -3341,                                                          true,   62195));
10195                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_64,          973610259));
10196
10197                 // Truncate for int->uint
10198                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_8,           -25711,                                                         true,   145));
10199                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_8,           103));
10200                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_8,           -1067742499291926803ll,                         true,   237));
10201                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_16,          12382));
10202                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_32,          -972812359,                                                     true,   3322154937u));
10203                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_16,          -1067742499291926803ll,                         true,   61165));
10204         }
10205         else if (instruction == "OpSConvert")
10206         {
10207                 // Sign extension int->int
10208                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_SIGNED_16,            -30));
10209                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_SIGNED_32,            55));
10210                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_SIGNED_64,            -3));
10211                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_SIGNED_32,            14669));
10212                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_SIGNED_64,            -3341));
10213                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_SIGNED_64,            973610259));
10214
10215                 // Truncate for int->int
10216                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_SIGNED_8,                     81));
10217                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_SIGNED_8,                     -93));
10218                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_SIGNED_8,                     3182748172687672ll,                                     true,   56));
10219                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_SIGNED_16,            12382));
10220                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_SIGNED_32,            -972812359));
10221                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_SIGNED_16,            -1067742499291926803ll,                         true,   -4371));
10222
10223                 // Sign extension for int->uint
10224                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_16,          56));
10225                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_32,          -47,                                                            true,   4294967249u));
10226                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_UNSIGNED_64,          -5,                                                                     true,   18446744073709551611ull));
10227                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_32,          14669));
10228                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_64,          -3341,                                                          true,   18446744073709548275ull));
10229                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_64,          973610259));
10230
10231                 // Truncate for int->uint
10232                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_UNSIGNED_8,           -25711,                                                         true,   145));
10233                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_8,           103));
10234                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_8,           -1067742499291926803ll,                         true,   237));
10235                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_UNSIGNED_16,          12382));
10236                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_32,          -972812359,                                                     true,   3322154937u));
10237                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_UNSIGNED_16,          -1067742499291926803ll,                         true,   61165));
10238
10239                 // Sign extension for uint->int
10240                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_SIGNED_16,            71));
10241                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_SIGNED_32,            201,                                                            true,   -55));
10242                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_SIGNED_64,            188,                                                            true,   -68));
10243                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_SIGNED_32,            14669));
10244                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_SIGNED_64,            62195,                                                          true,   -3341));
10245                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_SIGNED_64,            973610259));
10246
10247                 // Truncate for uint->int
10248                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_SIGNED_8,                     67));
10249                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_SIGNED_8,                     133,                                                            true,   -123));
10250                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_SIGNED_8,                     836927654193256494ull,                          true,   46));
10251                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_SIGNED_16,            12382));
10252                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_SIGNED_32,            18446744072736739257ull,                        true,   -972812359));
10253                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_SIGNED_16,            17379001574417624813ull,                        true,   -4371));
10254
10255                 // Convert i16vec2 to i32vec2 and vice versa
10256                 // Unsigned values are used here to represent negative signed values and to allow defined shifting behaviour.
10257                 // The actual signed value -32123 is used here as uint16 value 33413 and uint32 value 4294935173
10258                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_VEC2_SIGNED_16,       DATA_TYPE_VEC2_SIGNED_32,       (33413u << 16)                  | 27593,        true,   (4294935173ull << 32)   | 27593));
10259                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_VEC2_SIGNED_32,       DATA_TYPE_VEC2_SIGNED_16,       (4294935173ull << 32)   | 27593,        true,   (33413u << 16)                  | 27593));
10260         }
10261         else if (instruction == "OpFConvert")
10262         {
10263                 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10264                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_FLOAT_64,                     0x449a4000,                                                     true,   0x4093480000000000));
10265                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_FLOAT_32,                     0x4093480000000000,                                     true,   0x449a4000));
10266
10267                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_FLOAT_16,                     0x449a4000,                                                     true,   0x64D2));
10268                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_FLOAT_32,                     0x64D2,                                                         true,   0x449a4000));
10269
10270                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_FLOAT_64,                     0x64D2,                                                         true,   0x4093480000000000));
10271                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_FLOAT_16,                     0x4093480000000000,                                     true,   0x64D2));
10272         }
10273         else if (instruction == "OpConvertFToU")
10274         {
10275                 // Normal numbers from uint8 range
10276                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_8,           0x5020,                                                         true,   33,                                                                     "33"));
10277                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_8,           0x42280000,                                                     true,   42,                                                                     "42"));
10278                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_8,           0x4067800000000000ull,                          true,   188,                                                            "188"));
10279
10280                 // Maximum uint8 value
10281                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_8,           0x5BF8,                                                         true,   255,                                                            "max"));
10282                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_8,           0x437F0000,                                                     true,   255,                                                            "max"));
10283                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_8,           0x406FE00000000000ull,                          true,   255,                                                            "max"));
10284
10285                 // +0
10286                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_8,           0x0000,                                                         true,   0,                                                                      "p0"));
10287                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_8,           0x00000000,                                                     true,   0,                                                                      "p0"));
10288                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_8,           0x0000000000000000ull,                          true,   0,                                                                      "p0"));
10289
10290                 // -0
10291                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_8,           0x8000,                                                         true,   0,                                                                      "m0"));
10292                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_8,           0x80000000,                                                     true,   0,                                                                      "m0"));
10293                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_8,           0x8000000000000000ull,                          true,   0,                                                                      "m0"));
10294
10295                 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10296                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_16,          0x64D2,                                                         true,   1234,                                                           "1234"));
10297                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_32,          0x64D2,                                                         true,   1234,                                                           "1234"));
10298                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_64,          0x64D2,                                                         true,   1234,                                                           "1234"));
10299
10300                 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10301                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_16,          0x7BFF,                                                         true,   65504,                                                          "max"));
10302                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_32,          0x7BFF,                                                         true,   65504,                                                          "max"));
10303                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_64,          0x7BFF,                                                         true,   65504,                                                          "max"));
10304
10305                 // +0
10306                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_32,          0x0000,                                                         true,   0,                                                                      "p0"));
10307                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_16,          0x0000,                                                         true,   0,                                                                      "p0"));
10308                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_64,          0x0000,                                                         true,   0,                                                                      "p0"));
10309
10310                 // -0
10311                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_16,          0x8000,                                                         true,   0,                                                                      "m0"));
10312                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_32,          0x8000,                                                         true,   0,                                                                      "m0"));
10313                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_UNSIGNED_64,          0x8000,                                                         true,   0,                                                                      "m0"));
10314
10315                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_16,          0x449a4000,                                                     true,   1234));
10316                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_32,          0x449a4000,                                                     true,   1234));
10317                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_UNSIGNED_64,          0x449a4000,                                                     true,   1234));
10318                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_16,          0x4093480000000000,                                     true,   1234));
10319                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_32,          0x4093480000000000,                                     true,   1234));
10320                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_UNSIGNED_64,          0x4093480000000000,                                     true,   1234));
10321         }
10322         else if (instruction == "OpConvertUToF")
10323         {
10324                 // Normal numbers from uint8 range
10325                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_16,                     116,                                                            true,   0x5740,                                                         "116"));
10326                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_32,                     232,                                                            true,   0x43680000,                                                     "232"));
10327                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_64,                     164,                                                            true,   0x4064800000000000ull,                          "164"));
10328
10329                 // Maximum uint8 value
10330                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_16,                     255,                                                            true,   0x5BF8,                                                         "max"));
10331                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_32,                     255,                                                            true,   0x437F0000,                                                     "max"));
10332                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_8,           DATA_TYPE_FLOAT_64,                     255,                                                            true,   0x406FE00000000000ull,                          "max"));
10333
10334                 // All hexadecimal values below represent 1234.0 as 32/64-bit IEEE 754 float
10335                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_FLOAT_16,                     1234,                                                           true,   0x64D2,                                                         "1234"));
10336                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_FLOAT_16,                     1234,                                                           true,   0x64D2,                                                         "1234"));
10337                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_16,                     1234,                                                           true,   0x64D2,                                                         "1234"));
10338
10339                 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10340                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_FLOAT_16,                     65504,                                                          true,   0x7BFF,                                                         "max"));
10341                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_FLOAT_16,                     65504,                                                          true,   0x7BFF,                                                         "max"));
10342                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_16,                     65504,                                                          true,   0x7BFF,                                                         "max"));
10343
10344                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_FLOAT_32,                     1234,                                                           true,   0x449a4000));
10345                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_16,          DATA_TYPE_FLOAT_64,                     1234,                                                           true,   0x4093480000000000));
10346                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_FLOAT_32,                     1234,                                                           true,   0x449a4000));
10347                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_32,          DATA_TYPE_FLOAT_64,                     1234,                                                           true,   0x4093480000000000));
10348                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_32,                     1234,                                                           true,   0x449a4000));
10349                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_UNSIGNED_64,          DATA_TYPE_FLOAT_64,                     1234,                                                           true,   0x4093480000000000));
10350         }
10351         else if (instruction == "OpConvertFToS")
10352         {
10353                 // Normal numbers from int8 range
10354                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0xC980,                                                         true,   -11,                                                            "m11"));
10355                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0xC2140000,                                                     true,   -37,                                                            "m37"));
10356                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0xC050800000000000ull,                          true,   -66,                                                            "m66"));
10357
10358                 // Minimum int8 value
10359                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0xD800,                                                         true,   -128,                                                           "min"));
10360                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0xC3000000,                                                     true,   -128,                                                           "min"));
10361                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0xC060000000000000ull,                          true,   -128,                                                           "min"));
10362
10363                 // Maximum int8 value
10364                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0x57F0,                                                         true,   127,                                                            "max"));
10365                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0x42FE0000,                                                     true,   127,                                                            "max"));
10366                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0x405FC00000000000ull,                          true,   127,                                                            "max"));
10367
10368                 // +0
10369                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0x0000,                                                         true,   0,                                                                      "p0"));
10370                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0x00000000,                                                     true,   0,                                                                      "p0"));
10371                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0x0000000000000000ull,                          true,   0,                                                                      "p0"));
10372
10373                 // -0
10374                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_8,                     0x8000,                                                         true,   0,                                                                      "m0"));
10375                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_8,                     0x80000000,                                                     true,   0,                                                                      "m0"));
10376                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_8,                     0x8000000000000000ull,                          true,   0,                                                                      "m0"));
10377
10378                 // All hexadecimal values below represent -1234.0 as 32/64-bit IEEE 754 float
10379                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0xE4D2,                                                         true,   -1234,                                                          "m1234"));
10380                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0xE4D2,                                                         true,   -1234,                                                          "m1234"));
10381                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0xE4D2,                                                         true,   -1234,                                                          "m1234"));
10382
10383                 // 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
10384                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0xF800,                                                         true,   -32768,                                                         "min"));
10385                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0xF800,                                                         true,   -32768,                                                         "min"));
10386                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0xF800,                                                         true,   -32768,                                                         "min"));
10387
10388                 // 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
10389                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0x77FF,                                                         true,   32752,                                                          "max"));
10390                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0x77FF,                                                         true,   32752,                                                          "max"));
10391                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0x77FF,                                                         true,   32752,                                                          "max"));
10392
10393                 // +0
10394                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0x0000,                                                         true,   0,                                                                      "p0"));
10395                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0x0000,                                                         true,   0,                                                                      "p0"));
10396                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0x0000,                                                         true,   0,                                                                      "p0"));
10397
10398                 // -0
10399                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_16,            0x8000,                                                         true,   0,                                                                      "m0"));
10400                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_32,            0x8000,                                                         true,   0,                                                                      "m0"));
10401                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_16,                     DATA_TYPE_SIGNED_64,            0x8000,                                                         true,   0,                                                                      "m0"));
10402
10403                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_16,            0xc49a4000,                                                     true,   -1234));
10404                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_32,            0xc49a4000,                                                     true,   -1234));
10405                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_64,            0xc49a4000,                                                     true,   -1234));
10406                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_16,            0xc093480000000000,                                     true,   -1234));
10407                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_32,            0xc093480000000000,                                     true,   -1234));
10408                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_64,                     DATA_TYPE_SIGNED_64,            0xc093480000000000,                                     true,   -1234));
10409                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_16,            0x453b9000,                                                     true,    3001,                                                          "p3001"));
10410                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_FLOAT_32,                     DATA_TYPE_SIGNED_16,            0xc53b9000,                                                     true,   -3001,                                                          "m3001"));
10411         }
10412         else if (instruction == "OpConvertSToF")
10413         {
10414                 // Normal numbers from int8 range
10415                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_16,                     -12,                                                            true,   0xCA00,                                                         "m21"));
10416                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_32,                     -21,                                                            true,   0xC1A80000,                                                     "m21"));
10417                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_64,                     -99,                                                            true,   0xC058C00000000000ull,                          "m99"));
10418
10419                 // Minimum int8 value
10420                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_16,                     -128,                                                           true,   0xD800,                                                         "min"));
10421                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_32,                     -128,                                                           true,   0xC3000000,                                                     "min"));
10422                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_64,                     -128,                                                           true,   0xC060000000000000ull,                          "min"));
10423
10424                 // Maximum int8 value
10425                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_16,                     127,                                                            true,   0x57F0,                                                         "max"));
10426                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_32,                     127,                                                            true,   0x42FE0000,                                                     "max"));
10427                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_8,                     DATA_TYPE_FLOAT_64,                     127,                                                            true,   0x405FC00000000000ull,                          "max"));
10428
10429                 // All hexadecimal values below represent 1234.0 as 32/64-bit IEEE 754 float
10430                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_16,                     -1234,                                                          true,   0xE4D2,                                                         "m1234"));
10431                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_16,                     -1234,                                                          true,   0xE4D2,                                                         "m1234"));
10432                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_16,                     -1234,                                                          true,   0xE4D2,                                                         "m1234"));
10433
10434                 // 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
10435                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_16,                     -32768,                                                         true,   0xF800,                                                         "min"));
10436                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_16,                     -32768,                                                         true,   0xF800,                                                         "min"));
10437                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_16,                     -32768,                                                         true,   0xF800,                                                         "min"));
10438
10439                 // 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
10440                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_16,                     32752,                                                          true,   0x77FF,                                                         "max"));
10441                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_16,                     32752,                                                          true,   0x77FF,                                                         "max"));
10442                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_16,                     32752,                                                          true,   0x77FF,                                                         "max"));
10443
10444                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_32,                     -1234,                                                          true,   0xc49a4000));
10445                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_16,            DATA_TYPE_FLOAT_64,                     -1234,                                                          true,   0xc093480000000000));
10446                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_32,                     -1234,                                                          true,   0xc49a4000));
10447                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_32,            DATA_TYPE_FLOAT_64,                     -1234,                                                          true,   0xc093480000000000));
10448                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_32,                     -1234,                                                          true,   0xc49a4000));
10449                 testCases.push_back(ConvertCase(instruction,    DATA_TYPE_SIGNED_64,            DATA_TYPE_FLOAT_64,                     -1234,                                                          true,   0xc093480000000000));
10450         }
10451         else
10452                 DE_FATAL("Unknown instruction");
10453 }
10454
10455 const map<string, string> getConvertCaseFragments (string instruction, const ConvertCase& convertCase)
10456 {
10457         map<string, string> params = convertCase.m_asmTypes;
10458         map<string, string> fragments;
10459
10460         params["instruction"] = instruction;
10461         params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
10462
10463         const StringTemplate decoration (
10464                 "      OpDecorate %SSBOi DescriptorSet 0\n"
10465                 "      OpDecorate %SSBOo DescriptorSet 0\n"
10466                 "      OpDecorate %SSBOi Binding 0\n"
10467                 "      OpDecorate %SSBOo Binding 1\n"
10468                 "      OpDecorate %s_SSBOi Block\n"
10469                 "      OpDecorate %s_SSBOo Block\n"
10470                 "OpMemberDecorate %s_SSBOi 0 Offset 0\n"
10471                 "OpMemberDecorate %s_SSBOo 0 Offset 0\n");
10472
10473         const StringTemplate pre_main (
10474                 "${datatype_additional_decl:opt}"
10475                 "    %ptr_in = OpTypePointer StorageBuffer %${inputType}\n"
10476                 "   %ptr_out = OpTypePointer StorageBuffer %${outputType}\n"
10477                 "   %s_SSBOi = OpTypeStruct %${inputType}\n"
10478                 "   %s_SSBOo = OpTypeStruct %${outputType}\n"
10479                 " %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
10480                 " %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
10481                 "     %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
10482                 "     %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
10483
10484         const StringTemplate testfun (
10485                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10486                 "%param     = OpFunctionParameter %v4f32\n"
10487                 "%label     = OpLabel\n"
10488                 "%iLoc      = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
10489                 "%oLoc      = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
10490                 "%valIn     = OpLoad %${inputType} %iLoc\n"
10491                 "%valOut    = ${instruction} %${outputType} %valIn\n"
10492                 "             OpStore %oLoc %valOut\n"
10493                 "             OpReturnValue %param\n"
10494                 "             OpFunctionEnd\n");
10495
10496         params["datatype_extensions"] =
10497                 params["datatype_extensions"] +
10498                 "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
10499
10500         fragments["capability"] = params["datatype_capabilities"];
10501         fragments["extension"]  = params["datatype_extensions"];
10502         fragments["decoration"] = decoration.specialize(params);
10503         fragments["pre_main"]   = pre_main.specialize(params);
10504         fragments["testfun"]    = testfun.specialize(params);
10505
10506         return fragments;
10507 }
10508
10509 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in compute shaders
10510 tcu::TestCaseGroup* createConvertComputeTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
10511 {
10512         de::MovePtr<tcu::TestCaseGroup>         group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
10513         vector<ConvertCase>                                     testCases;
10514         createConvertCases(testCases, instruction);
10515
10516         for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
10517         {
10518                 ComputeShaderSpec spec;
10519                 spec.assembly                   = getConvertCaseShaderStr(instruction, *test);
10520                 spec.numWorkGroups              = IVec3(1, 1, 1);
10521                 spec.inputs.push_back   (test->m_inputBuffer);
10522                 spec.outputs.push_back  (test->m_outputBuffer);
10523
10524                 getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, spec.requestedVulkanFeatures, spec.extensions);
10525
10526                 group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "", spec));
10527         }
10528         return group.release();
10529 }
10530
10531 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in graphics shaders
10532 tcu::TestCaseGroup* createConvertGraphicsTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
10533 {
10534         de::MovePtr<tcu::TestCaseGroup>         group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
10535         vector<ConvertCase>                                     testCases;
10536         createConvertCases(testCases, instruction);
10537
10538         for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
10539         {
10540                 map<string, string>     fragments               = getConvertCaseFragments(instruction, *test);
10541                 VulkanFeatures          vulkanFeatures;
10542                 GraphicsResources       resources;
10543                 vector<string>          extensions;
10544                 SpecConstants           noSpecConstants;
10545                 PushConstants           noPushConstants;
10546                 GraphicsInterfaces      noInterfaces;
10547                 tcu::RGBA                       defaultColors[4];
10548
10549                 getDefaultColors                        (defaultColors);
10550                 resources.inputs.push_back      (Resource(test->m_inputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
10551                 resources.outputs.push_back     (Resource(test->m_outputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
10552                 extensions.push_back            ("VK_KHR_storage_buffer_storage_class");
10553
10554                 getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, vulkanFeatures, extensions);
10555
10556                 vulkanFeatures.coreFeatures.vertexPipelineStoresAndAtomics      = true;
10557                 vulkanFeatures.coreFeatures.fragmentStoresAndAtomics            = true;
10558
10559                 createTestsForAllStages(
10560                         test->m_name, defaultColors, defaultColors, fragments, noSpecConstants,
10561                         noPushConstants, resources, noInterfaces, extensions, vulkanFeatures, group.get());
10562         }
10563         return group.release();
10564 }
10565
10566 // Constant-Creation Instructions: OpConstant, OpConstantComposite
10567 tcu::TestCaseGroup* createOpConstantFloat16Tests(tcu::TestContext& testCtx)
10568 {
10569         de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests                (new tcu::TestCaseGroup(testCtx, "opconstant", "OpConstant and OpConstantComposite instruction"));
10570         RGBA                                                    inputColors[4];
10571         RGBA                                                    outputColors[4];
10572         vector<string>                                  extensions;
10573         GraphicsResources                               resources;
10574         VulkanFeatures                                  features;
10575
10576         const char                                              functionStart[]  =
10577                 "%test_code             = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10578                 "%param1                = OpFunctionParameter %v4f32\n"
10579                 "%lbl                   = OpLabel\n";
10580
10581         const char                                              functionEnd[]           =
10582                 "%transformed_param_32  = OpFConvert %v4f32 %transformed_param\n"
10583                 "                         OpReturnValue %transformed_param_32\n"
10584                 "                         OpFunctionEnd\n";
10585
10586         struct NameConstantsCode
10587         {
10588                 string name;
10589                 string constants;
10590                 string code;
10591         };
10592
10593 #define FLOAT_16_COMMON_TYPES_AND_CONSTS \
10594                         "%f16                  = OpTypeFloat 16\n"                                                 \
10595                         "%c_f16_0              = OpConstant %f16 0.0\n"                                            \
10596                         "%c_f16_0_5            = OpConstant %f16 0.5\n"                                            \
10597                         "%c_f16_1              = OpConstant %f16 1.0\n"                                            \
10598                         "%v4f16                = OpTypeVector %f16 4\n"                                            \
10599                         "%fp_f16               = OpTypePointer Function %f16\n"                                    \
10600                         "%fp_v4f16             = OpTypePointer Function %v4f16\n"                                  \
10601                         "%c_v4f16_1_1_1_1      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n" \
10602                         "%a4f16                = OpTypeArray %f16 %c_u32_4\n"                                      \
10603
10604         NameConstantsCode                               tests[] =
10605         {
10606                 {
10607                         "vec4",
10608
10609                         FLOAT_16_COMMON_TYPES_AND_CONSTS
10610                         "%cval                 = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_0\n",
10611                         "%param1_16            = OpFConvert %v4f16 %param1\n"
10612                         "%transformed_param    = OpFAdd %v4f16 %param1_16 %cval\n"
10613                 },
10614                 {
10615                         "struct",
10616
10617                         FLOAT_16_COMMON_TYPES_AND_CONSTS
10618                         "%stype                = OpTypeStruct %v4f16 %f16\n"
10619                         "%fp_stype             = OpTypePointer Function %stype\n"
10620                         "%f16_n_1              = OpConstant %f16 -1.0\n"
10621                         "%f16_1_5              = OpConstant %f16 !0x3e00\n" // +1.5
10622                         "%cvec                 = OpConstantComposite %v4f16 %f16_1_5 %f16_1_5 %f16_1_5 %c_f16_1\n"
10623                         "%cval                 = OpConstantComposite %stype %cvec %f16_n_1\n",
10624
10625                         "%v                    = OpVariable %fp_stype Function %cval\n"
10626                         "%vec_ptr              = OpAccessChain %fp_v4f16 %v %c_u32_0\n"
10627                         "%f16_ptr              = OpAccessChain %fp_f16 %v %c_u32_1\n"
10628                         "%vec_val              = OpLoad %v4f16 %vec_ptr\n"
10629                         "%f16_val              = OpLoad %f16 %f16_ptr\n"
10630                         "%tmp1                 = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_1 %f16_val\n" // vec4(-1)
10631                         "%param1_16            = OpFConvert %v4f16 %param1\n"
10632                         "%tmp2                 = OpFAdd %v4f16 %tmp1 %param1_16\n" // param1 + vec4(-1)
10633                         "%transformed_param    = OpFAdd %v4f16 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
10634                 },
10635                 {
10636                         // [1|0|0|0.5] [x] = x + 0.5
10637                         // [0|1|0|0.5] [y] = y + 0.5
10638                         // [0|0|1|0.5] [z] = z + 0.5
10639                         // [0|0|0|1  ] [1] = 1
10640                         "matrix",
10641
10642                         FLOAT_16_COMMON_TYPES_AND_CONSTS
10643                         "%mat4x4_f16           = OpTypeMatrix %v4f16 4\n"
10644                         "%v4f16_1_0_0_0        = OpConstantComposite %v4f16 %c_f16_1 %c_f16_0 %c_f16_0 %c_f16_0\n"
10645                         "%v4f16_0_1_0_0        = OpConstantComposite %v4f16 %c_f16_0 %c_f16_1 %c_f16_0 %c_f16_0\n"
10646                         "%v4f16_0_0_1_0        = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_1 %c_f16_0\n"
10647                         "%v4f16_0_5_0_5_0_5_1  = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_1\n"
10648                         "%cval                 = OpConstantComposite %mat4x4_f16 %v4f16_1_0_0_0 %v4f16_0_1_0_0 %v4f16_0_0_1_0 %v4f16_0_5_0_5_0_5_1\n",
10649
10650                         "%param1_16            = OpFConvert %v4f16 %param1\n"
10651                         "%transformed_param    = OpMatrixTimesVector %v4f16 %cval %param1_16\n"
10652                 },
10653                 {
10654                         "array",
10655
10656                         FLOAT_16_COMMON_TYPES_AND_CONSTS
10657                         "%c_v4f16_1_1_1_0      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
10658                         "%fp_a4f16             = OpTypePointer Function %a4f16\n"
10659                         "%f16_n_1              = OpConstant %f16 -1.0\n"
10660                         "%f16_1_5              = OpConstant %f16 !0x3e00\n" // +1.5
10661                         "%carr                 = OpConstantComposite %a4f16 %c_f16_0 %f16_n_1 %f16_1_5 %c_f16_0\n",
10662
10663                         "%v                    = OpVariable %fp_a4f16 Function %carr\n"
10664                         "%f                    = OpAccessChain %fp_f16 %v %c_u32_0\n"
10665                         "%f1                   = OpAccessChain %fp_f16 %v %c_u32_1\n"
10666                         "%f2                   = OpAccessChain %fp_f16 %v %c_u32_2\n"
10667                         "%f3                   = OpAccessChain %fp_f16 %v %c_u32_3\n"
10668                         "%f_val                = OpLoad %f16 %f\n"
10669                         "%f1_val               = OpLoad %f16 %f1\n"
10670                         "%f2_val               = OpLoad %f16 %f2\n"
10671                         "%f3_val               = OpLoad %f16 %f3\n"
10672                         "%ftot1                = OpFAdd %f16 %f_val %f1_val\n"
10673                         "%ftot2                = OpFAdd %f16 %ftot1 %f2_val\n"
10674                         "%ftot3                = OpFAdd %f16 %ftot2 %f3_val\n"  // 0 - 1 + 1.5 + 0
10675                         "%add_vec              = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %ftot3\n"
10676                         "%param1_16            = OpFConvert %v4f16 %param1\n"
10677                         "%transformed_param    = OpFAdd %v4f16 %param1_16 %add_vec\n"
10678                 },
10679                 {
10680                         //
10681                         // [
10682                         //   {
10683                         //      0.0,
10684                         //      [ 1.0, 1.0, 1.0, 1.0]
10685                         //   },
10686                         //   {
10687                         //      1.0,
10688                         //      [ 0.0, 0.5, 0.0, 0.0]
10689                         //   }, //     ^^^
10690                         //   {
10691                         //      0.0,
10692                         //      [ 1.0, 1.0, 1.0, 1.0]
10693                         //   }
10694                         // ]
10695                         "array_of_struct_of_array",
10696
10697                         FLOAT_16_COMMON_TYPES_AND_CONSTS
10698                         "%c_v4f16_1_1_1_0      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
10699                         "%fp_a4f16             = OpTypePointer Function %a4f16\n"
10700                         "%stype                = OpTypeStruct %f16 %a4f16\n"
10701                         "%a3stype              = OpTypeArray %stype %c_u32_3\n"
10702                         "%fp_a3stype           = OpTypePointer Function %a3stype\n"
10703                         "%ca4f16_0             = OpConstantComposite %a4f16 %c_f16_0 %c_f16_0_5 %c_f16_0 %c_f16_0\n"
10704                         "%ca4f16_1             = OpConstantComposite %a4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
10705                         "%cstype1              = OpConstantComposite %stype %c_f16_0 %ca4f16_1\n"
10706                         "%cstype2              = OpConstantComposite %stype %c_f16_1 %ca4f16_0\n"
10707                         "%carr                 = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
10708
10709                         "%v                    = OpVariable %fp_a3stype Function %carr\n"
10710                         "%f                    = OpAccessChain %fp_f16 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
10711                         "%f_l                  = OpLoad %f16 %f\n"
10712                         "%add_vec              = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %f_l\n"
10713                         "%param1_16            = OpFConvert %v4f16 %param1\n"
10714                         "%transformed_param    = OpFAdd %v4f16 %param1_16 %add_vec\n"
10715                 }
10716         };
10717
10718         getHalfColorsFullAlpha(inputColors);
10719         outputColors[0] = RGBA(255, 255, 255, 255);
10720         outputColors[1] = RGBA(255, 127, 127, 255);
10721         outputColors[2] = RGBA(127, 255, 127, 255);
10722         outputColors[3] = RGBA(127, 127, 255, 255);
10723
10724         extensions.push_back("VK_KHR_shader_float16_int8");
10725         features.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
10726
10727         for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
10728         {
10729                 map<string, string> fragments;
10730
10731                 fragments["capability"] = "OpCapability Float16\n";
10732                 fragments["pre_main"]   = tests[testNdx].constants;
10733                 fragments["testfun"]    = string(functionStart) + tests[testNdx].code + functionEnd;
10734
10735                 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, resources, extensions, opConstantCompositeTests.get(), features);
10736         }
10737         return opConstantCompositeTests.release();
10738 }
10739
10740 template<typename T>
10741 void finalizeTestsCreation (T&                                                  specResource,
10742                                                         const map<string, string>&      fragments,
10743                                                         tcu::TestContext&                       testCtx,
10744                                                         tcu::TestCaseGroup&                     testGroup,
10745                                                         const std::string&                      testName,
10746                                                         const VulkanFeatures&           vulkanFeatures,
10747                                                         const vector<string>&           extensions,
10748                                                         const IVec3&                            numWorkGroups);
10749
10750 template<>
10751 void finalizeTestsCreation (GraphicsResources&                  specResource,
10752                                                         const map<string, string>&      fragments,
10753                                                         tcu::TestContext&                       ,
10754                                                         tcu::TestCaseGroup&                     testGroup,
10755                                                         const std::string&                      testName,
10756                                                         const VulkanFeatures&           vulkanFeatures,
10757                                                         const vector<string>&           extensions,
10758                                                         const IVec3&                            )
10759 {
10760         RGBA defaultColors[4];
10761         getDefaultColors(defaultColors);
10762
10763         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, specResource, extensions, &testGroup, vulkanFeatures);
10764 }
10765
10766 template<>
10767 void finalizeTestsCreation (ComputeShaderSpec&                  specResource,
10768                                                         const map<string, string>&      fragments,
10769                                                         tcu::TestContext&                       testCtx,
10770                                                         tcu::TestCaseGroup&                     testGroup,
10771                                                         const std::string&                      testName,
10772                                                         const VulkanFeatures&           vulkanFeatures,
10773                                                         const vector<string>&           extensions,
10774                                                         const IVec3&                            numWorkGroups)
10775 {
10776         specResource.numWorkGroups = numWorkGroups;
10777         specResource.requestedVulkanFeatures = vulkanFeatures;
10778         specResource.extensions = extensions;
10779
10780         specResource.assembly = makeComputeShaderAssembly(fragments);
10781
10782         testGroup.addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", specResource));
10783 }
10784
10785 template<class SpecResource>
10786 tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bool nanSupported)
10787 {
10788         const string                                            nan                                     = nanSupported ? "_nan" : "";
10789         const string                                            groupName                       = "logical" + nan;
10790         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Float 16 logical tests"));
10791
10792         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
10793         const string                                            spvCapabilities         = string("OpCapability Float16\n") + (nanSupported ? "OpCapability SignedZeroInfNanPreserve\n" : "");
10794         const string                                            spvExtensions           = (nanSupported ? "OpExtension \"SPV_KHR_float_controls\"\n" : "");
10795         const string                                            spvExecutionMode        = nanSupported ? "OpExecutionMode %BP_main SignedZeroInfNanPreserve 16\n" : "";
10796         const deUint32                                          numDataPointsScalar     = 16;
10797         const deUint32                                          numDataPointsVector     = 14;
10798         const vector<deFloat16>                         float16DataScalar       = getFloat16s(rnd, numDataPointsScalar);
10799         const vector<deFloat16>                         float16DataVector       = getFloat16s(rnd, numDataPointsVector);
10800         const vector<deFloat16>                         float16Data1            = squarize(float16DataScalar, 0);                       // Total Size: square(sizeof(float16DataScalar))
10801         const vector<deFloat16>                         float16Data2            = squarize(float16DataScalar, 1);
10802         const vector<deFloat16>                         float16DataVec1         = squarizeVector(float16DataVector, 0);         // Total Size: 2 * (square(square(sizeof(float16DataVector))))
10803         const vector<deFloat16>                         float16DataVec2         = squarizeVector(float16DataVector, 1);
10804         const vector<deFloat16>                         float16OutDummy         (float16Data1.size(), 0);
10805         const vector<deFloat16>                         float16OutVecDummy      (float16DataVec1.size(), 0);
10806
10807         struct TestOp
10808         {
10809                 const char*             opCode;
10810                 VerifyIOFunc    verifyFuncNan;
10811                 VerifyIOFunc    verifyFuncNonNan;
10812                 const deUint32  argCount;
10813         };
10814
10815         const TestOp    testOps[]       =
10816         {
10817                 { "OpIsNan"                                             ,       compareFP16Logical<fp16isNan,                           true,  false, true>,    compareFP16Logical<fp16isNan,                           true,  false, false>,   1       },
10818                 { "OpIsInf"                                             ,       compareFP16Logical<fp16isInf,                           true,  false, true>,    compareFP16Logical<fp16isInf,                           true,  false, false>,   1       },
10819                 { "OpFOrdEqual"                                 ,       compareFP16Logical<fp16isEqual,                         false, true,  true>,    compareFP16Logical<fp16isEqual,                         false, true,  false>,   2       },
10820                 { "OpFUnordEqual"                               ,       compareFP16Logical<fp16isEqual,                         false, false, true>,    compareFP16Logical<fp16isEqual,                         false, false, false>,   2       },
10821                 { "OpFOrdNotEqual"                              ,       compareFP16Logical<fp16isUnequal,                       false, true,  true>,    compareFP16Logical<fp16isUnequal,                       false, true,  false>,   2       },
10822                 { "OpFUnordNotEqual"                    ,       compareFP16Logical<fp16isUnequal,                       false, false, true>,    compareFP16Logical<fp16isUnequal,                       false, false, false>,   2       },
10823                 { "OpFOrdLessThan"                              ,       compareFP16Logical<fp16isLess,                          false, true,  true>,    compareFP16Logical<fp16isLess,                          false, true,  false>,   2       },
10824                 { "OpFUnordLessThan"                    ,       compareFP16Logical<fp16isLess,                          false, false, true>,    compareFP16Logical<fp16isLess,                          false, false, false>,   2       },
10825                 { "OpFOrdGreaterThan"                   ,       compareFP16Logical<fp16isGreater,                       false, true,  true>,    compareFP16Logical<fp16isGreater,                       false, true,  false>,   2       },
10826                 { "OpFUnordGreaterThan"                 ,       compareFP16Logical<fp16isGreater,                       false, false, true>,    compareFP16Logical<fp16isGreater,                       false, false, false>,   2       },
10827                 { "OpFOrdLessThanEqual"                 ,       compareFP16Logical<fp16isLessOrEqual,           false, true,  true>,    compareFP16Logical<fp16isLessOrEqual,           false, true,  false>,   2       },
10828                 { "OpFUnordLessThanEqual"               ,       compareFP16Logical<fp16isLessOrEqual,           false, false, true>,    compareFP16Logical<fp16isLessOrEqual,           false, false, false>,   2       },
10829                 { "OpFOrdGreaterThanEqual"              ,       compareFP16Logical<fp16isGreaterOrEqual,        false, true,  true>,    compareFP16Logical<fp16isGreaterOrEqual,        false, true,  false>,   2       },
10830                 { "OpFUnordGreaterThanEqual"    ,       compareFP16Logical<fp16isGreaterOrEqual,        false, false, true>,    compareFP16Logical<fp16isGreaterOrEqual,        false, false, false>,   2       },
10831         };
10832
10833         { // scalar cases
10834                 const StringTemplate preMain
10835                 (
10836                         "      %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
10837                         "     %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
10838                         "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
10839                         " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
10840                         "            %f16 = OpTypeFloat 16\n"
10841                         "          %v2f16 = OpTypeVector %f16 2\n"
10842                         "        %c_f16_0 = OpConstant %f16 0.0\n"
10843                         "        %c_f16_1 = OpConstant %f16 1.0\n"
10844                         "         %up_u32 = OpTypePointer Uniform %u32\n"
10845                         "         %ra_u32 = OpTypeArray %u32 %c_i32_hndp\n"
10846                         "         %SSBO16 = OpTypeStruct %ra_u32\n"
10847                         "      %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
10848                         "     %f16_i32_fn = OpTypeFunction %f16 %i32\n"
10849                         "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
10850                         "      %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
10851                         "      %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
10852                         "       %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
10853                 );
10854
10855                 const StringTemplate decoration
10856                 (
10857                         "OpDecorate %ra_u32 ArrayStride 4\n"
10858                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
10859                         "OpDecorate %SSBO16 BufferBlock\n"
10860                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
10861                         "OpDecorate %ssbo_src0 Binding 0\n"
10862                         "OpDecorate %ssbo_src1 DescriptorSet 0\n"
10863                         "OpDecorate %ssbo_src1 Binding 1\n"
10864                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
10865                         "OpDecorate %ssbo_dst Binding 2\n"
10866                 );
10867
10868                 const StringTemplate testFun
10869                 (
10870                         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10871                         "    %param = OpFunctionParameter %v4f32\n"
10872
10873                         "    %entry = OpLabel\n"
10874                         "        %i = OpVariable %fp_i32 Function\n"
10875                         "             OpStore %i %c_i32_0\n"
10876                         "             OpBranch %loop\n"
10877
10878                         "     %loop = OpLabel\n"
10879                         "    %i_cmp = OpLoad %i32 %i\n"
10880                         "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
10881                         "             OpLoopMerge %merge %next None\n"
10882                         "             OpBranchConditional %lt %write %merge\n"
10883
10884                         "    %write = OpLabel\n"
10885                         "      %ndx = OpLoad %i32 %i\n"
10886
10887                         " %val_src0 = OpFunctionCall %f16 %ld_arg_ssbo_src0 %ndx\n"
10888
10889                         "${op_arg1_calc}"
10890
10891                         " %val_bdst = ${op_code} %bool %val_src0 ${op_arg1}\n"
10892                         "  %val_dst = OpSelect %f16 %val_bdst %c_f16_1 %c_f16_0\n"
10893                         "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
10894                         "             OpBranch %next\n"
10895
10896                         "     %next = OpLabel\n"
10897                         "    %i_cur = OpLoad %i32 %i\n"
10898                         "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
10899                         "             OpStore %i %i_new\n"
10900                         "             OpBranch %loop\n"
10901
10902                         "    %merge = OpLabel\n"
10903                         "             OpReturnValue %param\n"
10904
10905                         "             OpFunctionEnd\n"
10906                 );
10907
10908                 const StringTemplate arg1Calc
10909                 (
10910                         " %val_src1 = OpFunctionCall %f16 %ld_arg_ssbo_src1 %ndx\n"
10911                 );
10912
10913                 for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
10914                 {
10915                         const size_t            iterations              = float16Data1.size();
10916                         const TestOp&           testOp                  = testOps[testOpsIdx];
10917                         const string            testName                = de::toLower(string(testOp.opCode)) + "_scalar";
10918                         SpecResource            specResource;
10919                         map<string, string>     specs;
10920                         VulkanFeatures          features;
10921                         map<string, string>     fragments;
10922                         vector<string>          extensions;
10923
10924                         specs["num_data_points"]        = de::toString(iterations);
10925                         specs["op_code"]                        = testOp.opCode;
10926                         specs["op_arg1"]                        = (testOp.argCount == 1) ? "" : "%val_src1";
10927                         specs["op_arg1_calc"]           = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
10928
10929                         fragments["extension"]          = spvExtensions;
10930                         fragments["capability"]         = spvCapabilities;
10931                         fragments["execution_mode"]     = spvExecutionMode;
10932                         fragments["decoration"]         = decoration.specialize(specs);
10933                         fragments["pre_main"]           = preMain.specialize(specs);
10934                         fragments["testfun"]            = testFun.specialize(specs);
10935                         fragments["testfun"]            += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src0"}});
10936                         if (testOp.argCount > 1)
10937                         {
10938                                 fragments["testfun"]    += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src1"}});
10939                         }
10940                         fragments["testfun"]            += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
10941
10942                         specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
10943                         specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
10944                         specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
10945                         specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
10946
10947                         extensions.push_back("VK_KHR_shader_float16_int8");
10948
10949                         if (nanSupported)
10950                         {
10951                                 extensions.push_back("VK_KHR_shader_float_controls");
10952
10953                                 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
10954                         }
10955
10956                         features.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
10957
10958                         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
10959                 }
10960         }
10961         { // vector cases
10962                 const StringTemplate preMain
10963                 (
10964                         "        %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
10965                         "           %v2bool = OpTypeVector %bool 2\n"
10966                         "              %f16 = OpTypeFloat 16\n"
10967                         "          %c_f16_0 = OpConstant %f16 0.0\n"
10968                         "          %c_f16_1 = OpConstant %f16 1.0\n"
10969                         "            %v2f16 = OpTypeVector %f16 2\n"
10970                         "      %c_v2f16_0_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
10971                         "      %c_v2f16_1_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
10972                         "           %up_u32 = OpTypePointer Uniform %u32\n"
10973                         "           %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
10974                         "           %SSBO16 = OpTypeStruct %ra_u32\n"
10975                         "        %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
10976                         "     %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
10977                         "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
10978                         "        %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
10979                         "        %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
10980                         "         %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
10981                 );
10982
10983                 const StringTemplate decoration
10984                 (
10985                         "OpDecorate %ra_u32 ArrayStride 4\n"
10986                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
10987                         "OpDecorate %SSBO16 BufferBlock\n"
10988                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
10989                         "OpDecorate %ssbo_src0 Binding 0\n"
10990                         "OpDecorate %ssbo_src1 DescriptorSet 0\n"
10991                         "OpDecorate %ssbo_src1 Binding 1\n"
10992                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
10993                         "OpDecorate %ssbo_dst Binding 2\n"
10994                 );
10995
10996                 const StringTemplate testFun
10997                 (
10998                         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10999                         "    %param = OpFunctionParameter %v4f32\n"
11000
11001                         "    %entry = OpLabel\n"
11002                         "        %i = OpVariable %fp_i32 Function\n"
11003                         "             OpStore %i %c_i32_0\n"
11004                         "             OpBranch %loop\n"
11005
11006                         "     %loop = OpLabel\n"
11007                         "    %i_cmp = OpLoad %i32 %i\n"
11008                         "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11009                         "             OpLoopMerge %merge %next None\n"
11010                         "             OpBranchConditional %lt %write %merge\n"
11011
11012                         "    %write = OpLabel\n"
11013                         "      %ndx = OpLoad %i32 %i\n"
11014
11015                         " %val_src0 = OpFunctionCall %v2f16 %ld_arg_ssbo_src0 %ndx\n"
11016
11017                         "${op_arg1_calc}"
11018
11019                         " %val_bdst = ${op_code} %v2bool %val_src0 ${op_arg1}\n"
11020                         "  %val_dst = OpSelect %v2f16 %val_bdst %c_v2f16_1_1 %c_v2f16_0_0\n"
11021                         "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11022                         "             OpBranch %next\n"
11023
11024                         "     %next = OpLabel\n"
11025                         "    %i_cur = OpLoad %i32 %i\n"
11026                         "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11027                         "             OpStore %i %i_new\n"
11028                         "             OpBranch %loop\n"
11029
11030                         "    %merge = OpLabel\n"
11031                         "             OpReturnValue %param\n"
11032
11033                         "             OpFunctionEnd\n"
11034                 );
11035
11036                 const StringTemplate arg1Calc
11037                 (
11038                         " %val_src1 = OpFunctionCall %v2f16 %ld_arg_ssbo_src1 %ndx\n"
11039                 );
11040
11041                 for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11042                 {
11043                         const deUint32          itemsPerVec     = 2;
11044                         const size_t            iterations      = float16DataVec1.size() / itemsPerVec;
11045                         const TestOp&           testOp          = testOps[testOpsIdx];
11046                         const string            testName        = de::toLower(string(testOp.opCode)) + "_vector";
11047                         SpecResource            specResource;
11048                         map<string, string>     specs;
11049                         vector<string>          extensions;
11050                         VulkanFeatures          features;
11051                         map<string, string>     fragments;
11052
11053                         specs["num_data_points"]        = de::toString(iterations);
11054                         specs["op_code"]                        = testOp.opCode;
11055                         specs["op_arg1"]                        = (testOp.argCount == 1) ? "" : "%val_src1";
11056                         specs["op_arg1_calc"]           = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11057
11058                         fragments["extension"]          = spvExtensions;
11059                         fragments["capability"]         = spvCapabilities;
11060                         fragments["execution_mode"]     = spvExecutionMode;
11061                         fragments["decoration"]         = decoration.specialize(specs);
11062                         fragments["pre_main"]           = preMain.specialize(specs);
11063                         fragments["testfun"]            = testFun.specialize(specs);
11064                         fragments["testfun"]            += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src0"}});
11065                         if (testOp.argCount > 1)
11066                         {
11067                                 fragments["testfun"]    += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src1"}});
11068                         }
11069                         fragments["testfun"]            += StringTemplate(storeV2F16AsUint).specialize({{"var", "ssbo_dst"}});
11070
11071                         specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11072                         specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11073                         specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutVecDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11074                         specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11075
11076                         extensions.push_back("VK_KHR_shader_float16_int8");
11077
11078                         if (nanSupported)
11079                         {
11080                                 extensions.push_back("VK_KHR_shader_float_controls");
11081
11082                                 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11083                         }
11084
11085                         features.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
11086
11087                         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11088                 }
11089         }
11090
11091         return testGroup.release();
11092 }
11093
11094 bool compareFP16FunctionSetFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11095 {
11096         if (inputs.size() != 1 || outputAllocs.size() != 1)
11097                 return false;
11098
11099         vector<deUint8> input1Bytes;
11100
11101         inputs[0].getBytes(input1Bytes);
11102
11103         const deUint16* const   input1AsFP16    = (const deUint16*)&input1Bytes[0];
11104         const deUint16* const   outputAsFP16    = (const deUint16*)outputAllocs[0]->getHostPtr();
11105         std::string                             error;
11106
11107         for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deUint16); ++idx)
11108         {
11109                 if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
11110                 {
11111                         log << TestLog::Message << error << TestLog::EndMessage;
11112
11113                         return false;
11114                 }
11115         }
11116
11117         return true;
11118 }
11119
11120 template<class SpecResource>
11121 tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
11122 {
11123         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "function", "Float 16 function call related tests"));
11124
11125         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
11126         const StringTemplate                            capabilities            ("OpCapability Float16\n");
11127         const deUint32                                          numDataPoints           = 256;
11128         const vector<deFloat16>                         float16InputData        = getFloat16s(rnd, numDataPoints);
11129         const vector<deFloat16>                         float16OutputDummy      (float16InputData.size(), 0);
11130         map<string, string>                                     fragments;
11131
11132         struct TestType
11133         {
11134                 const deUint32  typeComponents;
11135                 const char*             typeName;
11136                 const char*             typeDecls;
11137                 const char*             typeStorage;
11138                 const string            loadFunc;
11139                 const string            storeFunc;
11140         };
11141
11142         const TestType  testTypes[]     =
11143         {
11144                 {
11145                         1,
11146                         "f16",
11147                         "      %v2f16 = OpTypeVector %f16 2\n"
11148                         "%f16_i32_fn = OpTypeFunction %f16 %i32\n"
11149                         "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11150                         "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11151                         " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
11152                         "u32_hndp",
11153                         loadScalarF16FromUint,
11154                         storeScalarF16AsUint
11155                 },
11156                 {
11157                         2,
11158                         "v2f16",
11159                         "      %v2f16 = OpTypeVector %f16 2\n"
11160                         "  %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11161                         "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11162                         "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
11163                         "u32_ndp",
11164                         loadV2F16FromUint,
11165                         storeV2F16AsUint
11166                 },
11167                 {
11168                         4,
11169                         "v4f16",
11170                         "      %v2f16 = OpTypeVector %f16 2\n"
11171                         "      %v4f16 = OpTypeVector %f16 4\n"
11172                         "  %c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
11173                         "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
11174                         "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
11175                         "ra_u32_2",
11176                         loadV4F16FromUints,
11177                         storeV4F16AsUints
11178                 },
11179         };
11180
11181         const StringTemplate preMain
11182         (
11183                 "  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11184                 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11185                 "     %v2bool = OpTypeVector %bool 2\n"
11186                 "        %f16 = OpTypeFloat 16\n"
11187                 "    %c_f16_0 = OpConstant %f16 0.0\n"
11188
11189                 "${type_decls}"
11190
11191                 "  %${tt}_fun = OpTypeFunction %${tt} %${tt}\n"
11192                 "   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
11193                 "%ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
11194                 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
11195                 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
11196                 "         %up_u32 = OpTypePointer Uniform %u32\n"
11197                 "     %SSBO16 = OpTypeStruct %ra_${ts}\n"
11198                 "  %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11199                 "   %ssbo_src = OpVariable %up_SSBO16 Uniform\n"
11200                 "   %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11201         );
11202
11203         const StringTemplate decoration
11204         (
11205                 "OpDecorate %ra_u32_2 ArrayStride 4\n"
11206                 "OpDecorate %ra_u32_hndp ArrayStride 4\n"
11207                 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
11208                 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
11209                 "OpMemberDecorate %SSBO16 0 Offset 0\n"
11210                 "OpDecorate %SSBO16 BufferBlock\n"
11211                 "OpDecorate %ssbo_src DescriptorSet 0\n"
11212                 "OpDecorate %ssbo_src Binding 0\n"
11213                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11214                 "OpDecorate %ssbo_dst Binding 1\n"
11215         );
11216
11217         const StringTemplate testFun
11218         (
11219                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11220                 "    %param = OpFunctionParameter %v4f32\n"
11221                 "    %entry = OpLabel\n"
11222
11223                 "        %i = OpVariable %fp_i32 Function\n"
11224                 "             OpStore %i %c_i32_0\n"
11225                 "             OpBranch %loop\n"
11226
11227                 "     %loop = OpLabel\n"
11228                 "    %i_cmp = OpLoad %i32 %i\n"
11229                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11230                 "             OpLoopMerge %merge %next None\n"
11231                 "             OpBranchConditional %lt %write %merge\n"
11232
11233                 "    %write = OpLabel\n"
11234                 "      %ndx = OpLoad %i32 %i\n"
11235
11236                 "  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
11237                 "  %val_dst = OpFunctionCall %${tt} %pass_fun %val_src\n"
11238                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11239                 "             OpBranch %next\n"
11240
11241                 "     %next = OpLabel\n"
11242                 "    %i_cur = OpLoad %i32 %i\n"
11243                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11244                 "             OpStore %i %i_new\n"
11245                 "             OpBranch %loop\n"
11246
11247                 "    %merge = OpLabel\n"
11248                 "             OpReturnValue %param\n"
11249
11250                 "             OpFunctionEnd\n"
11251
11252                 " %pass_fun = OpFunction %${tt} None %${tt}_fun\n"
11253                 "   %param0 = OpFunctionParameter %${tt}\n"
11254                 " %entry_pf = OpLabel\n"
11255                 "     %res0 = OpFAdd %${tt} %param0 %c_${tt}_0\n"
11256                 "             OpReturnValue %res0\n"
11257                 "             OpFunctionEnd\n"
11258         );
11259
11260         for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
11261         {
11262                 const TestType&         testType                = testTypes[testTypeIdx];
11263                 const string            testName                = testType.typeName;
11264                 const deUint32          itemsPerType    = testType.typeComponents;
11265                 const size_t            iterations              = float16InputData.size() / itemsPerType;
11266                 const size_t            typeStride              = itemsPerType * sizeof(deFloat16);
11267                 SpecResource            specResource;
11268                 map<string, string>     specs;
11269                 VulkanFeatures          features;
11270                 vector<string>          extensions;
11271
11272                 specs["num_data_points"]        = de::toString(iterations);
11273                 specs["tt"]                                     = testType.typeName;
11274                 specs["ts"]                                     = testType.typeStorage;
11275                 specs["tt_stride"]                      = de::toString(typeStride);
11276                 specs["type_decls"]                     = testType.typeDecls;
11277
11278                 fragments["capability"]         = capabilities.specialize(specs);
11279                 fragments["decoration"]         = decoration.specialize(specs);
11280                 fragments["pre_main"]           = preMain.specialize(specs);
11281                 fragments["testfun"]            = testFun.specialize(specs);
11282                 fragments["testfun"]            += StringTemplate(testType.loadFunc).specialize({{"var", "ssbo_src"}});
11283                 fragments["testfun"]            += StringTemplate(testType.storeFunc).specialize({{"var", "ssbo_dst"}});
11284
11285                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11286                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11287                 specResource.verifyIO = compareFP16FunctionSetFunc;
11288
11289                 extensions.push_back("VK_KHR_shader_float16_int8");
11290
11291                 features.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
11292
11293                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11294         }
11295
11296         return testGroup.release();
11297 }
11298
11299 bool compareFP16VectorExtractFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11300 {
11301         if (inputs.size() != 2 || outputAllocs.size() != 1)
11302                 return false;
11303
11304         vector<deUint8> input1Bytes;
11305         vector<deUint8> input2Bytes;
11306
11307         inputs[0].getBytes(input1Bytes);
11308         inputs[1].getBytes(input2Bytes);
11309
11310         DE_ASSERT(input1Bytes.size() > 0);
11311         DE_ASSERT(input2Bytes.size() > 0);
11312         DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
11313
11314         const size_t                    iterations              = input2Bytes.size() / sizeof(deUint32);
11315         const size_t                    components              = input1Bytes.size() / (sizeof(deFloat16) * iterations);
11316         const deFloat16* const  input1AsFP16    = (const deFloat16*)&input1Bytes[0];
11317         const deUint32* const   inputIndices    = (const deUint32*)&input2Bytes[0];
11318         const deFloat16* const  outputAsFP16    = (const deFloat16*)outputAllocs[0]->getHostPtr();
11319         std::string                             error;
11320
11321         DE_ASSERT(components == 2 || components == 4);
11322         DE_ASSERT(input1Bytes.size() == iterations * components * sizeof(deFloat16));
11323
11324         for (size_t idx = 0; idx < iterations; ++idx)
11325         {
11326                 const deUint32  componentNdx    = inputIndices[idx];
11327
11328                 DE_ASSERT(componentNdx < components);
11329
11330                 const deFloat16 expected                = input1AsFP16[components * idx + componentNdx];
11331
11332                 if (!compare16BitFloat(expected, outputAsFP16[idx], error))
11333                 {
11334                         log << TestLog::Message << "At " << idx << error << TestLog::EndMessage;
11335
11336                         return false;
11337                 }
11338         }
11339
11340         return true;
11341 }
11342
11343 template<class SpecResource>
11344 tcu::TestCaseGroup* createFloat16VectorExtractSet (tcu::TestContext& testCtx)
11345 {
11346         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "opvectorextractdynamic", "OpVectorExtractDynamic tests"));
11347
11348         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
11349         const deUint32                                          numDataPoints           = 256;
11350         const vector<deFloat16>                         float16InputData        = getFloat16s(rnd, numDataPoints);
11351         const vector<deFloat16>                         float16OutputDummy      (float16InputData.size(), 0);
11352
11353         struct TestType
11354         {
11355                 const deUint32  typeComponents;
11356                 const size_t    typeStride;
11357                 const char*             typeName;
11358                 const char*             typeDecls;
11359         };
11360
11361         const TestType  testTypes[]     =
11362         {
11363                 {
11364                         2,
11365                         2 * sizeof(deFloat16),
11366                         "v2f16",
11367                         "      %v2f16 = OpTypeVector %f16 2\n"
11368                 },
11369                 {
11370                         3,
11371                         4 * sizeof(deFloat16),
11372                         "v3f16",
11373                         "      %v3f16 = OpTypeVector %f16 3\n"
11374                 },
11375                 {
11376                         4,
11377                         4 * sizeof(deFloat16),
11378                         "v4f16",
11379                         "      %v4f16 = OpTypeVector %f16 4\n"
11380                 },
11381         };
11382
11383         const StringTemplate preMain
11384         (
11385                 "  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11386                 "        %f16 = OpTypeFloat 16\n"
11387
11388                 "${type_decl}"
11389
11390                 "   %up_${tt} = OpTypePointer Uniform %${tt}\n"
11391                 "   %ra_${tt} = OpTypeArray %${tt} %c_i32_ndp\n"
11392                 "   %SSBO_SRC = OpTypeStruct %ra_${tt}\n"
11393                 "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
11394
11395                 "     %up_u32 = OpTypePointer Uniform %u32\n"
11396                 "     %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
11397                 "   %SSBO_IDX = OpTypeStruct %ra_u32\n"
11398                 "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
11399
11400                 "     %up_f16 = OpTypePointer Uniform %f16\n"
11401                 "     %ra_f16 = OpTypeArray %f16 %c_i32_ndp\n"
11402                 "   %SSBO_DST = OpTypeStruct %ra_f16\n"
11403                 "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
11404
11405                 "   %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
11406                 "   %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
11407                 "   %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
11408         );
11409
11410         const StringTemplate decoration
11411         (
11412                 "OpDecorate %ra_${tt} ArrayStride ${tt_stride}\n"
11413                 "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
11414                 "OpDecorate %SSBO_SRC BufferBlock\n"
11415                 "OpDecorate %ssbo_src DescriptorSet 0\n"
11416                 "OpDecorate %ssbo_src Binding 0\n"
11417
11418                 "OpDecorate %ra_u32 ArrayStride 4\n"
11419                 "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
11420                 "OpDecorate %SSBO_IDX BufferBlock\n"
11421                 "OpDecorate %ssbo_idx DescriptorSet 0\n"
11422                 "OpDecorate %ssbo_idx Binding 1\n"
11423
11424                 "OpDecorate %ra_f16 ArrayStride 2\n"
11425                 "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
11426                 "OpDecorate %SSBO_DST BufferBlock\n"
11427                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11428                 "OpDecorate %ssbo_dst Binding 2\n"
11429         );
11430
11431         const StringTemplate testFun
11432         (
11433                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11434                 "    %param = OpFunctionParameter %v4f32\n"
11435                 "    %entry = OpLabel\n"
11436
11437                 "        %i = OpVariable %fp_i32 Function\n"
11438                 "             OpStore %i %c_i32_0\n"
11439
11440                 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
11441                 "             OpSelectionMerge %end_if None\n"
11442                 "             OpBranchConditional %will_run %run_test %end_if\n"
11443
11444                 " %run_test = OpLabel\n"
11445                 "             OpBranch %loop\n"
11446
11447                 "     %loop = OpLabel\n"
11448                 "    %i_cmp = OpLoad %i32 %i\n"
11449                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11450                 "             OpLoopMerge %merge %next None\n"
11451                 "             OpBranchConditional %lt %write %merge\n"
11452
11453                 "    %write = OpLabel\n"
11454                 "      %ndx = OpLoad %i32 %i\n"
11455
11456                 "      %src = OpAccessChain %up_${tt} %ssbo_src %c_i32_0 %ndx\n"
11457                 "  %val_src = OpLoad %${tt} %src\n"
11458
11459                 "  %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
11460                 "  %val_idx = OpLoad %u32 %src_idx\n"
11461
11462                 "  %val_dst = OpVectorExtractDynamic %f16 %val_src %val_idx\n"
11463                 "      %dst = OpAccessChain %up_f16 %ssbo_dst %c_i32_0 %ndx\n"
11464
11465                 "             OpStore %dst %val_dst\n"
11466                 "             OpBranch %next\n"
11467
11468                 "     %next = OpLabel\n"
11469                 "    %i_cur = OpLoad %i32 %i\n"
11470                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11471                 "             OpStore %i %i_new\n"
11472                 "             OpBranch %loop\n"
11473
11474                 "    %merge = OpLabel\n"
11475                 "             OpBranch %end_if\n"
11476                 "   %end_if = OpLabel\n"
11477                 "             OpReturnValue %param\n"
11478
11479                 "             OpFunctionEnd\n"
11480         );
11481
11482         for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
11483         {
11484                 const TestType&         testType                = testTypes[testTypeIdx];
11485                 const string            testName                = testType.typeName;
11486                 const size_t            itemsPerType    = testType.typeStride / sizeof(deFloat16);
11487                 const size_t            iterations              = float16InputData.size() / itemsPerType;
11488                 SpecResource            specResource;
11489                 map<string, string>     specs;
11490                 VulkanFeatures          features;
11491                 vector<deUint32>        inputDataNdx;
11492                 map<string, string>     fragments;
11493                 vector<string>          extensions;
11494
11495                 for (deUint32 ndx = 0; ndx < iterations; ++ndx)
11496                         inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
11497
11498                 specs["num_data_points"]        = de::toString(iterations);
11499                 specs["tt"]                                     = testType.typeName;
11500                 specs["tt_stride"]                      = de::toString(testType.typeStride);
11501                 specs["type_decl"]                      = testType.typeDecls;
11502
11503                 fragments["extension"]          = "OpExtension \"SPV_KHR_16bit_storage\"";
11504                 fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\nOpCapability Float16\n";
11505                 fragments["decoration"]         = decoration.specialize(specs);
11506                 fragments["pre_main"]           = preMain.specialize(specs);
11507                 fragments["testfun"]            = testFun.specialize(specs);
11508
11509                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11510                 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11511                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11512                 specResource.verifyIO = compareFP16VectorExtractFunc;
11513
11514                 extensions.push_back("VK_KHR_16bit_storage");
11515                 extensions.push_back("VK_KHR_shader_float16_int8");
11516
11517                 features.extFloat16Int8         = EXTFLOAT16INT8FEATURES_FLOAT16;
11518                 features.ext16BitStorage        = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
11519
11520                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11521         }
11522
11523         return testGroup.release();
11524 }
11525
11526 template<deUint32 COMPONENTS_COUNT, deUint32 REPLACEMENT>
11527 bool compareFP16VectorInsertFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11528 {
11529         if (inputs.size() != 2 || outputAllocs.size() != 1)
11530                 return false;
11531
11532         vector<deUint8> input1Bytes;
11533         vector<deUint8> input2Bytes;
11534
11535         inputs[0].getBytes(input1Bytes);
11536         inputs[1].getBytes(input2Bytes);
11537
11538         DE_ASSERT(input1Bytes.size() > 0);
11539         DE_ASSERT(input2Bytes.size() > 0);
11540         DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
11541
11542         const size_t                    iterations                      = input2Bytes.size() / sizeof(deUint32);
11543         const size_t                    componentsStride        = input1Bytes.size() / (sizeof(deFloat16) * iterations);
11544         const deFloat16* const  input1AsFP16            = (const deFloat16*)&input1Bytes[0];
11545         const deUint32* const   inputIndices            = (const deUint32*)&input2Bytes[0];
11546         const deFloat16* const  outputAsFP16            = (const deFloat16*)outputAllocs[0]->getHostPtr();
11547         const deFloat16                 magic                           = tcu::Float16(float(REPLACEMENT)).bits();
11548         std::string                             error;
11549
11550         DE_ASSERT(componentsStride == 2 || componentsStride == 4);
11551         DE_ASSERT(input1Bytes.size() == iterations * componentsStride * sizeof(deFloat16));
11552
11553         for (size_t idx = 0; idx < iterations; ++idx)
11554         {
11555                 const deFloat16*        inputVec                = &input1AsFP16[componentsStride * idx];
11556                 const deFloat16*        outputVec               = &outputAsFP16[componentsStride * idx];
11557                 const deUint32          replacedCompNdx = inputIndices[idx];
11558
11559                 DE_ASSERT(replacedCompNdx < COMPONENTS_COUNT);
11560
11561                 for (size_t compNdx = 0; compNdx < COMPONENTS_COUNT; ++compNdx)
11562                 {
11563                         const deFloat16 expected        = (compNdx == replacedCompNdx) ? magic : inputVec[compNdx];
11564
11565                         if (!compare16BitFloat(expected, outputVec[compNdx], error))
11566                         {
11567                                 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
11568
11569                                 return false;
11570                         }
11571                 }
11572         }
11573
11574         return true;
11575 }
11576
11577 template<class SpecResource>
11578 tcu::TestCaseGroup* createFloat16VectorInsertSet (tcu::TestContext& testCtx)
11579 {
11580         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "opvectorinsertdynamic", "OpVectorInsertDynamic tests"));
11581
11582         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
11583         const deUint32                                          replacement                     = 42;
11584         const deUint32                                          numDataPoints           = 256;
11585         const vector<deFloat16>                         float16InputData        = getFloat16s(rnd, numDataPoints);
11586         const vector<deFloat16>                         float16OutputDummy      (float16InputData.size(), 0);
11587
11588         struct TestType
11589         {
11590                 const deUint32  typeComponents;
11591                 const size_t    typeStride;
11592                 const char*             typeName;
11593                 const char*             typeDecls;
11594                 VerifyIOFunc    verifyIOFunc;
11595         };
11596
11597         const TestType  testTypes[]     =
11598         {
11599                 {
11600                         2,
11601                         2 * sizeof(deFloat16),
11602                         "v2f16",
11603                         "      %v2f16 = OpTypeVector %f16 2\n",
11604                         compareFP16VectorInsertFunc<2, replacement>
11605                 },
11606                 {
11607                         3,
11608                         4 * sizeof(deFloat16),
11609                         "v3f16",
11610                         "      %v3f16 = OpTypeVector %f16 3\n",
11611                         compareFP16VectorInsertFunc<3, replacement>
11612                 },
11613                 {
11614                         4,
11615                         4 * sizeof(deFloat16),
11616                         "v4f16",
11617                         "      %v4f16 = OpTypeVector %f16 4\n",
11618                         compareFP16VectorInsertFunc<4, replacement>
11619                 },
11620         };
11621
11622         const StringTemplate preMain
11623         (
11624                 "  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11625                 "        %f16 = OpTypeFloat 16\n"
11626                 "  %c_f16_ins = OpConstant %f16 ${replacement}\n"
11627
11628                 "${type_decl}"
11629
11630                 "   %up_${tt} = OpTypePointer Uniform %${tt}\n"
11631                 "   %ra_${tt} = OpTypeArray %${tt} %c_i32_ndp\n"
11632                 "   %SSBO_SRC = OpTypeStruct %ra_${tt}\n"
11633                 "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
11634
11635                 "     %up_u32 = OpTypePointer Uniform %u32\n"
11636                 "     %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
11637                 "   %SSBO_IDX = OpTypeStruct %ra_u32\n"
11638                 "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
11639
11640                 "   %SSBO_DST = OpTypeStruct %ra_${tt}\n"
11641                 "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
11642
11643                 "   %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
11644                 "   %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
11645                 "   %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
11646         );
11647
11648         const StringTemplate decoration
11649         (
11650                 "OpDecorate %ra_${tt} ArrayStride ${tt_stride}\n"
11651                 "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
11652                 "OpDecorate %SSBO_SRC BufferBlock\n"
11653                 "OpDecorate %ssbo_src DescriptorSet 0\n"
11654                 "OpDecorate %ssbo_src Binding 0\n"
11655
11656                 "OpDecorate %ra_u32 ArrayStride 4\n"
11657                 "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
11658                 "OpDecorate %SSBO_IDX BufferBlock\n"
11659                 "OpDecorate %ssbo_idx DescriptorSet 0\n"
11660                 "OpDecorate %ssbo_idx Binding 1\n"
11661
11662                 "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
11663                 "OpDecorate %SSBO_DST BufferBlock\n"
11664                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11665                 "OpDecorate %ssbo_dst Binding 2\n"
11666         );
11667
11668         const StringTemplate testFun
11669         (
11670                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11671                 "    %param = OpFunctionParameter %v4f32\n"
11672                 "    %entry = OpLabel\n"
11673
11674                 "        %i = OpVariable %fp_i32 Function\n"
11675                 "             OpStore %i %c_i32_0\n"
11676
11677                 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
11678                 "             OpSelectionMerge %end_if None\n"
11679                 "             OpBranchConditional %will_run %run_test %end_if\n"
11680
11681                 " %run_test = OpLabel\n"
11682                 "             OpBranch %loop\n"
11683
11684                 "     %loop = OpLabel\n"
11685                 "    %i_cmp = OpLoad %i32 %i\n"
11686                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11687                 "             OpLoopMerge %merge %next None\n"
11688                 "             OpBranchConditional %lt %write %merge\n"
11689
11690                 "    %write = OpLabel\n"
11691                 "      %ndx = OpLoad %i32 %i\n"
11692
11693                 "      %src = OpAccessChain %up_${tt} %ssbo_src %c_i32_0 %ndx\n"
11694                 "  %val_src = OpLoad %${tt} %src\n"
11695
11696                 "  %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
11697                 "  %val_idx = OpLoad %u32 %src_idx\n"
11698
11699                 "  %val_dst = OpVectorInsertDynamic %${tt} %val_src %c_f16_ins %val_idx\n"
11700                 "      %dst = OpAccessChain %up_${tt} %ssbo_dst %c_i32_0 %ndx\n"
11701
11702                 "             OpStore %dst %val_dst\n"
11703                 "             OpBranch %next\n"
11704
11705                 "     %next = OpLabel\n"
11706                 "    %i_cur = OpLoad %i32 %i\n"
11707                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11708                 "             OpStore %i %i_new\n"
11709                 "             OpBranch %loop\n"
11710
11711                 "    %merge = OpLabel\n"
11712                 "             OpBranch %end_if\n"
11713                 "   %end_if = OpLabel\n"
11714                 "             OpReturnValue %param\n"
11715
11716                 "             OpFunctionEnd\n"
11717         );
11718
11719         for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
11720         {
11721                 const TestType&         testType                = testTypes[testTypeIdx];
11722                 const string            testName                = testType.typeName;
11723                 const size_t            itemsPerType    = testType.typeStride / sizeof(deFloat16);
11724                 const size_t            iterations              = float16InputData.size() / itemsPerType;
11725                 SpecResource            specResource;
11726                 map<string, string>     specs;
11727                 VulkanFeatures          features;
11728                 vector<deUint32>        inputDataNdx;
11729                 map<string, string>     fragments;
11730                 vector<string>          extensions;
11731
11732                 for (deUint32 ndx = 0; ndx < iterations; ++ndx)
11733                         inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
11734
11735                 specs["num_data_points"]        = de::toString(iterations);
11736                 specs["tt"]                                     = testType.typeName;
11737                 specs["tt_stride"]                      = de::toString(testType.typeStride);
11738                 specs["type_decl"]                      = testType.typeDecls;
11739                 specs["replacement"]            = de::toString(replacement);
11740
11741                 fragments["extension"]          = "OpExtension \"SPV_KHR_16bit_storage\"";
11742                 fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\nOpCapability Float16\n";
11743                 fragments["decoration"]         = decoration.specialize(specs);
11744                 fragments["pre_main"]           = preMain.specialize(specs);
11745                 fragments["testfun"]            = testFun.specialize(specs);
11746
11747                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11748                 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11749                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11750                 specResource.verifyIO = testType.verifyIOFunc;
11751
11752                 extensions.push_back("VK_KHR_16bit_storage");
11753                 extensions.push_back("VK_KHR_shader_float16_int8");
11754
11755                 features.extFloat16Int8         = EXTFLOAT16INT8FEATURES_FLOAT16;
11756                 features.ext16BitStorage        = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
11757
11758                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11759         }
11760
11761         return testGroup.release();
11762 }
11763
11764 inline deFloat16 getShuffledComponent (const size_t iteration, const size_t componentNdx, const deFloat16* input1Vec, const deFloat16* input2Vec, size_t vec1Len, size_t vec2Len, bool& validate)
11765 {
11766         const size_t    compNdxCount    = (vec1Len + vec2Len + 1);
11767         const size_t    compNdxLimited  = iteration % (compNdxCount * compNdxCount);
11768         size_t                  comp;
11769
11770         switch (componentNdx)
11771         {
11772                 case 0: comp = compNdxLimited / compNdxCount; break;
11773                 case 1: comp = compNdxLimited % compNdxCount; break;
11774                 case 2: comp = 0; break;
11775                 case 3: comp = 1; break;
11776                 default: TCU_THROW(InternalError, "Impossible");
11777         }
11778
11779         if (comp >= vec1Len + vec2Len)
11780         {
11781                 validate = false;
11782                 return 0;
11783         }
11784         else
11785         {
11786                 validate = true;
11787                 return (comp < vec1Len) ? input1Vec[comp] : input2Vec[comp - vec1Len];
11788         }
11789 }
11790
11791 template<deUint32 DST_COMPONENTS_COUNT, deUint32 SRC0_COMPONENTS_COUNT, deUint32 SRC1_COMPONENTS_COUNT>
11792 bool compareFP16VectorShuffleFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11793 {
11794         DE_STATIC_ASSERT(DST_COMPONENTS_COUNT == 2 || DST_COMPONENTS_COUNT == 3 || DST_COMPONENTS_COUNT == 4);
11795         DE_STATIC_ASSERT(SRC0_COMPONENTS_COUNT == 2 || SRC0_COMPONENTS_COUNT == 3 || SRC0_COMPONENTS_COUNT == 4);
11796         DE_STATIC_ASSERT(SRC1_COMPONENTS_COUNT == 2 || SRC1_COMPONENTS_COUNT == 3 || SRC1_COMPONENTS_COUNT == 4);
11797
11798         if (inputs.size() != 2 || outputAllocs.size() != 1)
11799                 return false;
11800
11801         vector<deUint8> input1Bytes;
11802         vector<deUint8> input2Bytes;
11803
11804         inputs[0].getBytes(input1Bytes);
11805         inputs[1].getBytes(input2Bytes);
11806
11807         DE_ASSERT(input1Bytes.size() > 0);
11808         DE_ASSERT(input2Bytes.size() > 0);
11809         DE_ASSERT(input2Bytes.size() % sizeof(deFloat16) == 0);
11810
11811         const size_t                    componentsStrideDst             = (DST_COMPONENTS_COUNT == 3) ? 4 : DST_COMPONENTS_COUNT;
11812         const size_t                    componentsStrideSrc0    = (SRC0_COMPONENTS_COUNT == 3) ? 4 : SRC0_COMPONENTS_COUNT;
11813         const size_t                    componentsStrideSrc1    = (SRC1_COMPONENTS_COUNT == 3) ? 4 : SRC1_COMPONENTS_COUNT;
11814         const size_t                    iterations                              = input1Bytes.size() / (componentsStrideSrc0 * sizeof(deFloat16));
11815         const deFloat16* const  input1AsFP16                    = (const deFloat16*)&input1Bytes[0];
11816         const deFloat16* const  input2AsFP16                    = (const deFloat16*)&input2Bytes[0];
11817         const deFloat16* const  outputAsFP16                    = (const deFloat16*)outputAllocs[0]->getHostPtr();
11818         std::string                             error;
11819
11820         DE_ASSERT(input1Bytes.size() == iterations * componentsStrideSrc0 * sizeof(deFloat16));
11821         DE_ASSERT(input2Bytes.size() == iterations * componentsStrideSrc1 * sizeof(deFloat16));
11822
11823         for (size_t idx = 0; idx < iterations; ++idx)
11824         {
11825                 const deFloat16*        input1Vec       = &input1AsFP16[componentsStrideSrc0 * idx];
11826                 const deFloat16*        input2Vec       = &input2AsFP16[componentsStrideSrc1 * idx];
11827                 const deFloat16*        outputVec       = &outputAsFP16[componentsStrideDst * idx];
11828
11829                 for (size_t compNdx = 0; compNdx < DST_COMPONENTS_COUNT; ++compNdx)
11830                 {
11831                         bool            validate        = true;
11832                         deFloat16       expected        = getShuffledComponent(idx, compNdx, input1Vec, input2Vec, SRC0_COMPONENTS_COUNT, SRC1_COMPONENTS_COUNT, validate);
11833
11834                         if (validate && !compare16BitFloat(expected, outputVec[compNdx], error))
11835                         {
11836                                 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
11837
11838                                 return false;
11839                         }
11840                 }
11841         }
11842
11843         return true;
11844 }
11845
11846 VerifyIOFunc getFloat16VectorShuffleVerifyIOFunc (deUint32 dstComponentsCount, deUint32 src0ComponentsCount, deUint32 src1ComponentsCount)
11847 {
11848         DE_ASSERT(dstComponentsCount <= 4);
11849         DE_ASSERT(src0ComponentsCount <= 4);
11850         DE_ASSERT(src1ComponentsCount <= 4);
11851         deUint32 funcCode = 100 * dstComponentsCount + 10 * src0ComponentsCount + src1ComponentsCount;
11852
11853         switch (funcCode)
11854         {
11855                 case 222:return compareFP16VectorShuffleFunc<2, 2, 2>;
11856                 case 223:return compareFP16VectorShuffleFunc<2, 2, 3>;
11857                 case 224:return compareFP16VectorShuffleFunc<2, 2, 4>;
11858                 case 232:return compareFP16VectorShuffleFunc<2, 3, 2>;
11859                 case 233:return compareFP16VectorShuffleFunc<2, 3, 3>;
11860                 case 234:return compareFP16VectorShuffleFunc<2, 3, 4>;
11861                 case 242:return compareFP16VectorShuffleFunc<2, 4, 2>;
11862                 case 243:return compareFP16VectorShuffleFunc<2, 4, 3>;
11863                 case 244:return compareFP16VectorShuffleFunc<2, 4, 4>;
11864                 case 322:return compareFP16VectorShuffleFunc<3, 2, 2>;
11865                 case 323:return compareFP16VectorShuffleFunc<3, 2, 3>;
11866                 case 324:return compareFP16VectorShuffleFunc<3, 2, 4>;
11867                 case 332:return compareFP16VectorShuffleFunc<3, 3, 2>;
11868                 case 333:return compareFP16VectorShuffleFunc<3, 3, 3>;
11869                 case 334:return compareFP16VectorShuffleFunc<3, 3, 4>;
11870                 case 342:return compareFP16VectorShuffleFunc<3, 4, 2>;
11871                 case 343:return compareFP16VectorShuffleFunc<3, 4, 3>;
11872                 case 344:return compareFP16VectorShuffleFunc<3, 4, 4>;
11873                 case 422:return compareFP16VectorShuffleFunc<4, 2, 2>;
11874                 case 423:return compareFP16VectorShuffleFunc<4, 2, 3>;
11875                 case 424:return compareFP16VectorShuffleFunc<4, 2, 4>;
11876                 case 432:return compareFP16VectorShuffleFunc<4, 3, 2>;
11877                 case 433:return compareFP16VectorShuffleFunc<4, 3, 3>;
11878                 case 434:return compareFP16VectorShuffleFunc<4, 3, 4>;
11879                 case 442:return compareFP16VectorShuffleFunc<4, 4, 2>;
11880                 case 443:return compareFP16VectorShuffleFunc<4, 4, 3>;
11881                 case 444:return compareFP16VectorShuffleFunc<4, 4, 4>;
11882                 default: TCU_THROW(InternalError, "Invalid number of components specified.");
11883         }
11884 }
11885
11886 template<class SpecResource>
11887 tcu::TestCaseGroup* createFloat16VectorShuffleSet (tcu::TestContext& testCtx)
11888 {
11889         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "opvectorshuffle", "OpVectorShuffle tests"));
11890         const int                                                       testSpecificSeed        = deStringHash(testGroup->getName());
11891         const int                                                       seed                            = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
11892         de::Random                                                      rnd                                     (seed);
11893         const deUint32                                          numDataPoints           = 128;
11894         map<string, string>                                     fragments;
11895
11896         struct TestType
11897         {
11898                 const deUint32  typeComponents;
11899                 const char*             typeName;
11900         };
11901
11902         const TestType  testTypes[]     =
11903         {
11904                 {
11905                         2,
11906                         "v2f16",
11907                 },
11908                 {
11909                         3,
11910                         "v3f16",
11911                 },
11912                 {
11913                         4,
11914                         "v4f16",
11915                 },
11916         };
11917
11918         const StringTemplate preMain
11919         (
11920                 "    %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11921                 "     %c_i32_cc = OpConstant %i32 ${case_count}\n"
11922                 "          %f16 = OpTypeFloat 16\n"
11923                 "        %v2f16 = OpTypeVector %f16 2\n"
11924                 "        %v3f16 = OpTypeVector %f16 3\n"
11925                 "        %v4f16 = OpTypeVector %f16 4\n"
11926
11927                 "     %up_v2f16 = OpTypePointer Uniform %v2f16\n"
11928                 "     %ra_v2f16 = OpTypeArray %v2f16 %c_i32_ndp\n"
11929                 "   %SSBO_v2f16 = OpTypeStruct %ra_v2f16\n"
11930                 "%up_SSBO_v2f16 = OpTypePointer Uniform %SSBO_v2f16\n"
11931
11932                 "     %up_v3f16 = OpTypePointer Uniform %v3f16\n"
11933                 "     %ra_v3f16 = OpTypeArray %v3f16 %c_i32_ndp\n"
11934                 "   %SSBO_v3f16 = OpTypeStruct %ra_v3f16\n"
11935                 "%up_SSBO_v3f16 = OpTypePointer Uniform %SSBO_v3f16\n"
11936
11937                 "     %up_v4f16 = OpTypePointer Uniform %v4f16\n"
11938                 "     %ra_v4f16 = OpTypeArray %v4f16 %c_i32_ndp\n"
11939                 "   %SSBO_v4f16 = OpTypeStruct %ra_v4f16\n"
11940                 "%up_SSBO_v4f16 = OpTypePointer Uniform %SSBO_v4f16\n"
11941
11942                 "        %fun_t = OpTypeFunction %${tt_dst} %${tt_src0} %${tt_src1} %i32\n"
11943
11944                 "    %ssbo_src0 = OpVariable %up_SSBO_${tt_src0} Uniform\n"
11945                 "    %ssbo_src1 = OpVariable %up_SSBO_${tt_src1} Uniform\n"
11946                 "     %ssbo_dst = OpVariable %up_SSBO_${tt_dst} Uniform\n"
11947         );
11948
11949         const StringTemplate decoration
11950         (
11951                 "OpDecorate %ra_v2f16 ArrayStride 4\n"
11952                 "OpDecorate %ra_v3f16 ArrayStride 8\n"
11953                 "OpDecorate %ra_v4f16 ArrayStride 8\n"
11954
11955                 "OpMemberDecorate %SSBO_v2f16 0 Offset 0\n"
11956                 "OpDecorate %SSBO_v2f16 BufferBlock\n"
11957
11958                 "OpMemberDecorate %SSBO_v3f16 0 Offset 0\n"
11959                 "OpDecorate %SSBO_v3f16 BufferBlock\n"
11960
11961                 "OpMemberDecorate %SSBO_v4f16 0 Offset 0\n"
11962                 "OpDecorate %SSBO_v4f16 BufferBlock\n"
11963
11964                 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
11965                 "OpDecorate %ssbo_src0 Binding 0\n"
11966                 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
11967                 "OpDecorate %ssbo_src1 Binding 1\n"
11968                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11969                 "OpDecorate %ssbo_dst Binding 2\n"
11970         );
11971
11972         const StringTemplate testFun
11973         (
11974                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11975                 "    %param = OpFunctionParameter %v4f32\n"
11976                 "    %entry = OpLabel\n"
11977
11978                 "        %i = OpVariable %fp_i32 Function\n"
11979                 "             OpStore %i %c_i32_0\n"
11980
11981                 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
11982                 "             OpSelectionMerge %end_if None\n"
11983                 "             OpBranchConditional %will_run %run_test %end_if\n"
11984
11985                 " %run_test = OpLabel\n"
11986                 "             OpBranch %loop\n"
11987
11988                 "     %loop = OpLabel\n"
11989                 "    %i_cmp = OpLoad %i32 %i\n"
11990                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11991                 "             OpLoopMerge %merge %next None\n"
11992                 "             OpBranchConditional %lt %write %merge\n"
11993
11994                 "    %write = OpLabel\n"
11995                 "      %ndx = OpLoad %i32 %i\n"
11996                 "     %src0 = OpAccessChain %up_${tt_src0} %ssbo_src0 %c_i32_0 %ndx\n"
11997                 " %val_src0 = OpLoad %${tt_src0} %src0\n"
11998                 "     %src1 = OpAccessChain %up_${tt_src1} %ssbo_src1 %c_i32_0 %ndx\n"
11999                 " %val_src1 = OpLoad %${tt_src1} %src1\n"
12000                 "  %val_dst = OpFunctionCall %${tt_dst} %sw_fun %val_src0 %val_src1 %ndx\n"
12001                 "      %dst = OpAccessChain %up_${tt_dst} %ssbo_dst %c_i32_0 %ndx\n"
12002                 "             OpStore %dst %val_dst\n"
12003                 "             OpBranch %next\n"
12004
12005                 "     %next = OpLabel\n"
12006                 "    %i_cur = OpLoad %i32 %i\n"
12007                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12008                 "             OpStore %i %i_new\n"
12009                 "             OpBranch %loop\n"
12010
12011                 "    %merge = OpLabel\n"
12012                 "             OpBranch %end_if\n"
12013                 "   %end_if = OpLabel\n"
12014                 "             OpReturnValue %param\n"
12015                 "             OpFunctionEnd\n"
12016                 "\n"
12017
12018                 "   %sw_fun = OpFunction %${tt_dst} None %fun_t\n"
12019                 "%sw_param0 = OpFunctionParameter %${tt_src0}\n"
12020                 "%sw_param1 = OpFunctionParameter %${tt_src1}\n"
12021                 "%sw_paramn = OpFunctionParameter %i32\n"
12022                 " %sw_entry = OpLabel\n"
12023                 "   %modulo = OpSMod %i32 %sw_paramn %c_i32_cc\n"
12024                 "             OpSelectionMerge %switch_e None\n"
12025                 "             OpSwitch %modulo %default ${case_list}\n"
12026                 "${case_bodies}"
12027                 "%default   = OpLabel\n"
12028                 "             OpUnreachable\n" // Unreachable default case for switch statement
12029                 "%switch_e  = OpLabel\n"
12030                 "             OpUnreachable\n" // Unreachable merge block for switch statement
12031                 "             OpFunctionEnd\n"
12032         );
12033
12034         const StringTemplate testCaseBody
12035         (
12036                 "%case_${case_ndx}    = OpLabel\n"
12037                 "%val_dst_${case_ndx} = OpVectorShuffle %${tt_dst} %sw_param0 %sw_param1 ${shuffle}\n"
12038                 "             OpReturnValue %val_dst_${case_ndx}\n"
12039         );
12040
12041         for (deUint32 dstTypeIdx = 0; dstTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++dstTypeIdx)
12042         {
12043                 const TestType& dstType                 = testTypes[dstTypeIdx];
12044
12045                 for (deUint32 comp0Idx = 0; comp0Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp0Idx)
12046                 {
12047                         const TestType& src0Type        = testTypes[comp0Idx];
12048
12049                         for (deUint32 comp1Idx = 0; comp1Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp1Idx)
12050                         {
12051                                 const TestType&                 src1Type                        = testTypes[comp1Idx];
12052                                 const deUint32                  input0Stride            = (src0Type.typeComponents == 3) ? 4 : src0Type.typeComponents;
12053                                 const deUint32                  input1Stride            = (src1Type.typeComponents == 3) ? 4 : src1Type.typeComponents;
12054                                 const deUint32                  outputStride            = (dstType.typeComponents == 3) ? 4 : dstType.typeComponents;
12055                                 const vector<deFloat16> float16Input0Data       = getFloat16s(rnd, input0Stride * numDataPoints);
12056                                 const vector<deFloat16> float16Input1Data       = getFloat16s(rnd, input1Stride * numDataPoints);
12057                                 const vector<deFloat16> float16OutputDummy      (outputStride * numDataPoints, 0);
12058                                 const string                    testName                        = de::toString(dstType.typeComponents) + de::toString(src0Type.typeComponents) + de::toString(src1Type.typeComponents);
12059                                 deUint32                                caseCount                       = 0;
12060                                 SpecResource                    specResource;
12061                                 map<string, string>             specs;
12062                                 vector<string>                  extensions;
12063                                 VulkanFeatures                  features;
12064                                 string                                  caseBodies;
12065                                 string                                  caseList;
12066
12067                                 // Generate case
12068                                 {
12069                                         vector<string>  componentList;
12070
12071                                         // Generate component possible indices for OpVectorShuffle for components 0 and 1 in output vector
12072                                         {
12073                                                 deUint32                caseNo          = 0;
12074
12075                                                 for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < src0Type.typeComponents; ++comp0IdxLocal)
12076                                                         componentList.push_back(de::toString(caseNo++));
12077                                                 for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < src1Type.typeComponents; ++comp1IdxLocal)
12078                                                         componentList.push_back(de::toString(caseNo++));
12079                                                 componentList.push_back("0xFFFFFFFF");
12080                                         }
12081
12082                                         for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < componentList.size(); ++comp0IdxLocal)
12083                                         {
12084                                                 for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < componentList.size(); ++comp1IdxLocal)
12085                                                 {
12086                                                         map<string, string>     specCase;
12087                                                         string                          shuffle         = componentList[comp0IdxLocal] + " " + componentList[comp1IdxLocal];
12088
12089                                                         for (deUint32 compIdx = 2; compIdx < dstType.typeComponents; ++compIdx)
12090                                                                 shuffle += " " + de::toString(compIdx - 2);
12091
12092                                                         specCase["case_ndx"]    = de::toString(caseCount);
12093                                                         specCase["shuffle"]             = shuffle;
12094                                                         specCase["tt_dst"]              = dstType.typeName;
12095
12096                                                         caseBodies      += testCaseBody.specialize(specCase);
12097                                                         caseList        += de::toString(caseCount) + " %case_" + de::toString(caseCount) + " ";
12098
12099                                                         caseCount++;
12100                                                 }
12101                                         }
12102                                 }
12103
12104                                 specs["num_data_points"]        = de::toString(numDataPoints);
12105                                 specs["tt_dst"]                         = dstType.typeName;
12106                                 specs["tt_src0"]                        = src0Type.typeName;
12107                                 specs["tt_src1"]                        = src1Type.typeName;
12108                                 specs["case_bodies"]            = caseBodies;
12109                                 specs["case_list"]                      = caseList;
12110                                 specs["case_count"]                     = de::toString(caseCount);
12111
12112                                 fragments["extension"]          = "OpExtension \"SPV_KHR_16bit_storage\"";
12113                                 fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\nOpCapability Float16\n";
12114                                 fragments["decoration"]         = decoration.specialize(specs);
12115                                 fragments["pre_main"]           = preMain.specialize(specs);
12116                                 fragments["testfun"]            = testFun.specialize(specs);
12117
12118                                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input0Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12119                                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input1Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12120                                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12121                                 specResource.verifyIO = getFloat16VectorShuffleVerifyIOFunc(dstType.typeComponents, src0Type.typeComponents, src1Type.typeComponents);
12122
12123                                 extensions.push_back("VK_KHR_16bit_storage");
12124                                 extensions.push_back("VK_KHR_shader_float16_int8");
12125
12126                                 features.extFloat16Int8         = EXTFLOAT16INT8FEATURES_FLOAT16;
12127                                 features.ext16BitStorage        = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
12128
12129                                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12130                         }
12131                 }
12132         }
12133
12134         return testGroup.release();
12135 }
12136
12137 bool compareFP16CompositeFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12138 {
12139         if (inputs.size() != 1 || outputAllocs.size() != 1)
12140                 return false;
12141
12142         vector<deUint8> input1Bytes;
12143
12144         inputs[0].getBytes(input1Bytes);
12145
12146         DE_ASSERT(input1Bytes.size() > 0);
12147         DE_ASSERT(input1Bytes.size() % sizeof(deFloat16) == 0);
12148
12149         const size_t                    iterations              = input1Bytes.size() / sizeof(deFloat16);
12150         const deFloat16* const  input1AsFP16    = (const deFloat16*)&input1Bytes[0];
12151         const deFloat16* const  outputAsFP16    = (const deFloat16*)outputAllocs[0]->getHostPtr();
12152         const deFloat16                 exceptionValue  = tcu::Float16(-1.0).bits();
12153         std::string                             error;
12154
12155         for (size_t idx = 0; idx < iterations; ++idx)
12156         {
12157                 if (input1AsFP16[idx] == exceptionValue)
12158                         continue;
12159
12160                 if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
12161                 {
12162                         log << TestLog::Message << "At " << idx << ":" << error << TestLog::EndMessage;
12163
12164                         return false;
12165                 }
12166         }
12167
12168         return true;
12169 }
12170
12171 template<class SpecResource>
12172 tcu::TestCaseGroup* createFloat16CompositeConstructSet (tcu::TestContext& testCtx)
12173 {
12174         de::MovePtr<tcu::TestCaseGroup>         testGroup                               (new tcu::TestCaseGroup(testCtx, "opcompositeconstruct", "OpCompositeConstruct tests"));
12175         const deUint32                                          numElements                             = 8;
12176         const string                                            testName                                = "struct";
12177         const deUint32                                          structItemsCount                = 88;
12178         const deUint32                                          exceptionIndices[]              = { 1, 7, 15, 17, 25, 33, 51, 55, 59, 63, 67, 71, 84, 85, 86, 87 };
12179         const deFloat16                                         exceptionValue                  = tcu::Float16(-1.0).bits();
12180         const deUint32                                          fieldModifier                   = 2;
12181         const deUint32                                          fieldModifiedMulIndex   = 60;
12182         const deUint32                                          fieldModifiedAddIndex   = 66;
12183
12184         const StringTemplate preMain
12185         (
12186                 "    %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
12187                 "          %f16 = OpTypeFloat 16\n"
12188                 "        %v2f16 = OpTypeVector %f16 2\n"
12189                 "        %v3f16 = OpTypeVector %f16 3\n"
12190                 "        %v4f16 = OpTypeVector %f16 4\n"
12191                 "    %c_f16_mod = OpConstant %f16 ${field_modifier}\n"
12192
12193                 "${consts}"
12194
12195                 "      %c_u32_5 = OpConstant %u32 5\n"
12196
12197                 " %f16arr3      = OpTypeArray %f16 %c_u32_3\n"
12198                 " %v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
12199                 " %v2f16arr5    = OpTypeArray %v2f16 %c_u32_5\n"
12200                 " %v3f16arr5    = OpTypeArray %v3f16 %c_u32_5\n"
12201                 " %v4f16arr3    = OpTypeArray %v4f16 %c_u32_3\n"
12202                 " %struct16     = OpTypeStruct %f16 %v2f16arr3\n"
12203                 " %struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
12204                 " %st_test      = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr3 %v2f16arr5 %f16 %v3f16arr5 %v4f16arr3\n"
12205
12206                 "        %up_st = OpTypePointer Uniform %st_test\n"
12207                 "        %ra_st = OpTypeArray %st_test %c_i32_ndp\n"
12208                 "      %SSBO_st = OpTypeStruct %ra_st\n"
12209                 "   %up_SSBO_st = OpTypePointer Uniform %SSBO_st\n"
12210
12211                 "     %ssbo_dst = OpVariable %up_SSBO_st Uniform\n"
12212         );
12213
12214         const StringTemplate decoration
12215         (
12216                 "OpDecorate %SSBO_st BufferBlock\n"
12217                 "OpDecorate %ra_st ArrayStride ${struct_item_size}\n"
12218                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12219                 "OpDecorate %ssbo_dst Binding 1\n"
12220
12221                 "OpMemberDecorate %SSBO_st 0 Offset 0\n"
12222
12223                 "OpDecorate %v2f16arr3 ArrayStride 4\n"
12224                 "OpMemberDecorate %struct16 0 Offset 0\n"
12225                 "OpMemberDecorate %struct16 1 Offset 4\n"
12226                 "OpDecorate %struct16arr3 ArrayStride 16\n"
12227                 "OpDecorate %f16arr3 ArrayStride 2\n"
12228                 "OpDecorate %v2f16arr5 ArrayStride 4\n"
12229                 "OpDecorate %v3f16arr5 ArrayStride 8\n"
12230                 "OpDecorate %v4f16arr3 ArrayStride 8\n"
12231
12232                 "OpMemberDecorate %st_test 0 Offset 0\n"
12233                 "OpMemberDecorate %st_test 1 Offset 4\n"
12234                 "OpMemberDecorate %st_test 2 Offset 8\n"
12235                 "OpMemberDecorate %st_test 3 Offset 16\n"
12236                 "OpMemberDecorate %st_test 4 Offset 24\n"
12237                 "OpMemberDecorate %st_test 5 Offset 32\n"
12238                 "OpMemberDecorate %st_test 6 Offset 80\n"
12239                 "OpMemberDecorate %st_test 7 Offset 100\n"
12240                 "OpMemberDecorate %st_test 8 Offset 104\n"
12241                 "OpMemberDecorate %st_test 9 Offset 144\n"
12242         );
12243
12244         const StringTemplate testFun
12245         (
12246                 " %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12247                 "     %param = OpFunctionParameter %v4f32\n"
12248                 "     %entry = OpLabel\n"
12249
12250                 "         %i = OpVariable %fp_i32 Function\n"
12251                 "              OpStore %i %c_i32_0\n"
12252
12253                 "  %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12254                 "              OpSelectionMerge %end_if None\n"
12255                 "              OpBranchConditional %will_run %run_test %end_if\n"
12256
12257                 "  %run_test = OpLabel\n"
12258                 "              OpBranch %loop\n"
12259
12260                 "      %loop = OpLabel\n"
12261                 "     %i_cmp = OpLoad %i32 %i\n"
12262                 "        %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12263                 "              OpLoopMerge %merge %next None\n"
12264                 "              OpBranchConditional %lt %write %merge\n"
12265
12266                 "     %write = OpLabel\n"
12267                 "       %ndx = OpLoad %i32 %i\n"
12268
12269                 "      %fld1 = OpCompositeConstruct %v2f16 %c_f16_2 %c_f16_3\n"
12270                 "      %fld2 = OpCompositeConstruct %v3f16 %c_f16_4 %c_f16_5 %c_f16_6\n"
12271                 "      %fld3 = OpCompositeConstruct %v4f16 %c_f16_8 %c_f16_9 %c_f16_10 %c_f16_11\n"
12272
12273                 "      %fld4 = OpCompositeConstruct %f16arr3 %c_f16_12 %c_f16_13 %c_f16_14\n"
12274
12275                 "%fld5_0_1_0 = OpCompositeConstruct %v2f16 %c_f16_18 %c_f16_19\n"
12276                 "%fld5_0_1_1 = OpCompositeConstruct %v2f16 %c_f16_20 %c_f16_21\n"
12277                 "%fld5_0_1_2 = OpCompositeConstruct %v2f16 %c_f16_22 %c_f16_23\n"
12278                 "  %fld5_0_1 = OpCompositeConstruct %v2f16arr3 %fld5_0_1_0 %fld5_0_1_1 %fld5_0_1_2\n"
12279                 "    %fld5_0 = OpCompositeConstruct %struct16 %c_f16_16 %fld5_0_1\n"
12280
12281                 "%fld5_1_1_0 = OpCompositeConstruct %v2f16 %c_f16_26 %c_f16_27\n"
12282                 "%fld5_1_1_1 = OpCompositeConstruct %v2f16 %c_f16_28 %c_f16_29\n"
12283                 "%fld5_1_1_2 = OpCompositeConstruct %v2f16 %c_f16_30 %c_f16_31\n"
12284                 "  %fld5_1_1 = OpCompositeConstruct %v2f16arr3 %fld5_1_1_0 %fld5_1_1_1 %fld5_1_1_2\n"
12285                 "    %fld5_1 = OpCompositeConstruct %struct16 %c_f16_24 %fld5_1_1\n"
12286
12287                 "%fld5_2_1_0 = OpCompositeConstruct %v2f16 %c_f16_34 %c_f16_35\n"
12288                 "%fld5_2_1_1 = OpCompositeConstruct %v2f16 %c_f16_36 %c_f16_37\n"
12289                 "%fld5_2_1_2 = OpCompositeConstruct %v2f16 %c_f16_38 %c_f16_39\n"
12290                 "  %fld5_2_1 = OpCompositeConstruct %v2f16arr3 %fld5_2_1_0 %fld5_2_1_1 %fld5_2_1_2\n"
12291                 "    %fld5_2 = OpCompositeConstruct %struct16 %c_f16_32 %fld5_2_1\n"
12292
12293                 "      %fld5 = OpCompositeConstruct %struct16arr3 %fld5_0 %fld5_1 %fld5_2\n"
12294
12295                 "    %fld6_0 = OpCompositeConstruct %v2f16 %c_f16_40 %c_f16_41\n"
12296                 "    %fld6_1 = OpCompositeConstruct %v2f16 %c_f16_42 %c_f16_43\n"
12297                 "    %fld6_2 = OpCompositeConstruct %v2f16 %c_f16_44 %c_f16_45\n"
12298                 "    %fld6_3 = OpCompositeConstruct %v2f16 %c_f16_46 %c_f16_47\n"
12299                 "    %fld6_4 = OpCompositeConstruct %v2f16 %c_f16_48 %c_f16_49\n"
12300                 "      %fld6 = OpCompositeConstruct %v2f16arr5 %fld6_0 %fld6_1 %fld6_2 %fld6_3 %fld6_4\n"
12301
12302                 "      %fndx = OpConvertSToF %f16 %ndx\n"
12303                 "  %fld8_2a0 = OpFMul %f16 %fndx %c_f16_mod\n"
12304                 "  %fld8_3b1 = OpFAdd %f16 %fndx %c_f16_mod\n"
12305
12306                 "   %fld8_2a = OpCompositeConstruct %v2f16 %fld8_2a0 %c_f16_61\n"
12307                 "   %fld8_3b = OpCompositeConstruct %v2f16 %c_f16_65 %fld8_3b1\n"
12308                 "    %fld8_0 = OpCompositeConstruct %v3f16 %c_f16_52 %c_f16_53 %c_f16_54\n"
12309                 "    %fld8_1 = OpCompositeConstruct %v3f16 %c_f16_56 %c_f16_57 %c_f16_58\n"
12310                 "    %fld8_2 = OpCompositeConstruct %v3f16 %fld8_2a %c_f16_62\n"
12311                 "    %fld8_3 = OpCompositeConstruct %v3f16 %c_f16_64 %fld8_3b\n"
12312                 "    %fld8_4 = OpCompositeConstruct %v3f16 %c_f16_68 %c_f16_69 %c_f16_70\n"
12313                 "      %fld8 = OpCompositeConstruct %v3f16arr5 %fld8_0 %fld8_1 %fld8_2 %fld8_3 %fld8_4\n"
12314
12315                 "    %fld9_0 = OpCompositeConstruct %v4f16 %c_f16_72 %c_f16_73 %c_f16_74 %c_f16_75\n"
12316                 "    %fld9_1 = OpCompositeConstruct %v4f16 %c_f16_76 %c_f16_77 %c_f16_78 %c_f16_79\n"
12317                 "    %fld9_2 = OpCompositeConstruct %v4f16 %c_f16_80 %c_f16_81 %c_f16_82 %c_f16_83\n"
12318                 "      %fld9 = OpCompositeConstruct %v4f16arr3 %fld9_0 %fld9_1 %fld9_2\n"
12319
12320                 "    %st_val = OpCompositeConstruct %st_test %c_f16_0 %fld1 %fld2 %fld3 %fld4 %fld5 %fld6 %c_f16_50 %fld8 %fld9\n"
12321                 "       %dst = OpAccessChain %up_st %ssbo_dst %c_i32_0 %ndx\n"
12322                 "              OpStore %dst %st_val\n"
12323
12324                 "              OpBranch %next\n"
12325
12326                 "      %next = OpLabel\n"
12327                 "     %i_cur = OpLoad %i32 %i\n"
12328                 "     %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12329                 "              OpStore %i %i_new\n"
12330                 "              OpBranch %loop\n"
12331
12332                 "     %merge = OpLabel\n"
12333                 "              OpBranch %end_if\n"
12334                 "    %end_if = OpLabel\n"
12335                 "              OpReturnValue %param\n"
12336                 "              OpFunctionEnd\n"
12337         );
12338
12339         {
12340                 SpecResource            specResource;
12341                 map<string, string>     specs;
12342                 VulkanFeatures          features;
12343                 map<string, string>     fragments;
12344                 vector<string>          extensions;
12345                 vector<deFloat16>       expectedOutput;
12346                 string                          consts;
12347
12348                 for (deUint32 elementNdx = 0; elementNdx < numElements; ++elementNdx)
12349                 {
12350                         vector<deFloat16>       expectedIterationOutput;
12351
12352                         for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
12353                                 expectedIterationOutput.push_back(tcu::Float16(float(structItemNdx)).bits());
12354
12355                         for (deUint32 structItemNdx = 0; structItemNdx < DE_LENGTH_OF_ARRAY(exceptionIndices); ++structItemNdx)
12356                                 expectedIterationOutput[exceptionIndices[structItemNdx]] = exceptionValue;
12357
12358                         expectedIterationOutput[fieldModifiedMulIndex] = tcu::Float16(float(elementNdx * fieldModifier)).bits();
12359                         expectedIterationOutput[fieldModifiedAddIndex] = tcu::Float16(float(elementNdx + fieldModifier)).bits();
12360
12361                         expectedOutput.insert(expectedOutput.end(), expectedIterationOutput.begin(), expectedIterationOutput.end());
12362                 }
12363
12364                 for (deUint32 i = 0; i < structItemsCount; ++i)
12365                         consts += "     %c_f16_" + de::toString(i) + " = OpConstant %f16 "  + de::toString(i) + "\n";
12366
12367                 specs["num_elements"]           = de::toString(numElements);
12368                 specs["struct_item_size"]       = de::toString(structItemsCount * sizeof(deFloat16));
12369                 specs["field_modifier"]         = de::toString(fieldModifier);
12370                 specs["consts"]                         = consts;
12371
12372                 fragments["extension"]          = "OpExtension \"SPV_KHR_16bit_storage\"";
12373                 fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\nOpCapability Float16\n";
12374                 fragments["decoration"]         = decoration.specialize(specs);
12375                 fragments["pre_main"]           = preMain.specialize(specs);
12376                 fragments["testfun"]            = testFun.specialize(specs);
12377
12378                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12379                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12380                 specResource.verifyIO = compareFP16CompositeFunc;
12381
12382                 extensions.push_back("VK_KHR_16bit_storage");
12383                 extensions.push_back("VK_KHR_shader_float16_int8");
12384
12385                 features.extFloat16Int8         = EXTFLOAT16INT8FEATURES_FLOAT16;
12386                 features.ext16BitStorage        = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
12387
12388                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12389         }
12390
12391         return testGroup.release();
12392 }
12393
12394 template<class SpecResource>
12395 tcu::TestCaseGroup* createFloat16CompositeInsertExtractSet (tcu::TestContext& testCtx, const char* op)
12396 {
12397         de::MovePtr<tcu::TestCaseGroup>         testGroup               (new tcu::TestCaseGroup(testCtx, de::toLower(op).c_str(), op));
12398         const deFloat16                                         exceptionValue  = tcu::Float16(-1.0).bits();
12399         const string                                            opName                  (op);
12400         const deUint32                                          opIndex                 = (opName == "OpCompositeInsert") ? 0
12401                                                                                                                 : (opName == "OpCompositeExtract") ? 1
12402                                                                                                                 : -1;
12403
12404         const StringTemplate preMain
12405         (
12406                 "   %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
12407                 "         %f16 = OpTypeFloat 16\n"
12408                 "       %v2f16 = OpTypeVector %f16 2\n"
12409                 "       %v3f16 = OpTypeVector %f16 3\n"
12410                 "       %v4f16 = OpTypeVector %f16 4\n"
12411                 "    %c_f16_na = OpConstant %f16 -1.0\n"
12412                 "     %c_u32_5 = OpConstant %u32 5\n"
12413
12414                 "%f16arr3      = OpTypeArray %f16 %c_u32_3\n"
12415                 "%v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
12416                 "%v2f16arr5    = OpTypeArray %v2f16 %c_u32_5\n"
12417                 "%v3f16arr5    = OpTypeArray %v3f16 %c_u32_5\n"
12418                 "%v4f16arr3    = OpTypeArray %v4f16 %c_u32_3\n"
12419                 "%struct16     = OpTypeStruct %f16 %v2f16arr3\n"
12420                 "%struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
12421                 "%st_test      = OpTypeStruct %${field_type}\n"
12422
12423                 "      %up_f16 = OpTypePointer Uniform %f16\n"
12424                 "       %up_st = OpTypePointer Uniform %st_test\n"
12425                 "      %ra_f16 = OpTypeArray %f16 %c_i32_ndp\n"
12426                 "       %ra_st = OpTypeArray %st_test %c_i32_1\n"
12427
12428                 "${op_premain_decls}"
12429
12430                 " %up_SSBO_src = OpTypePointer Uniform %SSBO_src\n"
12431                 " %up_SSBO_dst = OpTypePointer Uniform %SSBO_dst\n"
12432
12433                 "    %ssbo_src = OpVariable %up_SSBO_src Uniform\n"
12434                 "    %ssbo_dst = OpVariable %up_SSBO_dst Uniform\n"
12435         );
12436
12437         const StringTemplate decoration
12438         (
12439                 "OpDecorate %SSBO_src BufferBlock\n"
12440                 "OpDecorate %SSBO_dst BufferBlock\n"
12441                 "OpDecorate %ra_f16 ArrayStride 2\n"
12442                 "OpDecorate %ra_st ArrayStride ${struct_item_size}\n"
12443                 "OpDecorate %ssbo_src DescriptorSet 0\n"
12444                 "OpDecorate %ssbo_src Binding 0\n"
12445                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12446                 "OpDecorate %ssbo_dst Binding 1\n"
12447
12448                 "OpMemberDecorate %SSBO_src 0 Offset 0\n"
12449                 "OpMemberDecorate %SSBO_dst 0 Offset 0\n"
12450
12451                 "OpDecorate %v2f16arr3 ArrayStride 4\n"
12452                 "OpMemberDecorate %struct16 0 Offset 0\n"
12453                 "OpMemberDecorate %struct16 1 Offset 4\n"
12454                 "OpDecorate %struct16arr3 ArrayStride 16\n"
12455                 "OpDecorate %f16arr3 ArrayStride 2\n"
12456                 "OpDecorate %v2f16arr5 ArrayStride 4\n"
12457                 "OpDecorate %v3f16arr5 ArrayStride 8\n"
12458                 "OpDecorate %v4f16arr3 ArrayStride 8\n"
12459
12460                 "OpMemberDecorate %st_test 0 Offset 0\n"
12461         );
12462
12463         const StringTemplate testFun
12464         (
12465                 " %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12466                 "     %param = OpFunctionParameter %v4f32\n"
12467                 "     %entry = OpLabel\n"
12468
12469                 "         %i = OpVariable %fp_i32 Function\n"
12470                 "              OpStore %i %c_i32_0\n"
12471
12472                 "  %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12473                 "              OpSelectionMerge %end_if None\n"
12474                 "              OpBranchConditional %will_run %run_test %end_if\n"
12475
12476                 "  %run_test = OpLabel\n"
12477                 "              OpBranch %loop\n"
12478
12479                 "      %loop = OpLabel\n"
12480                 "     %i_cmp = OpLoad %i32 %i\n"
12481                 "        %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12482                 "              OpLoopMerge %merge %next None\n"
12483                 "              OpBranchConditional %lt %write %merge\n"
12484
12485                 "     %write = OpLabel\n"
12486                 "       %ndx = OpLoad %i32 %i\n"
12487
12488                 "${op_sw_fun_call}"
12489
12490                 "              OpStore %dst %val_dst\n"
12491                 "              OpBranch %next\n"
12492
12493                 "      %next = OpLabel\n"
12494                 "     %i_cur = OpLoad %i32 %i\n"
12495                 "     %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12496                 "              OpStore %i %i_new\n"
12497                 "              OpBranch %loop\n"
12498
12499                 "     %merge = OpLabel\n"
12500                 "              OpBranch %end_if\n"
12501                 "    %end_if = OpLabel\n"
12502                 "              OpReturnValue %param\n"
12503                 "              OpFunctionEnd\n"
12504
12505                 "${op_sw_fun_header}"
12506                 " %sw_param = OpFunctionParameter %st_test\n"
12507                 "%sw_paramn = OpFunctionParameter %i32\n"
12508                 " %sw_entry = OpLabel\n"
12509                 "             OpSelectionMerge %switch_e None\n"
12510                 "             OpSwitch %sw_paramn %default ${case_list}\n"
12511
12512                 "${case_bodies}"
12513
12514                 "%default   = OpLabel\n"
12515                 "             OpReturnValue ${op_case_default_value}\n"
12516                 "%switch_e  = OpLabel\n"
12517                 "             OpUnreachable\n" // Unreachable merge block for switch statement
12518                 "             OpFunctionEnd\n"
12519         );
12520
12521         const StringTemplate testCaseBody
12522         (
12523                 "%case_${case_ndx}    = OpLabel\n"
12524                 "%val_ret_${case_ndx} = ${op_name} ${op_args_part} ${access_path}\n"
12525                 "             OpReturnValue %val_ret_${case_ndx}\n"
12526         );
12527
12528         struct OpParts
12529         {
12530                 const char*     premainDecls;
12531                 const char*     swFunCall;
12532                 const char*     swFunHeader;
12533                 const char*     caseDefaultValue;
12534                 const char*     argsPartial;
12535         };
12536
12537         OpParts                                                         opPartsArray[]                  =
12538         {
12539                 // OpCompositeInsert
12540                 {
12541                         "       %fun_t = OpTypeFunction %st_test %f16 %st_test %i32\n"
12542                         "    %SSBO_src = OpTypeStruct %ra_f16\n"
12543                         "    %SSBO_dst = OpTypeStruct %ra_st\n",
12544
12545                         "       %src = OpAccessChain %up_f16 %ssbo_src %c_i32_0 %ndx\n"
12546                         "       %dst = OpAccessChain %up_st %ssbo_dst %c_i32_0 %c_i32_0\n"
12547                         "   %val_new = OpLoad %f16 %src\n"
12548                         "   %val_old = OpLoad %st_test %dst\n"
12549                         "   %val_dst = OpFunctionCall %st_test %sw_fun %val_new %val_old %ndx\n",
12550
12551                         "   %sw_fun = OpFunction %st_test None %fun_t\n"
12552                         "%sw_paramv = OpFunctionParameter %f16\n",
12553
12554                         "%sw_param",
12555
12556                         "%st_test %sw_paramv %sw_param",
12557                 },
12558                 // OpCompositeExtract
12559                 {
12560                         "       %fun_t = OpTypeFunction %f16 %st_test %i32\n"
12561                         "    %SSBO_src = OpTypeStruct %ra_st\n"
12562                         "    %SSBO_dst = OpTypeStruct %ra_f16\n",
12563
12564                         "       %src = OpAccessChain %up_st %ssbo_src %c_i32_0 %c_i32_0\n"
12565                         "       %dst = OpAccessChain %up_f16 %ssbo_dst %c_i32_0 %ndx\n"
12566                         "   %val_src = OpLoad %st_test %src\n"
12567                         "   %val_dst = OpFunctionCall %f16 %sw_fun %val_src %ndx\n",
12568
12569                         "   %sw_fun = OpFunction %f16 None %fun_t\n",
12570
12571                         "%c_f16_na",
12572
12573                         "%f16 %sw_param",
12574                 },
12575         };
12576
12577         DE_ASSERT(opIndex >= 0 && opIndex < DE_LENGTH_OF_ARRAY(opPartsArray));
12578
12579         const char*     accessPathF16[] =
12580         {
12581                 "0",                    // %f16
12582                 DE_NULL,
12583         };
12584         const char*     accessPathV2F16[] =
12585         {
12586                 "0 0",                  // %v2f16
12587                 "0 1",
12588         };
12589         const char*     accessPathV3F16[] =
12590         {
12591                 "0 0",                  // %v3f16
12592                 "0 1",
12593                 "0 2",
12594                 DE_NULL,
12595         };
12596         const char*     accessPathV4F16[] =
12597         {
12598                 "0 0",                  // %v4f16"
12599                 "0 1",
12600                 "0 2",
12601                 "0 3",
12602         };
12603         const char*     accessPathF16Arr3[] =
12604         {
12605                 "0 0",                  // %f16arr3
12606                 "0 1",
12607                 "0 2",
12608                 DE_NULL,
12609         };
12610         const char*     accessPathStruct16Arr3[] =
12611         {
12612                 "0 0 0",                // %struct16arr3
12613                 DE_NULL,
12614                 "0 0 1 0 0",
12615                 "0 0 1 0 1",
12616                 "0 0 1 1 0",
12617                 "0 0 1 1 1",
12618                 "0 0 1 2 0",
12619                 "0 0 1 2 1",
12620                 "0 1 0",
12621                 DE_NULL,
12622                 "0 1 1 0 0",
12623                 "0 1 1 0 1",
12624                 "0 1 1 1 0",
12625                 "0 1 1 1 1",
12626                 "0 1 1 2 0",
12627                 "0 1 1 2 1",
12628                 "0 2 0",
12629                 DE_NULL,
12630                 "0 2 1 0 0",
12631                 "0 2 1 0 1",
12632                 "0 2 1 1 0",
12633                 "0 2 1 1 1",
12634                 "0 2 1 2 0",
12635                 "0 2 1 2 1",
12636         };
12637         const char*     accessPathV2F16Arr5[] =
12638         {
12639                 "0 0 0",                // %v2f16arr5
12640                 "0 0 1",
12641                 "0 1 0",
12642                 "0 1 1",
12643                 "0 2 0",
12644                 "0 2 1",
12645                 "0 3 0",
12646                 "0 3 1",
12647                 "0 4 0",
12648                 "0 4 1",
12649         };
12650         const char*     accessPathV3F16Arr5[] =
12651         {
12652                 "0 0 0",                // %v3f16arr5
12653                 "0 0 1",
12654                 "0 0 2",
12655                 DE_NULL,
12656                 "0 1 0",
12657                 "0 1 1",
12658                 "0 1 2",
12659                 DE_NULL,
12660                 "0 2 0",
12661                 "0 2 1",
12662                 "0 2 2",
12663                 DE_NULL,
12664                 "0 3 0",
12665                 "0 3 1",
12666                 "0 3 2",
12667                 DE_NULL,
12668                 "0 4 0",
12669                 "0 4 1",
12670                 "0 4 2",
12671                 DE_NULL,
12672         };
12673         const char*     accessPathV4F16Arr3[] =
12674         {
12675                 "0 0 0",                // %v4f16arr3
12676                 "0 0 1",
12677                 "0 0 2",
12678                 "0 0 3",
12679                 "0 1 0",
12680                 "0 1 1",
12681                 "0 1 2",
12682                 "0 1 3",
12683                 "0 2 0",
12684                 "0 2 1",
12685                 "0 2 2",
12686                 "0 2 3",
12687                 DE_NULL,
12688                 DE_NULL,
12689                 DE_NULL,
12690                 DE_NULL,
12691         };
12692
12693         struct TypeTestParameters
12694         {
12695                 const char*             name;
12696                 size_t                  accessPathLength;
12697                 const char**    accessPath;
12698         };
12699
12700         const TypeTestParameters typeTestParameters[] =
12701         {
12702                 {       "f16",                  DE_LENGTH_OF_ARRAY(accessPathF16),                      accessPathF16                   },
12703                 {       "v2f16",                DE_LENGTH_OF_ARRAY(accessPathV2F16),            accessPathV2F16                 },
12704                 {       "v3f16",                DE_LENGTH_OF_ARRAY(accessPathV3F16),            accessPathV3F16                 },
12705                 {       "v4f16",                DE_LENGTH_OF_ARRAY(accessPathV4F16),            accessPathV4F16                 },
12706                 {       "f16arr3",              DE_LENGTH_OF_ARRAY(accessPathF16Arr3),          accessPathF16Arr3               },
12707                 {       "v2f16arr5",    DE_LENGTH_OF_ARRAY(accessPathV2F16Arr5),        accessPathV2F16Arr5             },
12708                 {       "v3f16arr5",    DE_LENGTH_OF_ARRAY(accessPathV3F16Arr5),        accessPathV3F16Arr5             },
12709                 {       "v4f16arr3",    DE_LENGTH_OF_ARRAY(accessPathV4F16Arr3),        accessPathV4F16Arr3             },
12710                 {       "struct16arr3", DE_LENGTH_OF_ARRAY(accessPathStruct16Arr3),     accessPathStruct16Arr3  },
12711         };
12712
12713         for (size_t typeTestNdx = 0; typeTestNdx < DE_LENGTH_OF_ARRAY(typeTestParameters); ++typeTestNdx)
12714         {
12715                 const OpParts           opParts                         = opPartsArray[opIndex];
12716                 const string            testName                        = typeTestParameters[typeTestNdx].name;
12717                 const size_t            structItemsCount        = typeTestParameters[typeTestNdx].accessPathLength;
12718                 const char**            accessPath                      = typeTestParameters[typeTestNdx].accessPath;
12719                 SpecResource            specResource;
12720                 map<string, string>     specs;
12721                 VulkanFeatures          features;
12722                 map<string, string>     fragments;
12723                 vector<string>          extensions;
12724                 vector<deFloat16>       inputFP16;
12725                 vector<deFloat16>       dummyFP16Output;
12726
12727                 // Generate values for input
12728                 inputFP16.reserve(structItemsCount);
12729                 for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
12730                         inputFP16.push_back((accessPath[structItemNdx] == DE_NULL) ? exceptionValue : tcu::Float16(float(structItemNdx)).bits());
12731
12732                 dummyFP16Output.resize(structItemsCount);
12733
12734                 // Generate cases for OpSwitch
12735                 {
12736                         string  caseBodies;
12737                         string  caseList;
12738
12739                         for (deUint32 caseNdx = 0; caseNdx < structItemsCount; ++caseNdx)
12740                                 if (accessPath[caseNdx] != DE_NULL)
12741                                 {
12742                                         map<string, string>     specCase;
12743
12744                                         specCase["case_ndx"]            = de::toString(caseNdx);
12745                                         specCase["access_path"]         = accessPath[caseNdx];
12746                                         specCase["op_args_part"]        = opParts.argsPartial;
12747                                         specCase["op_name"]                     = opName;
12748
12749                                         caseBodies      += testCaseBody.specialize(specCase);
12750                                         caseList        += de::toString(caseNdx) + " %case_" + de::toString(caseNdx) + " ";
12751                                 }
12752
12753                         specs["case_bodies"]    = caseBodies;
12754                         specs["case_list"]              = caseList;
12755                 }
12756
12757                 specs["num_elements"]                   = de::toString(structItemsCount);
12758                 specs["field_type"]                             = typeTestParameters[typeTestNdx].name;
12759                 specs["struct_item_size"]               = de::toString(structItemsCount * sizeof(deFloat16));
12760                 specs["op_premain_decls"]               = opParts.premainDecls;
12761                 specs["op_sw_fun_call"]                 = opParts.swFunCall;
12762                 specs["op_sw_fun_header"]               = opParts.swFunHeader;
12763                 specs["op_case_default_value"]  = opParts.caseDefaultValue;
12764
12765                 fragments["extension"]          = "OpExtension \"SPV_KHR_16bit_storage\"";
12766                 fragments["capability"]         = "OpCapability StorageUniformBufferBlock16\nOpCapability Float16\n";
12767                 fragments["decoration"]         = decoration.specialize(specs);
12768                 fragments["pre_main"]           = preMain.specialize(specs);
12769                 fragments["testfun"]            = testFun.specialize(specs);
12770
12771                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputFP16)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12772                 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(dummyFP16Output)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12773                 specResource.verifyIO = compareFP16CompositeFunc;
12774
12775                 extensions.push_back("VK_KHR_16bit_storage");
12776                 extensions.push_back("VK_KHR_shader_float16_int8");
12777
12778                 features.extFloat16Int8         = EXTFLOAT16INT8FEATURES_FLOAT16;
12779                 features.ext16BitStorage        = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
12780
12781                 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12782         }
12783
12784         return testGroup.release();
12785 }
12786
12787 struct fp16PerComponent
12788 {
12789         fp16PerComponent()
12790                 : flavor(0)
12791                 , floatFormat16 (-14, 15, 10, true)
12792                 , outCompCount(0)
12793                 , argCompCount(3, 0)
12794         {
12795         }
12796
12797         bool                    callOncePerComponent    ()                                                                      { return true; }
12798         deUint32                getComponentValidity    ()                                                                      { return static_cast<deUint32>(-1); }
12799
12800         virtual double  getULPs                                 (vector<const deFloat16*>&)                     { return 1.0; }
12801         virtual double  getMin                                  (double value, double ulps)                     { return value - floatFormat16.ulp(deAbs(value), ulps); }
12802         virtual double  getMax                                  (double value, double ulps)                     { return value + floatFormat16.ulp(deAbs(value), ulps); }
12803
12804         virtual size_t  getFlavorCount                  ()                                                                      { return flavorNames.empty() ? 1 : flavorNames.size(); }
12805         virtual void    setFlavor                               (size_t flavorNo)                                       { DE_ASSERT(flavorNo < getFlavorCount()); flavor = flavorNo; }
12806         virtual size_t  getFlavor                               ()                                                                      { return flavor; }
12807         virtual string  getCurrentFlavorName    ()                                                                      { return flavorNames.empty() ? string("") : flavorNames[getFlavor()]; }
12808
12809         virtual void    setOutCompCount                 (size_t compCount)                                      { outCompCount = compCount; }
12810         virtual size_t  getOutCompCount                 ()                                                                      { return outCompCount; }
12811
12812         virtual void    setArgCompCount                 (size_t argNo, size_t compCount)        { argCompCount[argNo] = compCount; }
12813         virtual size_t  getArgCompCount                 (size_t argNo)                                          { return argCompCount[argNo]; }
12814
12815 protected:
12816         size_t                          flavor;
12817         tcu::FloatFormat        floatFormat16;
12818         size_t                          outCompCount;
12819         vector<size_t>          argCompCount;
12820         vector<string>          flavorNames;
12821 };
12822
12823 struct fp16OpFNegate : public fp16PerComponent
12824 {
12825         template <class fp16type>
12826         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
12827         {
12828                 const fp16type  x               (*in[0]);
12829                 const double    d               (x.asDouble());
12830                 const double    result  (0.0 - d);
12831
12832                 out[0] = fp16type(result).bits();
12833                 min[0] = getMin(result, getULPs(in));
12834                 max[0] = getMax(result, getULPs(in));
12835
12836                 return true;
12837         }
12838 };
12839
12840 struct fp16Round : public fp16PerComponent
12841 {
12842         fp16Round() : fp16PerComponent()
12843         {
12844                 flavorNames.push_back("Floor(x+0.5)");
12845                 flavorNames.push_back("Floor(x-0.5)");
12846                 flavorNames.push_back("RoundEven");
12847         }
12848
12849         template<class fp16type>
12850         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
12851         {
12852                 const fp16type  x               (*in[0]);
12853                 const double    d               (x.asDouble());
12854                 double                  result  (0.0);
12855
12856                 switch (flavor)
12857                 {
12858                         case 0:         result = deRound(d);            break;
12859                         case 1:         result = deFloor(d - 0.5);      break;
12860                         case 2:         result = deRoundEven(d);        break;
12861                         default:        TCU_THROW(InternalError, "Invalid flavor specified");
12862                 }
12863
12864                 out[0] = fp16type(result).bits();
12865                 min[0] = getMin(result, getULPs(in));
12866                 max[0] = getMax(result, getULPs(in));
12867
12868                 return true;
12869         }
12870 };
12871
12872 struct fp16RoundEven : public fp16PerComponent
12873 {
12874         template<class fp16type>
12875         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
12876         {
12877                 const fp16type  x               (*in[0]);
12878                 const double    d               (x.asDouble());
12879                 const double    result  (deRoundEven(d));
12880
12881                 out[0] = fp16type(result).bits();
12882                 min[0] = getMin(result, getULPs(in));
12883                 max[0] = getMax(result, getULPs(in));
12884
12885                 return true;
12886         }
12887 };
12888
12889 struct fp16Trunc : public fp16PerComponent
12890 {
12891         template<class fp16type>
12892         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
12893         {
12894                 const fp16type  x               (*in[0]);
12895                 const double    d               (x.asDouble());
12896                 const double    result  (deTrunc(d));
12897
12898                 out[0] = fp16type(result).bits();
12899                 min[0] = getMin(result, getULPs(in));
12900                 max[0] = getMax(result, getULPs(in));
12901
12902                 return true;
12903         }
12904 };
12905
12906 struct fp16FAbs : public fp16PerComponent
12907 {
12908         template<class fp16type>
12909         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
12910         {
12911                 const fp16type  x               (*in[0]);
12912                 const double    d               (x.asDouble());
12913                 const double    result  (deAbs(d));
12914
12915                 out[0] = fp16type(result).bits();
12916                 min[0] = getMin(result, getULPs(in));
12917                 max[0] = getMax(result, getULPs(in));
12918
12919                 return true;
12920         }
12921 };
12922
12923 struct fp16FSign : public fp16PerComponent
12924 {
12925         template<class fp16type>
12926         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
12927         {
12928                 const fp16type  x               (*in[0]);
12929                 const double    d               (x.asDouble());
12930                 const double    result  (deSign(d));
12931
12932                 if (x.isNaN())
12933                         return false;
12934
12935                 out[0] = fp16type(result).bits();
12936                 min[0] = getMin(result, getULPs(in));
12937                 max[0] = getMax(result, getULPs(in));
12938
12939                 return true;
12940         }
12941 };
12942
12943 struct fp16Floor : public fp16PerComponent
12944 {
12945         template<class fp16type>
12946         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
12947         {
12948                 const fp16type  x               (*in[0]);
12949                 const double    d               (x.asDouble());
12950                 const double    result  (deFloor(d));
12951
12952                 out[0] = fp16type(result).bits();
12953                 min[0] = getMin(result, getULPs(in));
12954                 max[0] = getMax(result, getULPs(in));
12955
12956                 return true;
12957         }
12958 };
12959
12960 struct fp16Ceil : public fp16PerComponent
12961 {
12962         template<class fp16type>
12963         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
12964         {
12965                 const fp16type  x               (*in[0]);
12966                 const double    d               (x.asDouble());
12967                 const double    result  (deCeil(d));
12968
12969                 out[0] = fp16type(result).bits();
12970                 min[0] = getMin(result, getULPs(in));
12971                 max[0] = getMax(result, getULPs(in));
12972
12973                 return true;
12974         }
12975 };
12976
12977 struct fp16Fract : public fp16PerComponent
12978 {
12979         template<class fp16type>
12980         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
12981         {
12982                 const fp16type  x               (*in[0]);
12983                 const double    d               (x.asDouble());
12984                 const double    result  (deFrac(d));
12985
12986                 out[0] = fp16type(result).bits();
12987                 min[0] = getMin(result, getULPs(in));
12988                 max[0] = getMax(result, getULPs(in));
12989
12990                 return true;
12991         }
12992 };
12993
12994 struct fp16Radians : public fp16PerComponent
12995 {
12996         virtual double getULPs (vector<const deFloat16*>& in)
12997         {
12998                 DE_UNREF(in);
12999
13000                 return 2.5;
13001         }
13002
13003         template<class fp16type>
13004         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13005         {
13006                 const fp16type  x               (*in[0]);
13007                 const float             d               (x.asFloat());
13008                 const float             result  (deFloatRadians(d));
13009
13010                 out[0] = fp16type(result).bits();
13011                 min[0] = getMin(result, getULPs(in));
13012                 max[0] = getMax(result, getULPs(in));
13013
13014                 return true;
13015         }
13016 };
13017
13018 struct fp16Degrees : public fp16PerComponent
13019 {
13020         virtual double getULPs (vector<const deFloat16*>& in)
13021         {
13022                 DE_UNREF(in);
13023
13024                 return 2.5;
13025         }
13026
13027         template<class fp16type>
13028         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13029         {
13030                 const fp16type  x               (*in[0]);
13031                 const float             d               (x.asFloat());
13032                 const float             result  (deFloatDegrees(d));
13033
13034                 out[0] = fp16type(result).bits();
13035                 min[0] = getMin(result, getULPs(in));
13036                 max[0] = getMax(result, getULPs(in));
13037
13038                 return true;
13039         }
13040 };
13041
13042 struct fp16Sin : public fp16PerComponent
13043 {
13044         template<class fp16type>
13045         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13046         {
13047                 const fp16type  x                       (*in[0]);
13048                 const double    d                       (x.asDouble());
13049                 const double    result          (deSin(d));
13050                 const double    unspecUlp       (16.0);
13051                 const double    err                     (de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
13052
13053                 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
13054                         return false;
13055
13056                 out[0] = fp16type(result).bits();
13057                 min[0] = result - err;
13058                 max[0] = result + err;
13059
13060                 return true;
13061         }
13062 };
13063
13064 struct fp16Cos : public fp16PerComponent
13065 {
13066         template<class fp16type>
13067         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13068         {
13069                 const fp16type  x                       (*in[0]);
13070                 const double    d                       (x.asDouble());
13071                 const double    result          (deCos(d));
13072                 const double    unspecUlp       (16.0);
13073                 const double    err                     (de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
13074
13075                 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
13076                         return false;
13077
13078                 out[0] = fp16type(result).bits();
13079                 min[0] = result - err;
13080                 max[0] = result + err;
13081
13082                 return true;
13083         }
13084 };
13085
13086 struct fp16Tan : public fp16PerComponent
13087 {
13088         template<class fp16type>
13089         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13090         {
13091                 const fp16type  x               (*in[0]);
13092                 const double    d               (x.asDouble());
13093                 const double    result  (deTan(d));
13094
13095                 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
13096                         return false;
13097
13098                 out[0] = fp16type(result).bits();
13099                 {
13100                         const double    err                     = deLdExp(1.0, -7);
13101                         const double    s1                      = deSin(d) + err;
13102                         const double    s2                      = deSin(d) - err;
13103                         const double    c1                      = deCos(d) + err;
13104                         const double    c2                      = deCos(d) - err;
13105                         const double    edgeVals[]      = {s1/c1, s1/c2, s2/c1, s2/c2};
13106                         double                  edgeLeft        = out[0];
13107                         double                  edgeRight       = out[0];
13108
13109                         if (deSign(c1 * c2) < 0.0)
13110                         {
13111                                 edgeLeft        = -std::numeric_limits<double>::infinity();
13112                                 edgeRight       = +std::numeric_limits<double>::infinity();
13113                         }
13114                         else
13115                         {
13116                                 edgeLeft        = *std::min_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
13117                                 edgeRight       = *std::max_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
13118                         }
13119
13120                         min[0] = edgeLeft;
13121                         max[0] = edgeRight;
13122                 }
13123
13124                 return true;
13125         }
13126 };
13127
13128 struct fp16Asin : public fp16PerComponent
13129 {
13130         template<class fp16type>
13131         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13132         {
13133                 const fp16type  x               (*in[0]);
13134                 const double    d               (x.asDouble());
13135                 const double    result  (deAsin(d));
13136                 const double    error   (deAtan2(d, sqrt(1.0 - d * d)));
13137
13138                 if (!x.isNaN() && deAbs(d) > 1.0)
13139                         return false;
13140
13141                 out[0] = fp16type(result).bits();
13142                 min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
13143                 max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
13144
13145                 return true;
13146         }
13147 };
13148
13149 struct fp16Acos : public fp16PerComponent
13150 {
13151         template<class fp16type>
13152         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13153         {
13154                 const fp16type  x               (*in[0]);
13155                 const double    d               (x.asDouble());
13156                 const double    result  (deAcos(d));
13157                 const double    error   (deAtan2(sqrt(1.0 - d * d), d));
13158
13159                 if (!x.isNaN() && deAbs(d) > 1.0)
13160                         return false;
13161
13162                 out[0] = fp16type(result).bits();
13163                 min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
13164                 max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
13165
13166                 return true;
13167         }
13168 };
13169
13170 struct fp16Atan : public fp16PerComponent
13171 {
13172         virtual double getULPs(vector<const deFloat16*>& in)
13173         {
13174                 DE_UNREF(in);
13175
13176                 return 2 * 5.0; // This is not a precision test. Value is not from spec
13177         }
13178
13179         template<class fp16type>
13180         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13181         {
13182                 const fp16type  x               (*in[0]);
13183                 const double    d               (x.asDouble());
13184                 const double    result  (deAtanOver(d));
13185
13186                 out[0] = fp16type(result).bits();
13187                 min[0] = getMin(result, getULPs(in));
13188                 max[0] = getMax(result, getULPs(in));
13189
13190                 return true;
13191         }
13192 };
13193
13194 struct fp16Sinh : public fp16PerComponent
13195 {
13196         fp16Sinh() : fp16PerComponent()
13197         {
13198                 flavorNames.push_back("Double");
13199                 flavorNames.push_back("ExpFP16");
13200         }
13201
13202         template<class fp16type>
13203         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13204         {
13205                 const fp16type  x               (*in[0]);
13206                 const double    d               (x.asDouble());
13207                 const double    ulps    (64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
13208                 double                  result  (0.0);
13209                 double                  error   (0.0);
13210
13211                 if (getFlavor() == 0)
13212                 {
13213                         result  = deSinh(d);
13214                         error   = floatFormat16.ulp(deAbs(result), ulps);
13215                 }
13216                 else if (getFlavor() == 1)
13217                 {
13218                         const fp16type  epx     (deExp(d));
13219                         const fp16type  enx     (deExp(-d));
13220                         const fp16type  esx     (epx.asDouble() - enx.asDouble());
13221                         const fp16type  sx2     (esx.asDouble() / 2.0);
13222
13223                         result  = sx2.asDouble();
13224                         error   = deAbs(floatFormat16.ulp(epx.asDouble(), ulps)) + deAbs(floatFormat16.ulp(enx.asDouble(), ulps));
13225                 }
13226                 else
13227                 {
13228                         TCU_THROW(InternalError, "Unknown flavor");
13229                 }
13230
13231                 out[0] = fp16type(result).bits();
13232                 min[0] = result - error;
13233                 max[0] = result + error;
13234
13235                 return true;
13236         }
13237 };
13238
13239 struct fp16Cosh : public fp16PerComponent
13240 {
13241         fp16Cosh() : fp16PerComponent()
13242         {
13243                 flavorNames.push_back("Double");
13244                 flavorNames.push_back("ExpFP16");
13245         }
13246
13247         template<class fp16type>
13248         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13249         {
13250                 const fp16type  x               (*in[0]);
13251                 const double    d               (x.asDouble());
13252                 const double    ulps    (64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
13253                 double                  result  (0.0);
13254
13255                 if (getFlavor() == 0)
13256                 {
13257                         result = deCosh(d);
13258                 }
13259                 else if (getFlavor() == 1)
13260                 {
13261                         const fp16type  epx     (deExp(d));
13262                         const fp16type  enx     (deExp(-d));
13263                         const fp16type  esx     (epx.asDouble() + enx.asDouble());
13264                         const fp16type  sx2     (esx.asDouble() / 2.0);
13265
13266                         result = sx2.asDouble();
13267                 }
13268                 else
13269                 {
13270                         TCU_THROW(InternalError, "Unknown flavor");
13271                 }
13272
13273                 out[0] = fp16type(result).bits();
13274                 min[0] = result - floatFormat16.ulp(deAbs(result), ulps);
13275                 max[0] = result + floatFormat16.ulp(deAbs(result), ulps);
13276
13277                 return true;
13278         }
13279 };
13280
13281 struct fp16Tanh : public fp16PerComponent
13282 {
13283         fp16Tanh() : fp16PerComponent()
13284         {
13285                 flavorNames.push_back("Tanh");
13286                 flavorNames.push_back("SinhCosh");
13287                 flavorNames.push_back("SinhCoshFP16");
13288                 flavorNames.push_back("PolyFP16");
13289         }
13290
13291         virtual double getULPs (vector<const deFloat16*>& in)
13292         {
13293                 const tcu::Float16      x       (*in[0]);
13294                 const double            d       (x.asDouble());
13295
13296                 return 2 * (1.0 + 2 * deAbs(d)); // This is not a precision test. Value is not from spec
13297         }
13298
13299         template<class fp16type>
13300         inline double calcPoly (const fp16type& espx, const fp16type& esnx, const fp16type& ecpx, const fp16type& ecnx)
13301         {
13302                 const fp16type  esx     (espx.asDouble() - esnx.asDouble());
13303                 const fp16type  sx2     (esx.asDouble() / 2.0);
13304                 const fp16type  ecx     (ecpx.asDouble() + ecnx.asDouble());
13305                 const fp16type  cx2     (ecx.asDouble() / 2.0);
13306                 const fp16type  tg      (sx2.asDouble() / cx2.asDouble());
13307                 const double    rez     (tg.asDouble());
13308
13309                 return rez;
13310         }
13311
13312         template<class fp16type>
13313         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13314         {
13315                 const fp16type  x               (*in[0]);
13316                 const double    d               (x.asDouble());
13317                 double                  result  (0.0);
13318
13319                 if (getFlavor() == 0)
13320                 {
13321                         result  = deTanh(d);
13322                         min[0]  = getMin(result, getULPs(in));
13323                         max[0]  = getMax(result, getULPs(in));
13324                 }
13325                 else if (getFlavor() == 1)
13326                 {
13327                         result  = deSinh(d) / deCosh(d);
13328                         min[0]  = getMin(result, getULPs(in));
13329                         max[0]  = getMax(result, getULPs(in));
13330                 }
13331                 else if (getFlavor() == 2)
13332                 {
13333                         const fp16type  s       (deSinh(d));
13334                         const fp16type  c       (deCosh(d));
13335
13336                         result  = s.asDouble() / c.asDouble();
13337                         min[0]  = getMin(result, getULPs(in));
13338                         max[0]  = getMax(result, getULPs(in));
13339                 }
13340                 else if (getFlavor() == 3)
13341                 {
13342                         const double    ulps    (getULPs(in));
13343                         const double    epxm    (deExp( d));
13344                         const double    enxm    (deExp(-d));
13345                         const double    epxmerr = floatFormat16.ulp(epxm, ulps);
13346                         const double    enxmerr = floatFormat16.ulp(enxm, ulps);
13347                         const fp16type  epx[]   = { fp16type(epxm - epxmerr), fp16type(epxm + epxmerr) };
13348                         const fp16type  enx[]   = { fp16type(enxm - enxmerr), fp16type(enxm + enxmerr) };
13349                         const fp16type  epxm16  (epxm);
13350                         const fp16type  enxm16  (enxm);
13351                         vector<double>  tgs;
13352
13353                         for (size_t spNdx = 0; spNdx < DE_LENGTH_OF_ARRAY(epx); ++spNdx)
13354                         for (size_t snNdx = 0; snNdx < DE_LENGTH_OF_ARRAY(enx); ++snNdx)
13355                         for (size_t cpNdx = 0; cpNdx < DE_LENGTH_OF_ARRAY(epx); ++cpNdx)
13356                         for (size_t cnNdx = 0; cnNdx < DE_LENGTH_OF_ARRAY(enx); ++cnNdx)
13357                         {
13358                                 const double tgh = calcPoly(epx[spNdx], enx[snNdx], epx[cpNdx], enx[cnNdx]);
13359
13360                                 tgs.push_back(tgh);
13361                         }
13362
13363                         result = calcPoly(epxm16, enxm16, epxm16, enxm16);
13364                         min[0] = *std::min_element(tgs.begin(), tgs.end());
13365                         max[0] = *std::max_element(tgs.begin(), tgs.end());
13366                 }
13367                 else
13368                 {
13369                         TCU_THROW(InternalError, "Unknown flavor");
13370                 }
13371
13372                 out[0] = fp16type(result).bits();
13373
13374                 return true;
13375         }
13376 };
13377
13378 struct fp16Asinh : public fp16PerComponent
13379 {
13380         fp16Asinh() : fp16PerComponent()
13381         {
13382                 flavorNames.push_back("Double");
13383                 flavorNames.push_back("PolyFP16Wiki");
13384                 flavorNames.push_back("PolyFP16Abs");
13385         }
13386
13387         virtual double getULPs (vector<const deFloat16*>& in)
13388         {
13389                 DE_UNREF(in);
13390
13391                 return 256.0; // This is not a precision test. Value is not from spec
13392         }
13393
13394         template<class fp16type>
13395         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13396         {
13397                 const fp16type  x               (*in[0]);
13398                 const double    d               (x.asDouble());
13399                 double                  result  (0.0);
13400
13401                 if (getFlavor() == 0)
13402                 {
13403                         result = deAsinh(d);
13404                 }
13405                 else if (getFlavor() == 1)
13406                 {
13407                         const fp16type  x2              (d * d);
13408                         const fp16type  x2p1    (x2.asDouble() + 1.0);
13409                         const fp16type  sq              (deSqrt(x2p1.asDouble()));
13410                         const fp16type  sxsq    (d + sq.asDouble());
13411                         const fp16type  lsxsq   (deLog(sxsq.asDouble()));
13412
13413                         if (lsxsq.isInf())
13414                                 return false;
13415
13416                         result = lsxsq.asDouble();
13417                 }
13418                 else if (getFlavor() == 2)
13419                 {
13420                         const fp16type  x2              (d * d);
13421                         const fp16type  x2p1    (x2.asDouble() + 1.0);
13422                         const fp16type  sq              (deSqrt(x2p1.asDouble()));
13423                         const fp16type  sxsq    (deAbs(d) + sq.asDouble());
13424                         const fp16type  lsxsq   (deLog(sxsq.asDouble()));
13425
13426                         result = deSign(d) * lsxsq.asDouble();
13427                 }
13428                 else
13429                 {
13430                         TCU_THROW(InternalError, "Unknown flavor");
13431                 }
13432
13433                 out[0] = fp16type(result).bits();
13434                 min[0] = getMin(result, getULPs(in));
13435                 max[0] = getMax(result, getULPs(in));
13436
13437                 return true;
13438         }
13439 };
13440
13441 struct fp16Acosh : public fp16PerComponent
13442 {
13443         fp16Acosh() : fp16PerComponent()
13444         {
13445                 flavorNames.push_back("Double");
13446                 flavorNames.push_back("PolyFP16");
13447         }
13448
13449         virtual double getULPs (vector<const deFloat16*>& in)
13450         {
13451                 DE_UNREF(in);
13452
13453                 return 16.0; // This is not a precision test. Value is not from spec
13454         }
13455
13456         template<class fp16type>
13457         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13458         {
13459                 const fp16type  x               (*in[0]);
13460                 const double    d               (x.asDouble());
13461                 double                  result  (0.0);
13462
13463                 if (!x.isNaN() && d < 1.0)
13464                         return false;
13465
13466                 if (getFlavor() == 0)
13467                 {
13468                         result = deAcosh(d);
13469                 }
13470                 else if (getFlavor() == 1)
13471                 {
13472                         const fp16type  x2              (d * d);
13473                         const fp16type  x2m1    (x2.asDouble() - 1.0);
13474                         const fp16type  sq              (deSqrt(x2m1.asDouble()));
13475                         const fp16type  sxsq    (d + sq.asDouble());
13476                         const fp16type  lsxsq   (deLog(sxsq.asDouble()));
13477
13478                         result = lsxsq.asDouble();
13479                 }
13480                 else
13481                 {
13482                         TCU_THROW(InternalError, "Unknown flavor");
13483                 }
13484
13485                 out[0] = fp16type(result).bits();
13486                 min[0] = getMin(result, getULPs(in));
13487                 max[0] = getMax(result, getULPs(in));
13488
13489                 return true;
13490         }
13491 };
13492
13493 struct fp16Atanh : public fp16PerComponent
13494 {
13495         fp16Atanh() : fp16PerComponent()
13496         {
13497                 flavorNames.push_back("Double");
13498                 flavorNames.push_back("PolyFP16");
13499         }
13500
13501         template<class fp16type>
13502         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13503         {
13504                 const fp16type  x               (*in[0]);
13505                 const double    d               (x.asDouble());
13506                 double                  result  (0.0);
13507
13508                 if (deAbs(d) >= 1.0)
13509                         return false;
13510
13511                 if (getFlavor() == 0)
13512                 {
13513                         const double    ulps    (16.0); // This is not a precision test. Value is not from spec
13514
13515                         result = deAtanh(d);
13516                         min[0] = getMin(result, ulps);
13517                         max[0] = getMax(result, ulps);
13518                 }
13519                 else if (getFlavor() == 1)
13520                 {
13521                         const fp16type  x1a             (1.0 + d);
13522                         const fp16type  x1b             (1.0 - d);
13523                         const fp16type  x1d             (x1a.asDouble() / x1b.asDouble());
13524                         const fp16type  lx1d    (deLog(x1d.asDouble()));
13525                         const fp16type  lx1d2   (0.5 * lx1d.asDouble());
13526                         const double    error   (2 * (de::inRange(deAbs(x1d.asDouble()), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(x1d.asDouble()), 3.0)));
13527
13528                         result = lx1d2.asDouble();
13529                         min[0] = result - error;
13530                         max[0] = result + error;
13531                 }
13532                 else
13533                 {
13534                         TCU_THROW(InternalError, "Unknown flavor");
13535                 }
13536
13537                 out[0] = fp16type(result).bits();
13538
13539                 return true;
13540         }
13541 };
13542
13543 struct fp16Exp : public fp16PerComponent
13544 {
13545         template<class fp16type>
13546         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13547         {
13548                 const fp16type  x               (*in[0]);
13549                 const double    d               (x.asDouble());
13550                 const double    ulps    (10.0 * (1.0 + 2.0 * deAbs(d)));
13551                 const double    result  (deExp(d));
13552
13553                 out[0] = fp16type(result).bits();
13554                 min[0] = getMin(result, ulps);
13555                 max[0] = getMax(result, ulps);
13556
13557                 return true;
13558         }
13559 };
13560
13561 struct fp16Log : public fp16PerComponent
13562 {
13563         template<class fp16type>
13564         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13565         {
13566                 const fp16type  x               (*in[0]);
13567                 const double    d               (x.asDouble());
13568                 const double    result  (deLog(d));
13569                 const double    error   (de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
13570
13571                 if (d <= 0.0)
13572                         return false;
13573
13574                 out[0] = fp16type(result).bits();
13575                 min[0] = result - error;
13576                 max[0] = result + error;
13577
13578                 return true;
13579         }
13580 };
13581
13582 struct fp16Exp2 : public fp16PerComponent
13583 {
13584         template<class fp16type>
13585         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13586         {
13587                 const fp16type  x               (*in[0]);
13588                 const double    d               (x.asDouble());
13589                 const double    result  (deExp2(d));
13590                 const double    ulps    (1.0 + 2.0 * deAbs(fp16type(in[0][0]).asDouble()));
13591
13592                 out[0] = fp16type(result).bits();
13593                 min[0] = getMin(result, ulps);
13594                 max[0] = getMax(result, ulps);
13595
13596                 return true;
13597         }
13598 };
13599
13600 struct fp16Log2 : public fp16PerComponent
13601 {
13602         template<class fp16type>
13603         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13604         {
13605                 const fp16type  x               (*in[0]);
13606                 const double    d               (x.asDouble());
13607                 const double    result  (deLog2(d));
13608                 const double    error   (de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
13609
13610                 if (d <= 0.0)
13611                         return false;
13612
13613                 out[0] = fp16type(result).bits();
13614                 min[0] = result - error;
13615                 max[0] = result + error;
13616
13617                 return true;
13618         }
13619 };
13620
13621 struct fp16Sqrt : public fp16PerComponent
13622 {
13623         virtual double getULPs (vector<const deFloat16*>& in)
13624         {
13625                 DE_UNREF(in);
13626
13627                 return 6.0;
13628         }
13629
13630         template<class fp16type>
13631         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13632         {
13633                 const fp16type  x               (*in[0]);
13634                 const double    d               (x.asDouble());
13635                 const double    result  (deSqrt(d));
13636
13637                 if (!x.isNaN() && d < 0.0)
13638                         return false;
13639
13640                 out[0] = fp16type(result).bits();
13641                 min[0] = getMin(result, getULPs(in));
13642                 max[0] = getMax(result, getULPs(in));
13643
13644                 return true;
13645         }
13646 };
13647
13648 struct fp16InverseSqrt : public fp16PerComponent
13649 {
13650         virtual double getULPs (vector<const deFloat16*>& in)
13651         {
13652                 DE_UNREF(in);
13653
13654                 return 2.0;
13655         }
13656
13657         template<class fp16type>
13658         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13659         {
13660                 const fp16type  x               (*in[0]);
13661                 const double    d               (x.asDouble());
13662                 const double    result  (1.0/deSqrt(d));
13663
13664                 if (!x.isNaN() && d <= 0.0)
13665                         return false;
13666
13667                 out[0] = fp16type(result).bits();
13668                 min[0] = getMin(result, getULPs(in));
13669                 max[0] = getMax(result, getULPs(in));
13670
13671                 return true;
13672         }
13673 };
13674
13675 struct fp16ModfFrac : public fp16PerComponent
13676 {
13677         template<class fp16type>
13678         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13679         {
13680                 const fp16type  x               (*in[0]);
13681                 const double    d               (x.asDouble());
13682                 double                  i               (0.0);
13683                 const double    result  (deModf(d, &i));
13684
13685                 if (x.isInf() || x.isNaN())
13686                         return false;
13687
13688                 out[0] = fp16type(result).bits();
13689                 min[0] = getMin(result, getULPs(in));
13690                 max[0] = getMax(result, getULPs(in));
13691
13692                 return true;
13693         }
13694 };
13695
13696 struct fp16ModfInt : public fp16PerComponent
13697 {
13698         template<class fp16type>
13699         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13700         {
13701                 const fp16type  x               (*in[0]);
13702                 const double    d               (x.asDouble());
13703                 double                  i               (0.0);
13704                 const double    dummy   (deModf(d, &i));
13705                 const double    result  (i);
13706
13707                 DE_UNREF(dummy);
13708
13709                 if (x.isInf() || x.isNaN())
13710                         return false;
13711
13712                 out[0] = fp16type(result).bits();
13713                 min[0] = getMin(result, getULPs(in));
13714                 max[0] = getMax(result, getULPs(in));
13715
13716                 return true;
13717         }
13718 };
13719
13720 struct fp16FrexpS : public fp16PerComponent
13721 {
13722         template<class fp16type>
13723         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13724         {
13725                 const fp16type  x               (*in[0]);
13726                 const double    d               (x.asDouble());
13727                 int                             e               (0);
13728                 const double    result  (deFrExp(d, &e));
13729
13730                 if (x.isNaN() || x.isInf())
13731                         return false;
13732
13733                 out[0] = fp16type(result).bits();
13734                 min[0] = getMin(result, getULPs(in));
13735                 max[0] = getMax(result, getULPs(in));
13736
13737                 return true;
13738         }
13739 };
13740
13741 struct fp16FrexpE : public fp16PerComponent
13742 {
13743         template<class fp16type>
13744         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13745         {
13746                 const fp16type  x               (*in[0]);
13747                 const double    d               (x.asDouble());
13748                 int                             e               (0);
13749                 const double    dummy   (deFrExp(d, &e));
13750                 const double    result  (static_cast<double>(e));
13751
13752                 DE_UNREF(dummy);
13753
13754                 if (x.isNaN() || x.isInf())
13755                         return false;
13756
13757                 out[0] = fp16type(result).bits();
13758                 min[0] = getMin(result, getULPs(in));
13759                 max[0] = getMax(result, getULPs(in));
13760
13761                 return true;
13762         }
13763 };
13764
13765 struct fp16OpFAdd : public fp16PerComponent
13766 {
13767         template<class fp16type>
13768         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13769         {
13770                 const fp16type  x               (*in[0]);
13771                 const fp16type  y               (*in[1]);
13772                 const double    xd              (x.asDouble());
13773                 const double    yd              (y.asDouble());
13774                 const double    result  (xd + yd);
13775
13776                 out[0] = fp16type(result).bits();
13777                 min[0] = getMin(result, getULPs(in));
13778                 max[0] = getMax(result, getULPs(in));
13779
13780                 return true;
13781         }
13782 };
13783
13784 struct fp16OpFSub : public fp16PerComponent
13785 {
13786         template<class fp16type>
13787         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13788         {
13789                 const fp16type  x               (*in[0]);
13790                 const fp16type  y               (*in[1]);
13791                 const double    xd              (x.asDouble());
13792                 const double    yd              (y.asDouble());
13793                 const double    result  (xd - yd);
13794
13795                 out[0] = fp16type(result).bits();
13796                 min[0] = getMin(result, getULPs(in));
13797                 max[0] = getMax(result, getULPs(in));
13798
13799                 return true;
13800         }
13801 };
13802
13803 struct fp16OpFMul : public fp16PerComponent
13804 {
13805         template<class fp16type>
13806         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13807         {
13808                 const fp16type  x               (*in[0]);
13809                 const fp16type  y               (*in[1]);
13810                 const double    xd              (x.asDouble());
13811                 const double    yd              (y.asDouble());
13812                 const double    result  (xd * yd);
13813
13814                 out[0] = fp16type(result).bits();
13815                 min[0] = getMin(result, getULPs(in));
13816                 max[0] = getMax(result, getULPs(in));
13817
13818                 return true;
13819         }
13820 };
13821
13822 struct fp16OpFDiv : public fp16PerComponent
13823 {
13824         fp16OpFDiv() : fp16PerComponent()
13825         {
13826                 flavorNames.push_back("DirectDiv");
13827                 flavorNames.push_back("InverseDiv");
13828         }
13829
13830         template<class fp16type>
13831         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13832         {
13833                 const fp16type  x                       (*in[0]);
13834                 const fp16type  y                       (*in[1]);
13835                 const double    xd                      (x.asDouble());
13836                 const double    yd                      (y.asDouble());
13837                 const double    unspecUlp       (16.0);
13838                 const double    ulpCnt          (de::inRange(deAbs(yd), deLdExp(1, -14), deLdExp(1, 14)) ? 2.5 : unspecUlp);
13839                 double                  result          (0.0);
13840
13841                 if (y.isZero())
13842                         return false;
13843
13844                 if (getFlavor() == 0)
13845                 {
13846                         result = (xd / yd);
13847                 }
13848                 else if (getFlavor() == 1)
13849                 {
13850                         const double    invyd   (1.0 / yd);
13851                         const fp16type  invy    (invyd);
13852
13853                         result = (xd * invy.asDouble());
13854                 }
13855                 else
13856                 {
13857                         TCU_THROW(InternalError, "Unknown flavor");
13858                 }
13859
13860                 out[0] = fp16type(result).bits();
13861                 min[0] = getMin(result, ulpCnt);
13862                 max[0] = getMax(result, ulpCnt);
13863
13864                 return true;
13865         }
13866 };
13867
13868 struct fp16Atan2 : public fp16PerComponent
13869 {
13870         fp16Atan2() : fp16PerComponent()
13871         {
13872                 flavorNames.push_back("DoubleCalc");
13873                 flavorNames.push_back("DoubleCalc_PI");
13874         }
13875
13876         virtual double getULPs(vector<const deFloat16*>& in)
13877         {
13878                 DE_UNREF(in);
13879
13880                 return 2 * 5.0; // This is not a precision test. Value is not from spec
13881         }
13882
13883         template<class fp16type>
13884         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13885         {
13886                 const fp16type  x               (*in[0]);
13887                 const fp16type  y               (*in[1]);
13888                 const double    xd              (x.asDouble());
13889                 const double    yd              (y.asDouble());
13890                 double                  result  (0.0);
13891
13892                 if (x.isZero() && y.isZero())
13893                         return false;
13894
13895                 if (getFlavor() == 0)
13896                 {
13897                         result  = deAtan2(xd, yd);
13898                 }
13899                 else if (getFlavor() == 1)
13900                 {
13901                         const double    ulps    (2.0 * 5.0); // This is not a precision test. Value is not from spec
13902                         const double    eps             (floatFormat16.ulp(DE_PI_DOUBLE, ulps));
13903
13904                         result  = deAtan2(xd, yd);
13905
13906                         if (de::inRange(deAbs(result), DE_PI_DOUBLE - eps, DE_PI_DOUBLE + eps))
13907                                 result  = -result;
13908                 }
13909                 else
13910                 {
13911                         TCU_THROW(InternalError, "Unknown flavor");
13912                 }
13913
13914                 out[0] = fp16type(result).bits();
13915                 min[0] = getMin(result, getULPs(in));
13916                 max[0] = getMax(result, getULPs(in));
13917
13918                 return true;
13919         }
13920 };
13921
13922 struct fp16Pow : public fp16PerComponent
13923 {
13924         fp16Pow() : fp16PerComponent()
13925         {
13926                 flavorNames.push_back("Pow");
13927                 flavorNames.push_back("PowLog2");
13928                 flavorNames.push_back("PowLog2FP16");
13929         }
13930
13931         template<class fp16type>
13932         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13933         {
13934                 const fp16type  x               (*in[0]);
13935                 const fp16type  y               (*in[1]);
13936                 const double    xd              (x.asDouble());
13937                 const double    yd              (y.asDouble());
13938                 const double    logxeps (de::inRange(deAbs(xd), 0.5, 2.0) ? deLdExp(1.0, -7) : floatFormat16.ulp(deLog2(xd), 3.0));
13939                 const double    ulps1   (1.0 + 4.0 * deAbs(yd * (deLog2(xd) - logxeps)));
13940                 const double    ulps2   (1.0 + 4.0 * deAbs(yd * (deLog2(xd) + logxeps)));
13941                 const double    ulps    (deMax(deAbs(ulps1), deAbs(ulps2)));
13942                 double                  result  (0.0);
13943
13944                 if (xd < 0.0)
13945                         return false;
13946
13947                 if (x.isZero() && yd <= 0.0)
13948                         return false;
13949
13950                 if (getFlavor() == 0)
13951                 {
13952                         result = dePow(xd, yd);
13953                 }
13954                 else if (getFlavor() == 1)
13955                 {
13956                         const double    l2d     (deLog2(xd));
13957                         const double    e2d     (deExp2(yd * l2d));
13958
13959                         result = e2d;
13960                 }
13961                 else if (getFlavor() == 2)
13962                 {
13963                         const double    l2d     (deLog2(xd));
13964                         const fp16type  l2      (l2d);
13965                         const double    e2d     (deExp2(yd * l2.asDouble()));
13966                         const fp16type  e2      (e2d);
13967
13968                         result = e2.asDouble();
13969                 }
13970                 else
13971                 {
13972                         TCU_THROW(InternalError, "Unknown flavor");
13973                 }
13974
13975                 out[0] = fp16type(result).bits();
13976                 min[0] = getMin(result, ulps);
13977                 max[0] = getMax(result, ulps);
13978
13979                 return true;
13980         }
13981 };
13982
13983 struct fp16FMin : public fp16PerComponent
13984 {
13985         template<class fp16type>
13986         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
13987         {
13988                 const fp16type  x               (*in[0]);
13989                 const fp16type  y               (*in[1]);
13990                 const double    xd              (x.asDouble());
13991                 const double    yd              (y.asDouble());
13992                 const double    result  (deMin(xd, yd));
13993
13994                 if (x.isNaN() || y.isNaN())
13995                         return false;
13996
13997                 out[0] = fp16type(result).bits();
13998                 min[0] = getMin(result, getULPs(in));
13999                 max[0] = getMax(result, getULPs(in));
14000
14001                 return true;
14002         }
14003 };
14004
14005 struct fp16FMax : public fp16PerComponent
14006 {
14007         template<class fp16type>
14008         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14009         {
14010                 const fp16type  x               (*in[0]);
14011                 const fp16type  y               (*in[1]);
14012                 const double    xd              (x.asDouble());
14013                 const double    yd              (y.asDouble());
14014                 const double    result  (deMax(xd, yd));
14015
14016                 if (x.isNaN() || y.isNaN())
14017                         return false;
14018
14019                 out[0] = fp16type(result).bits();
14020                 min[0] = getMin(result, getULPs(in));
14021                 max[0] = getMax(result, getULPs(in));
14022
14023                 return true;
14024         }
14025 };
14026
14027 struct fp16Step : public fp16PerComponent
14028 {
14029         template<class fp16type>
14030         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14031         {
14032                 const fp16type  edge    (*in[0]);
14033                 const fp16type  x               (*in[1]);
14034                 const double    edged   (edge.asDouble());
14035                 const double    xd              (x.asDouble());
14036                 const double    result  (deStep(edged, xd));
14037
14038                 out[0] = fp16type(result).bits();
14039                 min[0] = getMin(result, getULPs(in));
14040                 max[0] = getMax(result, getULPs(in));
14041
14042                 return true;
14043         }
14044 };
14045
14046 struct fp16Ldexp : public fp16PerComponent
14047 {
14048         template<class fp16type>
14049         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14050         {
14051                 const fp16type  x               (*in[0]);
14052                 const fp16type  y               (*in[1]);
14053                 const double    xd              (x.asDouble());
14054                 const int               yd              (static_cast<int>(deTrunc(y.asDouble())));
14055                 const double    result  (deLdExp(xd, yd));
14056
14057                 if (y.isNaN() || y.isInf() || y.isDenorm() || yd < -14 || yd > 15)
14058                         return false;
14059
14060                 // Spec: "If this product is too large to be represented in the floating-point type, the result is undefined."
14061                 if (fp16type(result).isInf())
14062                         return false;
14063
14064                 out[0] = fp16type(result).bits();
14065                 min[0] = getMin(result, getULPs(in));
14066                 max[0] = getMax(result, getULPs(in));
14067
14068                 return true;
14069         }
14070 };
14071
14072 struct fp16FClamp : public fp16PerComponent
14073 {
14074         template<class fp16type>
14075         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14076         {
14077                 const fp16type  x               (*in[0]);
14078                 const fp16type  minVal  (*in[1]);
14079                 const fp16type  maxVal  (*in[2]);
14080                 const double    xd              (x.asDouble());
14081                 const double    minVald (minVal.asDouble());
14082                 const double    maxVald (maxVal.asDouble());
14083                 const double    result  (deClamp(xd, minVald, maxVald));
14084
14085                 if (minVal.isNaN() || maxVal.isNaN() || minVald > maxVald)
14086                         return false;
14087
14088                 out[0] = fp16type(result).bits();
14089                 min[0] = getMin(result, getULPs(in));
14090                 max[0] = getMax(result, getULPs(in));
14091
14092                 return true;
14093         }
14094 };
14095
14096 struct fp16FMix : public fp16PerComponent
14097 {
14098         fp16FMix() : fp16PerComponent()
14099         {
14100                 flavorNames.push_back("DoubleCalc");
14101                 flavorNames.push_back("EmulatingFP16");
14102                 flavorNames.push_back("EmulatingFP16YminusX");
14103         }
14104
14105         template<class fp16type>
14106         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14107         {
14108                 const fp16type  x               (*in[0]);
14109                 const fp16type  y               (*in[1]);
14110                 const fp16type  a               (*in[2]);
14111                 const double    ulps    (8.0); // This is not a precision test. Value is not from spec
14112                 double                  result  (0.0);
14113
14114                 if (getFlavor() == 0)
14115                 {
14116                         const double    xd              (x.asDouble());
14117                         const double    yd              (y.asDouble());
14118                         const double    ad              (a.asDouble());
14119                         const double    xeps    (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
14120                         const double    yeps    (floatFormat16.ulp(deAbs(yd * ad), ulps));
14121                         const double    eps             (xeps + yeps);
14122
14123                         result = deMix(xd, yd, ad);
14124                         min[0] = result - eps;
14125                         max[0] = result + eps;
14126                 }
14127                 else if (getFlavor() == 1)
14128                 {
14129                         const double    xd              (x.asDouble());
14130                         const double    yd              (y.asDouble());
14131                         const double    ad              (a.asDouble());
14132                         const fp16type  am              (1.0 - ad);
14133                         const double    amd             (am.asDouble());
14134                         const fp16type  xam             (xd * amd);
14135                         const double    xamd    (xam.asDouble());
14136                         const fp16type  ya              (yd * ad);
14137                         const double    yad             (ya.asDouble());
14138                         const double    xeps    (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
14139                         const double    yeps    (floatFormat16.ulp(deAbs(yd * ad), ulps));
14140                         const double    eps             (xeps + yeps);
14141
14142                         result = xamd + yad;
14143                         min[0] = result - eps;
14144                         max[0] = result + eps;
14145                 }
14146                 else if (getFlavor() == 2)
14147                 {
14148                         const double    xd              (x.asDouble());
14149                         const double    yd              (y.asDouble());
14150                         const double    ad              (a.asDouble());
14151                         const fp16type  ymx             (yd - xd);
14152                         const double    ymxd    (ymx.asDouble());
14153                         const fp16type  ymxa    (ymxd * ad);
14154                         const double    ymxad   (ymxa.asDouble());
14155                         const double    xeps    (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
14156                         const double    yeps    (floatFormat16.ulp(deAbs(yd * ad), ulps));
14157                         const double    eps             (xeps + yeps);
14158
14159                         result = xd + ymxad;
14160                         min[0] = result - eps;
14161                         max[0] = result + eps;
14162                 }
14163                 else
14164                 {
14165                         TCU_THROW(InternalError, "Unknown flavor");
14166                 }
14167
14168                 out[0] = fp16type(result).bits();
14169
14170                 return true;
14171         }
14172 };
14173
14174 struct fp16SmoothStep : public fp16PerComponent
14175 {
14176         fp16SmoothStep() : fp16PerComponent()
14177         {
14178                 flavorNames.push_back("FloatCalc");
14179                 flavorNames.push_back("EmulatingFP16");
14180                 flavorNames.push_back("EmulatingFP16WClamp");
14181         }
14182
14183         virtual double getULPs(vector<const deFloat16*>& in)
14184         {
14185                 DE_UNREF(in);
14186
14187                 return 4.0; // This is not a precision test. Value is not from spec
14188         }
14189
14190         template<class fp16type>
14191         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14192         {
14193                 const fp16type  edge0   (*in[0]);
14194                 const fp16type  edge1   (*in[1]);
14195                 const fp16type  x               (*in[2]);
14196                 double                  result  (0.0);
14197
14198                 if (edge0.isNaN() || edge1.isNaN() || x.isNaN() || edge0.asDouble() >= edge1.asDouble())
14199                         return false;
14200
14201                 if (edge0.isInf() || edge1.isInf() || x.isInf())
14202                         return false;
14203
14204                 if (getFlavor() == 0)
14205                 {
14206                         const float     edge0d  (edge0.asFloat());
14207                         const float     edge1d  (edge1.asFloat());
14208                         const float     xd              (x.asFloat());
14209                         const float     sstep   (deFloatSmoothStep(edge0d, edge1d, xd));
14210
14211                         result = sstep;
14212                 }
14213                 else if (getFlavor() == 1)
14214                 {
14215                         const double    edge0d  (edge0.asDouble());
14216                         const double    edge1d  (edge1.asDouble());
14217                         const double    xd              (x.asDouble());
14218
14219                         if (xd <= edge0d)
14220                                 result = 0.0;
14221                         else if (xd >= edge1d)
14222                                 result = 1.0;
14223                         else
14224                         {
14225                                 const fp16type  a       (xd - edge0d);
14226                                 const fp16type  b       (edge1d - edge0d);
14227                                 const fp16type  t       (a.asDouble() / b.asDouble());
14228                                 const fp16type  t2      (2.0 * t.asDouble());
14229                                 const fp16type  t3      (3.0 - t2.asDouble());
14230                                 const fp16type  t4      (t.asDouble() * t3.asDouble());
14231                                 const fp16type  t5      (t.asDouble() * t4.asDouble());
14232
14233                                 result = t5.asDouble();
14234                         }
14235                 }
14236                 else if (getFlavor() == 2)
14237                 {
14238                         const double    edge0d  (edge0.asDouble());
14239                         const double    edge1d  (edge1.asDouble());
14240                         const double    xd              (x.asDouble());
14241                         const fp16type  a       (xd - edge0d);
14242                         const fp16type  b       (edge1d - edge0d);
14243                         const fp16type  bi      (1.0 / b.asDouble());
14244                         const fp16type  t0      (a.asDouble() * bi.asDouble());
14245                         const double    tc      (deClamp(t0.asDouble(), 0.0, 1.0));
14246                         const fp16type  t       (tc);
14247                         const fp16type  t2      (2.0 * t.asDouble());
14248                         const fp16type  t3      (3.0 - t2.asDouble());
14249                         const fp16type  t4      (t.asDouble() * t3.asDouble());
14250                         const fp16type  t5      (t.asDouble() * t4.asDouble());
14251
14252                         result = t5.asDouble();
14253                 }
14254                 else
14255                 {
14256                         TCU_THROW(InternalError, "Unknown flavor");
14257                 }
14258
14259                 out[0] = fp16type(result).bits();
14260                 min[0] = getMin(result, getULPs(in));
14261                 max[0] = getMax(result, getULPs(in));
14262
14263                 return true;
14264         }
14265 };
14266
14267 struct fp16Fma : public fp16PerComponent
14268 {
14269         fp16Fma()
14270         {
14271                 flavorNames.push_back("DoubleCalc");
14272                 flavorNames.push_back("EmulatingFP16");
14273         }
14274
14275         virtual double getULPs(vector<const deFloat16*>& in)
14276         {
14277                 DE_UNREF(in);
14278
14279                 return 16.0;
14280         }
14281
14282         template<class fp16type>
14283         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14284         {
14285                 DE_ASSERT(in.size() == 3);
14286                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
14287                 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
14288                 DE_ASSERT(getArgCompCount(2) == getOutCompCount());
14289                 DE_ASSERT(getOutCompCount() > 0);
14290
14291                 const fp16type  a               (*in[0]);
14292                 const fp16type  b               (*in[1]);
14293                 const fp16type  c               (*in[2]);
14294                 double                  result  (0.0);
14295
14296                 if (getFlavor() == 0)
14297                 {
14298                         const double    ad      (a.asDouble());
14299                         const double    bd      (b.asDouble());
14300                         const double    cd      (c.asDouble());
14301
14302                         result  = deMadd(ad, bd, cd);
14303                 }
14304                 else if (getFlavor() == 1)
14305                 {
14306                         const double    ad      (a.asDouble());
14307                         const double    bd      (b.asDouble());
14308                         const double    cd      (c.asDouble());
14309                         const fp16type  ab      (ad * bd);
14310                         const fp16type  r       (ab.asDouble() + cd);
14311
14312                         result  = r.asDouble();
14313                 }
14314                 else
14315                 {
14316                         TCU_THROW(InternalError, "Unknown flavor");
14317                 }
14318
14319                 out[0] = fp16type(result).bits();
14320                 min[0] = getMin(result, getULPs(in));
14321                 max[0] = getMax(result, getULPs(in));
14322
14323                 return true;
14324         }
14325 };
14326
14327
14328 struct fp16AllComponents : public fp16PerComponent
14329 {
14330         bool            callOncePerComponent    ()      { return false; }
14331 };
14332
14333 struct fp16Length : public fp16AllComponents
14334 {
14335         fp16Length() : fp16AllComponents()
14336         {
14337                 flavorNames.push_back("EmulatingFP16");
14338                 flavorNames.push_back("DoubleCalc");
14339         }
14340
14341         virtual double getULPs(vector<const deFloat16*>& in)
14342         {
14343                 DE_UNREF(in);
14344
14345                 return 4.0;
14346         }
14347
14348         template<class fp16type>
14349         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14350         {
14351                 DE_ASSERT(getOutCompCount() == 1);
14352                 DE_ASSERT(in.size() == 1);
14353
14354                 double  result  (0.0);
14355
14356                 if (getFlavor() == 0)
14357                 {
14358                         fp16type        r       (0.0);
14359
14360                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
14361                         {
14362                                 const fp16type  x       (in[0][componentNdx]);
14363                                 const fp16type  q       (x.asDouble() * x.asDouble());
14364
14365                                 r = fp16type(r.asDouble() + q.asDouble());
14366                         }
14367
14368                         result = deSqrt(r.asDouble());
14369
14370                         out[0] = fp16type(result).bits();
14371                 }
14372                 else if (getFlavor() == 1)
14373                 {
14374                         double  r       (0.0);
14375
14376                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
14377                         {
14378                                 const fp16type  x       (in[0][componentNdx]);
14379                                 const double    q       (x.asDouble() * x.asDouble());
14380
14381                                 r += q;
14382                         }
14383
14384                         result = deSqrt(r);
14385
14386                         out[0] = fp16type(result).bits();
14387                 }
14388                 else
14389                 {
14390                         TCU_THROW(InternalError, "Unknown flavor");
14391                 }
14392
14393                 min[0] = getMin(result, getULPs(in));
14394                 max[0] = getMax(result, getULPs(in));
14395
14396                 return true;
14397         }
14398 };
14399
14400 struct fp16Distance : public fp16AllComponents
14401 {
14402         fp16Distance() : fp16AllComponents()
14403         {
14404                 flavorNames.push_back("EmulatingFP16");
14405                 flavorNames.push_back("DoubleCalc");
14406         }
14407
14408         virtual double getULPs(vector<const deFloat16*>& in)
14409         {
14410                 DE_UNREF(in);
14411
14412                 return 4.0;
14413         }
14414
14415         template<class fp16type>
14416         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14417         {
14418                 DE_ASSERT(getOutCompCount() == 1);
14419                 DE_ASSERT(in.size() == 2);
14420                 DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
14421
14422                 double  result  (0.0);
14423
14424                 if (getFlavor() == 0)
14425                 {
14426                         fp16type        r       (0.0);
14427
14428                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
14429                         {
14430                                 const fp16type  x       (in[0][componentNdx]);
14431                                 const fp16type  y       (in[1][componentNdx]);
14432                                 const fp16type  d       (x.asDouble() - y.asDouble());
14433                                 const fp16type  q       (d.asDouble() * d.asDouble());
14434
14435                                 r = fp16type(r.asDouble() + q.asDouble());
14436                         }
14437
14438                         result = deSqrt(r.asDouble());
14439                 }
14440                 else if (getFlavor() == 1)
14441                 {
14442                         double  r       (0.0);
14443
14444                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
14445                         {
14446                                 const fp16type  x       (in[0][componentNdx]);
14447                                 const fp16type  y       (in[1][componentNdx]);
14448                                 const double    d       (x.asDouble() - y.asDouble());
14449                                 const double    q       (d * d);
14450
14451                                 r += q;
14452                         }
14453
14454                         result = deSqrt(r);
14455                 }
14456                 else
14457                 {
14458                         TCU_THROW(InternalError, "Unknown flavor");
14459                 }
14460
14461                 out[0] = fp16type(result).bits();
14462                 min[0] = getMin(result, getULPs(in));
14463                 max[0] = getMax(result, getULPs(in));
14464
14465                 return true;
14466         }
14467 };
14468
14469 struct fp16Cross : public fp16AllComponents
14470 {
14471         fp16Cross() : fp16AllComponents()
14472         {
14473                 flavorNames.push_back("EmulatingFP16");
14474                 flavorNames.push_back("DoubleCalc");
14475         }
14476
14477         virtual double getULPs(vector<const deFloat16*>& in)
14478         {
14479                 DE_UNREF(in);
14480
14481                 return 4.0;
14482         }
14483
14484         template<class fp16type>
14485         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14486         {
14487                 DE_ASSERT(getOutCompCount() == 3);
14488                 DE_ASSERT(in.size() == 2);
14489                 DE_ASSERT(getArgCompCount(0) == 3);
14490                 DE_ASSERT(getArgCompCount(1) == 3);
14491
14492                 if (getFlavor() == 0)
14493                 {
14494                         const fp16type  x0              (in[0][0]);
14495                         const fp16type  x1              (in[0][1]);
14496                         const fp16type  x2              (in[0][2]);
14497                         const fp16type  y0              (in[1][0]);
14498                         const fp16type  y1              (in[1][1]);
14499                         const fp16type  y2              (in[1][2]);
14500                         const fp16type  x1y2    (x1.asDouble() * y2.asDouble());
14501                         const fp16type  y1x2    (y1.asDouble() * x2.asDouble());
14502                         const fp16type  x2y0    (x2.asDouble() * y0.asDouble());
14503                         const fp16type  y2x0    (y2.asDouble() * x0.asDouble());
14504                         const fp16type  x0y1    (x0.asDouble() * y1.asDouble());
14505                         const fp16type  y0x1    (y0.asDouble() * x1.asDouble());
14506
14507                         out[0] = fp16type(x1y2.asDouble() - y1x2.asDouble()).bits();
14508                         out[1] = fp16type(x2y0.asDouble() - y2x0.asDouble()).bits();
14509                         out[2] = fp16type(x0y1.asDouble() - y0x1.asDouble()).bits();
14510                 }
14511                 else if (getFlavor() == 1)
14512                 {
14513                         const fp16type  x0              (in[0][0]);
14514                         const fp16type  x1              (in[0][1]);
14515                         const fp16type  x2              (in[0][2]);
14516                         const fp16type  y0              (in[1][0]);
14517                         const fp16type  y1              (in[1][1]);
14518                         const fp16type  y2              (in[1][2]);
14519                         const double    x1y2    (x1.asDouble() * y2.asDouble());
14520                         const double    y1x2    (y1.asDouble() * x2.asDouble());
14521                         const double    x2y0    (x2.asDouble() * y0.asDouble());
14522                         const double    y2x0    (y2.asDouble() * x0.asDouble());
14523                         const double    x0y1    (x0.asDouble() * y1.asDouble());
14524                         const double    y0x1    (y0.asDouble() * x1.asDouble());
14525
14526                         out[0] = fp16type(x1y2 - y1x2).bits();
14527                         out[1] = fp16type(x2y0 - y2x0).bits();
14528                         out[2] = fp16type(x0y1 - y0x1).bits();
14529                 }
14530                 else
14531                 {
14532                         TCU_THROW(InternalError, "Unknown flavor");
14533                 }
14534
14535                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
14536                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
14537                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
14538                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
14539
14540                 return true;
14541         }
14542 };
14543
14544 struct fp16Normalize : public fp16AllComponents
14545 {
14546         fp16Normalize() : fp16AllComponents()
14547         {
14548                 flavorNames.push_back("EmulatingFP16");
14549                 flavorNames.push_back("DoubleCalc");
14550
14551                 // flavorNames will be extended later
14552         }
14553
14554         virtual void    setArgCompCount                 (size_t argNo, size_t compCount)
14555         {
14556                 DE_ASSERT(argCompCount[argNo] == 0); // Once only
14557
14558                 if (argNo == 0 && argCompCount[argNo] == 0)
14559                 {
14560                         const size_t            maxPermutationsCount    = 24u; // Equal to 4!
14561                         std::vector<int>        indices;
14562
14563                         for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
14564                                 indices.push_back(static_cast<int>(componentNdx));
14565
14566                         m_permutations.reserve(maxPermutationsCount);
14567
14568                         permutationsFlavorStart = flavorNames.size();
14569
14570                         do
14571                         {
14572                                 tcu::UVec4      permutation;
14573                                 std::string     name            = "Permutted_";
14574
14575                                 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
14576                                 {
14577                                         permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
14578                                         name += de::toString(indices[componentNdx]);
14579                                 }
14580
14581                                 m_permutations.push_back(permutation);
14582                                 flavorNames.push_back(name);
14583
14584                         } while(std::next_permutation(indices.begin(), indices.end()));
14585
14586                         permutationsFlavorEnd = flavorNames.size();
14587                 }
14588
14589                 fp16AllComponents::setArgCompCount(argNo, compCount);
14590         }
14591         virtual double getULPs(vector<const deFloat16*>& in)
14592         {
14593                 DE_UNREF(in);
14594
14595                 return 8.0;
14596         }
14597
14598         template<class fp16type>
14599         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14600         {
14601                 DE_ASSERT(in.size() == 1);
14602                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
14603
14604                 if (getFlavor() == 0)
14605                 {
14606                         fp16type        r(0.0);
14607
14608                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
14609                         {
14610                                 const fp16type  x       (in[0][componentNdx]);
14611                                 const fp16type  q       (x.asDouble() * x.asDouble());
14612
14613                                 r = fp16type(r.asDouble() + q.asDouble());
14614                         }
14615
14616                         r = fp16type(deSqrt(r.asDouble()));
14617
14618                         if (r.isZero())
14619                                 return false;
14620
14621                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
14622                         {
14623                                 const fp16type  x       (in[0][componentNdx]);
14624
14625                                 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
14626                         }
14627                 }
14628                 else if (getFlavor() == 1)
14629                 {
14630                         double  r(0.0);
14631
14632                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
14633                         {
14634                                 const fp16type  x       (in[0][componentNdx]);
14635                                 const double    q       (x.asDouble() * x.asDouble());
14636
14637                                 r += q;
14638                         }
14639
14640                         r = deSqrt(r);
14641
14642                         if (r == 0)
14643                                 return false;
14644
14645                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
14646                         {
14647                                 const fp16type  x       (in[0][componentNdx]);
14648
14649                                 out[componentNdx] = fp16type(x.asDouble() / r).bits();
14650                         }
14651                 }
14652                 else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
14653                 {
14654                         const int                       compCount               (static_cast<int>(getArgCompCount(0)));
14655                         const size_t            permutationNdx  (getFlavor() - permutationsFlavorStart);
14656                         const tcu::UVec4&       permutation             (m_permutations[permutationNdx]);
14657                         fp16type                        r                               (0.0);
14658
14659                         for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
14660                         {
14661                                 const size_t    componentNdx    (permutation[permComponentNdx]);
14662                                 const fp16type  x                               (in[0][componentNdx]);
14663                                 const fp16type  q                               (x.asDouble() * x.asDouble());
14664
14665                                 r = fp16type(r.asDouble() + q.asDouble());
14666                         }
14667
14668                         r = fp16type(deSqrt(r.asDouble()));
14669
14670                         if (r.isZero())
14671                                 return false;
14672
14673                         for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
14674                         {
14675                                 const size_t    componentNdx    (permutation[permComponentNdx]);
14676                                 const fp16type  x                               (in[0][componentNdx]);
14677
14678                                 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
14679                         }
14680                 }
14681                 else
14682                 {
14683                         TCU_THROW(InternalError, "Unknown flavor");
14684                 }
14685
14686                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
14687                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
14688                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
14689                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
14690
14691                 return true;
14692         }
14693
14694 private:
14695         std::vector<tcu::UVec4> m_permutations;
14696         size_t                                  permutationsFlavorStart;
14697         size_t                                  permutationsFlavorEnd;
14698 };
14699
14700 struct fp16FaceForward : public fp16AllComponents
14701 {
14702         virtual double getULPs(vector<const deFloat16*>& in)
14703         {
14704                 DE_UNREF(in);
14705
14706                 return 4.0;
14707         }
14708
14709         template<class fp16type>
14710         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14711         {
14712                 DE_ASSERT(in.size() == 3);
14713                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
14714                 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
14715                 DE_ASSERT(getArgCompCount(2) == getOutCompCount());
14716
14717                 fp16type        dp(0.0);
14718
14719                 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
14720                 {
14721                         const fp16type  x       (in[1][componentNdx]);
14722                         const fp16type  y       (in[2][componentNdx]);
14723                         const double    xd      (x.asDouble());
14724                         const double    yd      (y.asDouble());
14725                         const fp16type  q       (xd * yd);
14726
14727                         dp = fp16type(dp.asDouble() + q.asDouble());
14728                 }
14729
14730                 if (dp.isNaN() || dp.isZero())
14731                         return false;
14732
14733                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
14734                 {
14735                         const fp16type  n       (in[0][componentNdx]);
14736
14737                         out[componentNdx] = (dp.signBit() == 1) ? n.bits() : fp16type(-n.asDouble()).bits();
14738                 }
14739
14740                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
14741                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
14742                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
14743                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
14744
14745                 return true;
14746         }
14747 };
14748
14749 struct fp16Reflect : public fp16AllComponents
14750 {
14751         fp16Reflect() : fp16AllComponents()
14752         {
14753                 flavorNames.push_back("EmulatingFP16");
14754                 flavorNames.push_back("EmulatingFP16+KeepZeroSign");
14755                 flavorNames.push_back("FloatCalc");
14756                 flavorNames.push_back("FloatCalc+KeepZeroSign");
14757                 flavorNames.push_back("EmulatingFP16+2Nfirst");
14758                 flavorNames.push_back("EmulatingFP16+2Ifirst");
14759         }
14760
14761         virtual double getULPs(vector<const deFloat16*>& in)
14762         {
14763                 DE_UNREF(in);
14764
14765                 return 256.0; // This is not a precision test. Value is not from spec
14766         }
14767
14768         template<class fp16type>
14769         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14770         {
14771                 DE_ASSERT(in.size() == 2);
14772                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
14773                 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
14774
14775                 if (getFlavor() < 4)
14776                 {
14777                         const bool      keepZeroSign    ((flavor & 1) != 0 ? true : false);
14778                         const bool      floatCalc               ((flavor & 2) != 0 ? true : false);
14779
14780                         if (floatCalc)
14781                         {
14782                                 float   dp(0.0f);
14783
14784                                 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
14785                                 {
14786                                         const fp16type  i       (in[0][componentNdx]);
14787                                         const fp16type  n       (in[1][componentNdx]);
14788                                         const float             id      (i.asFloat());
14789                                         const float             nd      (n.asFloat());
14790                                         const float             qd      (id * nd);
14791
14792                                         if (keepZeroSign)
14793                                                 dp = (componentNdx == 0) ? qd : dp + qd;
14794                                         else
14795                                                 dp = dp + qd;
14796                                 }
14797
14798                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
14799                                 {
14800                                         const fp16type  i               (in[0][componentNdx]);
14801                                         const fp16type  n               (in[1][componentNdx]);
14802                                         const float             dpnd    (dp * n.asFloat());
14803                                         const float             dpn2d   (2.0f * dpnd);
14804                                         const float             idpn2d  (i.asFloat() - dpn2d);
14805                                         const fp16type  result  (idpn2d);
14806
14807                                         out[componentNdx] = result.bits();
14808                                 }
14809                         }
14810                         else
14811                         {
14812                                 fp16type        dp(0.0);
14813
14814                                 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
14815                                 {
14816                                         const fp16type  i       (in[0][componentNdx]);
14817                                         const fp16type  n       (in[1][componentNdx]);
14818                                         const double    id      (i.asDouble());
14819                                         const double    nd      (n.asDouble());
14820                                         const fp16type  q       (id * nd);
14821
14822                                         if (keepZeroSign)
14823                                                 dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
14824                                         else
14825                                                 dp = fp16type(dp.asDouble() + q.asDouble());
14826                                 }
14827
14828                                 if (dp.isNaN())
14829                                         return false;
14830
14831                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
14832                                 {
14833                                         const fp16type  i               (in[0][componentNdx]);
14834                                         const fp16type  n               (in[1][componentNdx]);
14835                                         const fp16type  dpn             (dp.asDouble() * n.asDouble());
14836                                         const fp16type  dpn2    (2 * dpn.asDouble());
14837                                         const fp16type  idpn2   (i.asDouble() - dpn2.asDouble());
14838
14839                                         out[componentNdx] = idpn2.bits();
14840                                 }
14841                         }
14842                 }
14843                 else if (getFlavor() == 4)
14844                 {
14845                         fp16type        dp(0.0);
14846
14847                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
14848                         {
14849                                 const fp16type  i       (in[0][componentNdx]);
14850                                 const fp16type  n       (in[1][componentNdx]);
14851                                 const double    id      (i.asDouble());
14852                                 const double    nd      (n.asDouble());
14853                                 const fp16type  q       (id * nd);
14854
14855                                 dp = fp16type(dp.asDouble() + q.asDouble());
14856                         }
14857
14858                         if (dp.isNaN())
14859                                 return false;
14860
14861                         for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
14862                         {
14863                                 const fp16type  i               (in[0][componentNdx]);
14864                                 const fp16type  n               (in[1][componentNdx]);
14865                                 const fp16type  n2              (2 * n.asDouble());
14866                                 const fp16type  dpn2    (dp.asDouble() * n2.asDouble());
14867                                 const fp16type  idpn2   (i.asDouble() - dpn2.asDouble());
14868
14869                                 out[componentNdx] = idpn2.bits();
14870                         }
14871                 }
14872                 else if (getFlavor() == 5)
14873                 {
14874                         fp16type        dp2(0.0);
14875
14876                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
14877                         {
14878                                 const fp16type  i       (in[0][componentNdx]);
14879                                 const fp16type  n       (in[1][componentNdx]);
14880                                 const fp16type  i2      (2.0 * i.asDouble());
14881                                 const double    i2d     (i2.asDouble());
14882                                 const double    nd      (n.asDouble());
14883                                 const fp16type  q       (i2d * nd);
14884
14885                                 dp2 = fp16type(dp2.asDouble() + q.asDouble());
14886                         }
14887
14888                         if (dp2.isNaN())
14889                                 return false;
14890
14891                         for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
14892                         {
14893                                 const fp16type  i               (in[0][componentNdx]);
14894                                 const fp16type  n               (in[1][componentNdx]);
14895                                 const fp16type  dpn2    (dp2.asDouble() * n.asDouble());
14896                                 const fp16type  idpn2   (i.asDouble() - dpn2.asDouble());
14897
14898                                 out[componentNdx] = idpn2.bits();
14899                         }
14900                 }
14901                 else
14902                 {
14903                         TCU_THROW(InternalError, "Unknown flavor");
14904                 }
14905
14906                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
14907                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
14908                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
14909                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
14910
14911                 return true;
14912         }
14913 };
14914
14915 struct fp16Refract : public fp16AllComponents
14916 {
14917         fp16Refract() : fp16AllComponents()
14918         {
14919                 flavorNames.push_back("EmulatingFP16");
14920                 flavorNames.push_back("EmulatingFP16+KeepZeroSign");
14921                 flavorNames.push_back("FloatCalc");
14922                 flavorNames.push_back("FloatCalc+KeepZeroSign");
14923         }
14924
14925         virtual double getULPs(vector<const deFloat16*>& in)
14926         {
14927                 DE_UNREF(in);
14928
14929                 return 8192.0; // This is not a precision test. Value is not from spec
14930         }
14931
14932         template<class fp16type>
14933         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14934         {
14935                 DE_ASSERT(in.size() == 3);
14936                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
14937                 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
14938                 DE_ASSERT(getArgCompCount(2) == 1);
14939
14940                 const bool              keepZeroSign    ((flavor & 1) != 0 ? true : false);
14941                 const bool              doubleCalc              ((flavor & 2) != 0 ? true : false);
14942                 const fp16type  eta                             (*in[2]);
14943
14944                 if (doubleCalc)
14945                 {
14946                         double  dp      (0.0);
14947
14948                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
14949                         {
14950                                 const fp16type  i       (in[0][componentNdx]);
14951                                 const fp16type  n       (in[1][componentNdx]);
14952                                 const double    id      (i.asDouble());
14953                                 const double    nd      (n.asDouble());
14954                                 const double    qd      (id * nd);
14955
14956                                 if (keepZeroSign)
14957                                         dp = (componentNdx == 0) ? qd : dp + qd;
14958                                 else
14959                                         dp = dp + qd;
14960                         }
14961
14962                         const double    eta2    (eta.asDouble() * eta.asDouble());
14963                         const double    dp2             (dp * dp);
14964                         const double    dp1             (1.0 - dp2);
14965                         const double    dpe             (eta2 * dp1);
14966                         const double    k               (1.0 - dpe);
14967
14968                         if (k < 0.0)
14969                         {
14970                                 const fp16type  zero    (0.0);
14971
14972                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
14973                                         out[componentNdx] = zero.bits();
14974                         }
14975                         else
14976                         {
14977                                 const double    sk      (deSqrt(k));
14978
14979                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
14980                                 {
14981                                         const fp16type  i               (in[0][componentNdx]);
14982                                         const fp16type  n               (in[1][componentNdx]);
14983                                         const double    etai    (i.asDouble() * eta.asDouble());
14984                                         const double    etadp   (eta.asDouble() * dp);
14985                                         const double    etadpk  (etadp + sk);
14986                                         const double    etadpkn (etadpk * n.asDouble());
14987                                         const double    full    (etai - etadpkn);
14988                                         const fp16type  result  (full);
14989
14990                                         if (result.isInf())
14991                                                 return false;
14992
14993                                         out[componentNdx] = result.bits();
14994                                 }
14995                         }
14996                 }
14997                 else
14998                 {
14999                         fp16type        dp      (0.0);
15000
15001                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
15002                         {
15003                                 const fp16type  i       (in[0][componentNdx]);
15004                                 const fp16type  n       (in[1][componentNdx]);
15005                                 const double    id      (i.asDouble());
15006                                 const double    nd      (n.asDouble());
15007                                 const fp16type  q       (id * nd);
15008
15009                                 if (keepZeroSign)
15010                                         dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
15011                                 else
15012                                         dp = fp16type(dp.asDouble() + q.asDouble());
15013                         }
15014
15015                         if (dp.isNaN())
15016                                 return false;
15017
15018                         const fp16type  eta2(eta.asDouble() * eta.asDouble());
15019                         const fp16type  dp2     (dp.asDouble() * dp.asDouble());
15020                         const fp16type  dp1     (1.0 - dp2.asDouble());
15021                         const fp16type  dpe     (eta2.asDouble() * dp1.asDouble());
15022                         const fp16type  k       (1.0 - dpe.asDouble());
15023
15024                         if (k.asDouble() < 0.0)
15025                         {
15026                                 const fp16type  zero    (0.0);
15027
15028                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
15029                                         out[componentNdx] = zero.bits();
15030                         }
15031                         else
15032                         {
15033                                 const fp16type  sk      (deSqrt(k.asDouble()));
15034
15035                                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
15036                                 {
15037                                         const fp16type  i               (in[0][componentNdx]);
15038                                         const fp16type  n               (in[1][componentNdx]);
15039                                         const fp16type  etai    (i.asDouble() * eta.asDouble());
15040                                         const fp16type  etadp   (eta.asDouble() * dp.asDouble());
15041                                         const fp16type  etadpk  (etadp.asDouble() + sk.asDouble());
15042                                         const fp16type  etadpkn (etadpk.asDouble() * n.asDouble());
15043                                         const fp16type  full    (etai.asDouble() - etadpkn.asDouble());
15044
15045                                         if (full.isNaN() || full.isInf())
15046                                                 return false;
15047
15048                                         out[componentNdx] = full.bits();
15049                                 }
15050                         }
15051                 }
15052
15053                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15054                         min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
15055                 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15056                         max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
15057
15058                 return true;
15059         }
15060 };
15061
15062 struct fp16Dot : public fp16AllComponents
15063 {
15064         fp16Dot() : fp16AllComponents()
15065         {
15066                 flavorNames.push_back("EmulatingFP16");
15067                 flavorNames.push_back("FloatCalc");
15068                 flavorNames.push_back("DoubleCalc");
15069
15070                 // flavorNames will be extended later
15071         }
15072
15073         virtual void    setArgCompCount                 (size_t argNo, size_t compCount)
15074         {
15075                 DE_ASSERT(argCompCount[argNo] == 0); // Once only
15076
15077                 if (argNo == 0 && argCompCount[argNo] == 0)
15078                 {
15079                         const size_t            maxPermutationsCount    = 24u; // Equal to 4!
15080                         std::vector<int>        indices;
15081
15082                         for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
15083                                 indices.push_back(static_cast<int>(componentNdx));
15084
15085                         m_permutations.reserve(maxPermutationsCount);
15086
15087                         permutationsFlavorStart = flavorNames.size();
15088
15089                         do
15090                         {
15091                                 tcu::UVec4      permutation;
15092                                 std::string     name            = "Permutted_";
15093
15094                                 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
15095                                 {
15096                                         permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
15097                                         name += de::toString(indices[componentNdx]);
15098                                 }
15099
15100                                 m_permutations.push_back(permutation);
15101                                 flavorNames.push_back(name);
15102
15103                         } while(std::next_permutation(indices.begin(), indices.end()));
15104
15105                         permutationsFlavorEnd = flavorNames.size();
15106                 }
15107
15108                 fp16AllComponents::setArgCompCount(argNo, compCount);
15109         }
15110
15111         virtual double  getULPs(vector<const deFloat16*>& in)
15112         {
15113                 DE_UNREF(in);
15114
15115                 return 16.0; // This is not a precision test. Value is not from spec
15116         }
15117
15118         template<class fp16type>
15119         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15120         {
15121                 DE_ASSERT(in.size() == 2);
15122                 DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
15123                 DE_ASSERT(getOutCompCount() == 1);
15124
15125                 double  result  (0.0);
15126                 double  eps             (0.0);
15127
15128                 if (getFlavor() == 0)
15129                 {
15130                         fp16type        dp      (0.0);
15131
15132                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
15133                         {
15134                                 const fp16type  x       (in[0][componentNdx]);
15135                                 const fp16type  y       (in[1][componentNdx]);
15136                                 const fp16type  q       (x.asDouble() * y.asDouble());
15137
15138                                 dp = fp16type(dp.asDouble() + q.asDouble());
15139                                 eps += floatFormat16.ulp(q.asDouble(), 2.0);
15140                         }
15141
15142                         result = dp.asDouble();
15143                 }
15144                 else if (getFlavor() == 1)
15145                 {
15146                         float   dp      (0.0);
15147
15148                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
15149                         {
15150                                 const fp16type  x       (in[0][componentNdx]);
15151                                 const fp16type  y       (in[1][componentNdx]);
15152                                 const float             q       (x.asFloat() * y.asFloat());
15153
15154                                 dp += q;
15155                                 eps += floatFormat16.ulp(static_cast<double>(q), 2.0);
15156                         }
15157
15158                         result = dp;
15159                 }
15160                 else if (getFlavor() == 2)
15161                 {
15162                         double  dp      (0.0);
15163
15164                         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
15165                         {
15166                                 const fp16type  x       (in[0][componentNdx]);
15167                                 const fp16type  y       (in[1][componentNdx]);
15168                                 const double    q       (x.asDouble() * y.asDouble());
15169
15170                                 dp += q;
15171                                 eps += floatFormat16.ulp(q, 2.0);
15172                         }
15173
15174                         result = dp;
15175                 }
15176                 else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
15177                 {
15178                         const int                       compCount               (static_cast<int>(getArgCompCount(1)));
15179                         const size_t            permutationNdx  (getFlavor() - permutationsFlavorStart);
15180                         const tcu::UVec4&       permutation             (m_permutations[permutationNdx]);
15181                         fp16type                        dp                              (0.0);
15182
15183                         for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
15184                         {
15185                                 const size_t            componentNdx    (permutation[permComponentNdx]);
15186                                 const fp16type          x                               (in[0][componentNdx]);
15187                                 const fp16type          y                               (in[1][componentNdx]);
15188                                 const fp16type          q                               (x.asDouble() * y.asDouble());
15189
15190                                 dp = fp16type(dp.asDouble() + q.asDouble());
15191                                 eps += floatFormat16.ulp(q.asDouble(), 2.0);
15192                         }
15193
15194                         result = dp.asDouble();
15195                 }
15196                 else
15197                 {
15198                         TCU_THROW(InternalError, "Unknown flavor");
15199                 }
15200
15201                 out[0] = fp16type(result).bits();
15202                 min[0] = result - eps;
15203                 max[0] = result + eps;
15204
15205                 return true;
15206         }
15207
15208 private:
15209         std::vector<tcu::UVec4> m_permutations;
15210         size_t                                  permutationsFlavorStart;
15211         size_t                                  permutationsFlavorEnd;
15212 };
15213
15214 struct fp16VectorTimesScalar : public fp16AllComponents
15215 {
15216         virtual double getULPs(vector<const deFloat16*>& in)
15217         {
15218                 DE_UNREF(in);
15219
15220                 return 2.0;
15221         }
15222
15223         template<class fp16type>
15224         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15225         {
15226                 DE_ASSERT(in.size() == 2);
15227                 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
15228                 DE_ASSERT(getArgCompCount(1) == 1);
15229
15230                 fp16type        s       (*in[1]);
15231
15232                 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15233                 {
15234                         const fp16type  x          (in[0][componentNdx]);
15235                         const double    result (s.asDouble() * x.asDouble());
15236                         const fp16type  m          (result);
15237
15238                         out[componentNdx] = m.bits();
15239                         min[componentNdx] = getMin(result, getULPs(in));
15240                         max[componentNdx] = getMax(result, getULPs(in));
15241                 }
15242
15243                 return true;
15244         }
15245 };
15246
15247 struct fp16MatrixBase : public fp16AllComponents
15248 {
15249         deUint32                getComponentValidity                    ()
15250         {
15251                 return static_cast<deUint32>(-1);
15252         }
15253
15254         inline size_t   getNdx                                                  (const size_t rowCount, const size_t col, const size_t row)
15255         {
15256                 const size_t minComponentCount  = 0;
15257                 const size_t maxComponentCount  = 3;
15258                 const size_t alignedRowsCount   = (rowCount == 3) ? 4 : rowCount;
15259
15260                 DE_ASSERT(de::inRange(rowCount, minComponentCount + 1, maxComponentCount + 1));
15261                 DE_ASSERT(de::inRange(col, minComponentCount, maxComponentCount));
15262                 DE_ASSERT(de::inBounds(row, minComponentCount, rowCount));
15263                 DE_UNREF(minComponentCount);
15264                 DE_UNREF(maxComponentCount);
15265
15266                 return col * alignedRowsCount + row;
15267         }
15268
15269         deUint32                getComponentMatrixValidityMask  (size_t cols, size_t rows)
15270         {
15271                 deUint32        result  = 0u;
15272
15273                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
15274                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
15275                         {
15276                                 const size_t bitNdx = getNdx(rows, colNdx, rowNdx);
15277
15278                                 DE_ASSERT(bitNdx < sizeof(result) * 8);
15279
15280                                 result |= (1<<bitNdx);
15281                         }
15282
15283                 return result;
15284         }
15285 };
15286
15287 template<size_t cols, size_t rows>
15288 struct fp16Transpose : public fp16MatrixBase
15289 {
15290         virtual double getULPs(vector<const deFloat16*>& in)
15291         {
15292                 DE_UNREF(in);
15293
15294                 return 1.0;
15295         }
15296
15297         deUint32        getComponentValidity    ()
15298         {
15299                 return getComponentMatrixValidityMask(rows, cols);
15300         }
15301
15302         template<class fp16type>
15303         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15304         {
15305                 DE_ASSERT(in.size() == 1);
15306
15307                 const size_t            alignedCols     = (cols == 3) ? 4 : cols;
15308                 const size_t            alignedRows     = (rows == 3) ? 4 : rows;
15309                 vector<deFloat16>       output          (alignedCols * alignedRows, 0);
15310
15311                 DE_ASSERT(output.size() == alignedCols * alignedRows);
15312
15313                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
15314                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
15315                                 output[rowNdx * alignedCols + colNdx] = in[0][colNdx * alignedRows + rowNdx];
15316
15317                 deMemcpy(out, &output[0], sizeof(deFloat16) * output.size());
15318                 deMemcpy(min, &output[0], sizeof(deFloat16) * output.size());
15319                 deMemcpy(max, &output[0], sizeof(deFloat16) * output.size());
15320
15321                 return true;
15322         }
15323 };
15324
15325 template<size_t cols, size_t rows>
15326 struct fp16MatrixTimesScalar : public fp16MatrixBase
15327 {
15328         virtual double getULPs(vector<const deFloat16*>& in)
15329         {
15330                 DE_UNREF(in);
15331
15332                 return 4.0;
15333         }
15334
15335         deUint32        getComponentValidity    ()
15336         {
15337                 return getComponentMatrixValidityMask(cols, rows);
15338         }
15339
15340         template<class fp16type>
15341         bool calc(vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15342         {
15343                 DE_ASSERT(in.size() == 2);
15344                 DE_ASSERT(getArgCompCount(1) == 1);
15345
15346                 const fp16type  y                       (in[1][0]);
15347                 const float             scalar          (y.asFloat());
15348                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
15349                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
15350
15351                 DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
15352                 DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
15353                 DE_UNREF(alignedCols);
15354
15355                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
15356                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
15357                         {
15358                                 const size_t    ndx     (colNdx * alignedRows + rowNdx);
15359                                 const fp16type  x       (in[0][ndx]);
15360                                 const double    result  (scalar * x.asFloat());
15361
15362                                 out[ndx] = fp16type(result).bits();
15363                                 min[ndx] = getMin(result, getULPs(in));
15364                                 max[ndx] = getMax(result, getULPs(in));
15365                         }
15366
15367                 return true;
15368         }
15369 };
15370
15371 template<size_t cols, size_t rows>
15372 struct fp16VectorTimesMatrix : public fp16MatrixBase
15373 {
15374         fp16VectorTimesMatrix() : fp16MatrixBase()
15375         {
15376                 flavorNames.push_back("EmulatingFP16");
15377                 flavorNames.push_back("FloatCalc");
15378         }
15379
15380         virtual double getULPs (vector<const deFloat16*>& in)
15381         {
15382                 DE_UNREF(in);
15383
15384                 return (8.0 * cols);
15385         }
15386
15387         deUint32 getComponentValidity ()
15388         {
15389                 return getComponentMatrixValidityMask(cols, 1);
15390         }
15391
15392         template<class fp16type>
15393         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15394         {
15395                 DE_ASSERT(in.size() == 2);
15396
15397                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
15398                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
15399
15400                 DE_ASSERT(getOutCompCount() == cols);
15401                 DE_ASSERT(getArgCompCount(0) == rows);
15402                 DE_ASSERT(getArgCompCount(1) == alignedCols * alignedRows);
15403                 DE_UNREF(alignedCols);
15404
15405                 if (getFlavor() == 0)
15406                 {
15407                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
15408                         {
15409                                 fp16type        s       (fp16type::zero(1));
15410
15411                                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
15412                                 {
15413                                         const fp16type  v       (in[0][rowNdx]);
15414                                         const float             vf      (v.asFloat());
15415                                         const size_t    ndx     (colNdx * alignedRows + rowNdx);
15416                                         const fp16type  x       (in[1][ndx]);
15417                                         const float             xf      (x.asFloat());
15418                                         const fp16type  m       (vf * xf);
15419
15420                                         s = fp16type(s.asFloat() + m.asFloat());
15421                                 }
15422
15423                                 out[colNdx] = s.bits();
15424                                 min[colNdx] = getMin(s.asDouble(), getULPs(in));
15425                                 max[colNdx] = getMax(s.asDouble(), getULPs(in));
15426                         }
15427                 }
15428                 else if (getFlavor() == 1)
15429                 {
15430                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
15431                         {
15432                                 float   s       (0.0f);
15433
15434                                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
15435                                 {
15436                                         const fp16type  v       (in[0][rowNdx]);
15437                                         const float             vf      (v.asFloat());
15438                                         const size_t    ndx     (colNdx * alignedRows + rowNdx);
15439                                         const fp16type  x       (in[1][ndx]);
15440                                         const float             xf      (x.asFloat());
15441                                         const float             m       (vf * xf);
15442
15443                                         s += m;
15444                                 }
15445
15446                                 out[colNdx] = fp16type(s).bits();
15447                                 min[colNdx] = getMin(static_cast<double>(s), getULPs(in));
15448                                 max[colNdx] = getMax(static_cast<double>(s), getULPs(in));
15449                         }
15450                 }
15451                 else
15452                 {
15453                         TCU_THROW(InternalError, "Unknown flavor");
15454                 }
15455
15456                 return true;
15457         }
15458 };
15459
15460 template<size_t cols, size_t rows>
15461 struct fp16MatrixTimesVector : public fp16MatrixBase
15462 {
15463         fp16MatrixTimesVector() : fp16MatrixBase()
15464         {
15465                 flavorNames.push_back("EmulatingFP16");
15466                 flavorNames.push_back("FloatCalc");
15467         }
15468
15469         virtual double getULPs (vector<const deFloat16*>& in)
15470         {
15471                 DE_UNREF(in);
15472
15473                 return (8.0 * rows);
15474         }
15475
15476         deUint32 getComponentValidity ()
15477         {
15478                 return getComponentMatrixValidityMask(rows, 1);
15479         }
15480
15481         template<class fp16type>
15482         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15483         {
15484                 DE_ASSERT(in.size() == 2);
15485
15486                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
15487                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
15488
15489                 DE_ASSERT(getOutCompCount() == rows);
15490                 DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
15491                 DE_ASSERT(getArgCompCount(1) == cols);
15492                 DE_UNREF(alignedCols);
15493
15494                 if (getFlavor() == 0)
15495                 {
15496                         for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
15497                         {
15498                                 fp16type        s       (fp16type::zero(1));
15499
15500                                 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
15501                                 {
15502                                         const size_t    ndx     (colNdx * alignedRows + rowNdx);
15503                                         const fp16type  x       (in[0][ndx]);
15504                                         const float             xf      (x.asFloat());
15505                                         const fp16type  v       (in[1][colNdx]);
15506                                         const float             vf      (v.asFloat());
15507                                         const fp16type  m       (vf * xf);
15508
15509                                         s = fp16type(s.asFloat() + m.asFloat());
15510                                 }
15511
15512                                 out[rowNdx] = s.bits();
15513                                 min[rowNdx] = getMin(s.asDouble(), getULPs(in));
15514                                 max[rowNdx] = getMax(s.asDouble(), getULPs(in));
15515                         }
15516                 }
15517                 else if (getFlavor() == 1)
15518                 {
15519                         for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
15520                         {
15521                                 float   s       (0.0f);
15522
15523                                 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
15524                                 {
15525                                         const size_t    ndx     (colNdx * alignedRows + rowNdx);
15526                                         const fp16type  x       (in[0][ndx]);
15527                                         const float             xf      (x.asFloat());
15528                                         const fp16type  v       (in[1][colNdx]);
15529                                         const float             vf      (v.asFloat());
15530                                         const float             m       (vf * xf);
15531
15532                                         s += m;
15533                                 }
15534
15535                                 out[rowNdx] = fp16type(s).bits();
15536                                 min[rowNdx] = getMin(static_cast<double>(s), getULPs(in));
15537                                 max[rowNdx] = getMax(static_cast<double>(s), getULPs(in));
15538                         }
15539                 }
15540                 else
15541                 {
15542                         TCU_THROW(InternalError, "Unknown flavor");
15543                 }
15544
15545                 return true;
15546         }
15547 };
15548
15549 template<size_t colsL, size_t rowsL, size_t colsR, size_t rowsR>
15550 struct fp16MatrixTimesMatrix : public fp16MatrixBase
15551 {
15552         fp16MatrixTimesMatrix() : fp16MatrixBase()
15553         {
15554                 flavorNames.push_back("EmulatingFP16");
15555                 flavorNames.push_back("FloatCalc");
15556         }
15557
15558         virtual double getULPs (vector<const deFloat16*>& in)
15559         {
15560                 DE_UNREF(in);
15561
15562                 return 32.0;
15563         }
15564
15565         deUint32 getComponentValidity ()
15566         {
15567                 return getComponentMatrixValidityMask(colsR, rowsL);
15568         }
15569
15570         template<class fp16type>
15571         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15572         {
15573                 DE_STATIC_ASSERT(colsL == rowsR);
15574
15575                 DE_ASSERT(in.size() == 2);
15576
15577                 const size_t    alignedColsL    = (colsL == 3) ? 4 : colsL;
15578                 const size_t    alignedRowsL    = (rowsL == 3) ? 4 : rowsL;
15579                 const size_t    alignedColsR    = (colsR == 3) ? 4 : colsR;
15580                 const size_t    alignedRowsR    = (rowsR == 3) ? 4 : rowsR;
15581
15582                 DE_ASSERT(getOutCompCount() == alignedColsR * alignedRowsL);
15583                 DE_ASSERT(getArgCompCount(0) == alignedColsL * alignedRowsL);
15584                 DE_ASSERT(getArgCompCount(1) == alignedColsR * alignedRowsR);
15585                 DE_UNREF(alignedColsL);
15586                 DE_UNREF(alignedColsR);
15587
15588                 if (getFlavor() == 0)
15589                 {
15590                         for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
15591                         {
15592                                 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
15593                                 {
15594                                         const size_t    ndx     (colNdx * alignedRowsL + rowNdx);
15595                                         fp16type                s       (fp16type::zero(1));
15596
15597                                         for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
15598                                         {
15599                                                 const size_t    ndxl    (commonNdx * alignedRowsL + rowNdx);
15600                                                 const fp16type  l               (in[0][ndxl]);
15601                                                 const float             lf              (l.asFloat());
15602                                                 const size_t    ndxr    (colNdx * alignedRowsR + commonNdx);
15603                                                 const fp16type  r               (in[1][ndxr]);
15604                                                 const float             rf              (r.asFloat());
15605                                                 const fp16type  m               (lf * rf);
15606
15607                                                 s = fp16type(s.asFloat() + m.asFloat());
15608                                         }
15609
15610                                         out[ndx] = s.bits();
15611                                         min[ndx] = getMin(s.asDouble(), getULPs(in));
15612                                         max[ndx] = getMax(s.asDouble(), getULPs(in));
15613                                 }
15614                         }
15615                 }
15616                 else if (getFlavor() == 1)
15617                 {
15618                         for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
15619                         {
15620                                 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
15621                                 {
15622                                         const size_t    ndx     (colNdx * alignedRowsL + rowNdx);
15623                                         float                   s       (0.0f);
15624
15625                                         for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
15626                                         {
15627                                                 const size_t    ndxl    (commonNdx * alignedRowsL + rowNdx);
15628                                                 const fp16type  l               (in[0][ndxl]);
15629                                                 const float             lf              (l.asFloat());
15630                                                 const size_t    ndxr    (colNdx * alignedRowsR + commonNdx);
15631                                                 const fp16type  r               (in[1][ndxr]);
15632                                                 const float             rf              (r.asFloat());
15633                                                 const float             m               (lf * rf);
15634
15635                                                 s += m;
15636                                         }
15637
15638                                         out[ndx] = fp16type(s).bits();
15639                                         min[ndx] = getMin(static_cast<double>(s), getULPs(in));
15640                                         max[ndx] = getMax(static_cast<double>(s), getULPs(in));
15641                                 }
15642                         }
15643                 }
15644                 else
15645                 {
15646                         TCU_THROW(InternalError, "Unknown flavor");
15647                 }
15648
15649                 return true;
15650         }
15651 };
15652
15653 template<size_t cols, size_t rows>
15654 struct fp16OuterProduct : public fp16MatrixBase
15655 {
15656         virtual double getULPs (vector<const deFloat16*>& in)
15657         {
15658                 DE_UNREF(in);
15659
15660                 return 2.0;
15661         }
15662
15663         deUint32 getComponentValidity ()
15664         {
15665                 return getComponentMatrixValidityMask(cols, rows);
15666         }
15667
15668         template<class fp16type>
15669         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15670         {
15671                 DE_ASSERT(in.size() == 2);
15672
15673                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
15674                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
15675
15676                 DE_ASSERT(getArgCompCount(0) == rows);
15677                 DE_ASSERT(getArgCompCount(1) == cols);
15678                 DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
15679                 DE_UNREF(alignedCols);
15680
15681                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
15682                 {
15683                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
15684                         {
15685                                 const size_t    ndx     (colNdx * alignedRows + rowNdx);
15686                                 const fp16type  x       (in[0][rowNdx]);
15687                                 const float             xf      (x.asFloat());
15688                                 const fp16type  y       (in[1][colNdx]);
15689                                 const float             yf      (y.asFloat());
15690                                 const fp16type  m       (xf * yf);
15691
15692                                 out[ndx] = m.bits();
15693                                 min[ndx] = getMin(m.asDouble(), getULPs(in));
15694                                 max[ndx] = getMax(m.asDouble(), getULPs(in));
15695                         }
15696                 }
15697
15698                 return true;
15699         }
15700 };
15701
15702 template<size_t size>
15703 struct fp16Determinant;
15704
15705 template<>
15706 struct fp16Determinant<2> : public fp16MatrixBase
15707 {
15708         virtual double getULPs (vector<const deFloat16*>& in)
15709         {
15710                 DE_UNREF(in);
15711
15712                 return 128.0; // This is not a precision test. Value is not from spec
15713         }
15714
15715         deUint32 getComponentValidity ()
15716         {
15717                 return 1;
15718         }
15719
15720         template<class fp16type>
15721         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15722         {
15723                 const size_t    cols            = 2;
15724                 const size_t    rows            = 2;
15725                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
15726                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
15727
15728                 DE_ASSERT(in.size() == 1);
15729                 DE_ASSERT(getOutCompCount() == 1);
15730                 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
15731                 DE_UNREF(alignedCols);
15732                 DE_UNREF(alignedRows);
15733
15734                 // [ a b ]
15735                 // [ c d ]
15736                 const float             a               (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
15737                 const float             b               (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
15738                 const float             c               (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
15739                 const float             d               (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
15740                 const float             ad              (a * d);
15741                 const fp16type  adf16   (ad);
15742                 const float             bc              (b * c);
15743                 const fp16type  bcf16   (bc);
15744                 const float             r               (adf16.asFloat() - bcf16.asFloat());
15745                 const fp16type  rf16    (r);
15746
15747                 out[0] = rf16.bits();
15748                 min[0] = getMin(r, getULPs(in));
15749                 max[0] = getMax(r, getULPs(in));
15750
15751                 return true;
15752         }
15753 };
15754
15755 template<>
15756 struct fp16Determinant<3> : public fp16MatrixBase
15757 {
15758         virtual double getULPs (vector<const deFloat16*>& in)
15759         {
15760                 DE_UNREF(in);
15761
15762                 return 128.0; // This is not a precision test. Value is not from spec
15763         }
15764
15765         deUint32 getComponentValidity ()
15766         {
15767                 return 1;
15768         }
15769
15770         template<class fp16type>
15771         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15772         {
15773                 const size_t    cols            = 3;
15774                 const size_t    rows            = 3;
15775                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
15776                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
15777
15778                 DE_ASSERT(in.size() == 1);
15779                 DE_ASSERT(getOutCompCount() == 1);
15780                 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
15781                 DE_UNREF(alignedCols);
15782                 DE_UNREF(alignedRows);
15783
15784                 // [ a b c ]
15785                 // [ d e f ]
15786                 // [ g h i ]
15787                 const float             a               (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
15788                 const float             b               (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
15789                 const float             c               (fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
15790                 const float             d               (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
15791                 const float             e               (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
15792                 const float             f               (fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
15793                 const float             g               (fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
15794                 const float             h               (fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
15795                 const float             i               (fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
15796                 const fp16type  aei             (a * e * i);
15797                 const fp16type  bfg             (b * f * g);
15798                 const fp16type  cdh             (c * d * h);
15799                 const fp16type  ceg             (c * e * g);
15800                 const fp16type  bdi             (b * d * i);
15801                 const fp16type  afh             (a * f * h);
15802                 const float             r               (aei.asFloat() + bfg.asFloat() + cdh.asFloat() - ceg.asFloat() - bdi.asFloat() - afh.asFloat());
15803                 const fp16type  rf16    (r);
15804
15805                 out[0] = rf16.bits();
15806                 min[0] = getMin(r, getULPs(in));
15807                 max[0] = getMax(r, getULPs(in));
15808
15809                 return true;
15810         }
15811 };
15812
15813 template<>
15814 struct fp16Determinant<4> : public fp16MatrixBase
15815 {
15816         virtual double getULPs (vector<const deFloat16*>& in)
15817         {
15818                 DE_UNREF(in);
15819
15820                 return 128.0; // This is not a precision test. Value is not from spec
15821         }
15822
15823         deUint32 getComponentValidity ()
15824         {
15825                 return 1;
15826         }
15827
15828         template<class fp16type>
15829         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15830         {
15831                 const size_t    rows            = 4;
15832                 const size_t    cols            = 4;
15833                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
15834                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
15835
15836                 DE_ASSERT(in.size() == 1);
15837                 DE_ASSERT(getOutCompCount() == 1);
15838                 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
15839                 DE_UNREF(alignedCols);
15840                 DE_UNREF(alignedRows);
15841
15842                 // [ a b c d ]
15843                 // [ e f g h ]
15844                 // [ i j k l ]
15845                 // [ m n o p ]
15846                 const float             a               (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
15847                 const float             b               (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
15848                 const float             c               (fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
15849                 const float             d               (fp16type(in[0][getNdx(rows, 3, 0)]).asFloat());
15850                 const float             e               (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
15851                 const float             f               (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
15852                 const float             g               (fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
15853                 const float             h               (fp16type(in[0][getNdx(rows, 3, 1)]).asFloat());
15854                 const float             i               (fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
15855                 const float             j               (fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
15856                 const float             k               (fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
15857                 const float             l               (fp16type(in[0][getNdx(rows, 3, 2)]).asFloat());
15858                 const float             m               (fp16type(in[0][getNdx(rows, 0, 3)]).asFloat());
15859                 const float             n               (fp16type(in[0][getNdx(rows, 1, 3)]).asFloat());
15860                 const float             o               (fp16type(in[0][getNdx(rows, 2, 3)]).asFloat());
15861                 const float             p               (fp16type(in[0][getNdx(rows, 3, 3)]).asFloat());
15862
15863                 // [ f g h ]
15864                 // [ j k l ]
15865                 // [ n o p ]
15866                 const fp16type  fkp             (f * k * p);
15867                 const fp16type  gln             (g * l * n);
15868                 const fp16type  hjo             (h * j * o);
15869                 const fp16type  hkn             (h * k * n);
15870                 const fp16type  gjp             (g * j * p);
15871                 const fp16type  flo             (f * l * o);
15872                 const fp16type  detA    (a * (fkp.asFloat() + gln.asFloat() + hjo.asFloat() - hkn.asFloat() - gjp.asFloat() - flo.asFloat()));
15873
15874                 // [ e g h ]
15875                 // [ i k l ]
15876                 // [ m o p ]
15877                 const fp16type  ekp             (e * k * p);
15878                 const fp16type  glm             (g * l * m);
15879                 const fp16type  hio             (h * i * o);
15880                 const fp16type  hkm             (h * k * m);
15881                 const fp16type  gip             (g * i * p);
15882                 const fp16type  elo             (e * l * o);
15883                 const fp16type  detB    (b * (ekp.asFloat() + glm.asFloat() + hio.asFloat() - hkm.asFloat() - gip.asFloat() - elo.asFloat()));
15884
15885                 // [ e f h ]
15886                 // [ i j l ]
15887                 // [ m n p ]
15888                 const fp16type  ejp             (e * j * p);
15889                 const fp16type  flm             (f * l * m);
15890                 const fp16type  hin             (h * i * n);
15891                 const fp16type  hjm             (h * j * m);
15892                 const fp16type  fip             (f * i * p);
15893                 const fp16type  eln             (e * l * n);
15894                 const fp16type  detC    (c * (ejp.asFloat() + flm.asFloat() + hin.asFloat() - hjm.asFloat() - fip.asFloat() - eln.asFloat()));
15895
15896                 // [ e f g ]
15897                 // [ i j k ]
15898                 // [ m n o ]
15899                 const fp16type  ejo             (e * j * o);
15900                 const fp16type  fkm             (f * k * m);
15901                 const fp16type  gin             (g * i * n);
15902                 const fp16type  gjm             (g * j * m);
15903                 const fp16type  fio             (f * i * o);
15904                 const fp16type  ekn             (e * k * n);
15905                 const fp16type  detD    (d * (ejo.asFloat() + fkm.asFloat() + gin.asFloat() - gjm.asFloat() - fio.asFloat() - ekn.asFloat()));
15906
15907                 const float             r               (detA.asFloat() - detB.asFloat() + detC.asFloat() - detD.asFloat());
15908                 const fp16type  rf16    (r);
15909
15910                 out[0] = rf16.bits();
15911                 min[0] = getMin(r, getULPs(in));
15912                 max[0] = getMax(r, getULPs(in));
15913
15914                 return true;
15915         }
15916 };
15917
15918 template<size_t size>
15919 struct fp16Inverse;
15920
15921 template<>
15922 struct fp16Inverse<2> : public fp16MatrixBase
15923 {
15924         virtual double getULPs (vector<const deFloat16*>& in)
15925         {
15926                 DE_UNREF(in);
15927
15928                 return 128.0; // This is not a precision test. Value is not from spec
15929         }
15930
15931         deUint32 getComponentValidity ()
15932         {
15933                 return getComponentMatrixValidityMask(2, 2);
15934         }
15935
15936         template<class fp16type>
15937         bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15938         {
15939                 const size_t    cols            = 2;
15940                 const size_t    rows            = 2;
15941                 const size_t    alignedCols     = (cols == 3) ? 4 : cols;
15942                 const size_t    alignedRows     = (rows == 3) ? 4 : rows;
15943
15944                 DE_ASSERT(in.size() == 1);
15945                 DE_ASSERT(getOutCompCount() == alignedRows * alignedCols);
15946                 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
15947                 DE_UNREF(alignedCols);
15948
15949                 // [ a b ]
15950                 // [ c d ]
15951                 const float             a               (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
15952                 const float             b               (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
15953                 const float             c               (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
15954                 const float             d               (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
15955                 const float             ad              (a * d);
15956                 const fp16type  adf16   (ad);
15957                 const float             bc              (b * c);
15958                 const fp16type  bcf16   (bc);
15959                 const float             det             (adf16.asFloat() - bcf16.asFloat());
15960                 const fp16type  det16   (det);
15961
15962                 out[0] = fp16type( d / det16.asFloat()).bits();
15963                 out[1] = fp16type(-c / det16.asFloat()).bits();
15964                 out[2] = fp16type(-b / det16.asFloat()).bits();
15965                 out[3] = fp16type( a / det16.asFloat()).bits();
15966
15967                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
15968                         for (size_t colNdx = 0; colNdx < cols; ++colNdx)
15969                         {
15970                                 const size_t    ndx     (colNdx * alignedRows + rowNdx);
15971                                 const fp16type  s       (out[ndx]);
15972
15973                                 min[ndx] = getMin(s.asDouble(), getULPs(in));
15974                                 max[ndx] = getMax(s.asDouble(), getULPs(in));
15975                         }
15976
15977                 return true;
15978         }
15979 };
15980
15981 inline std::string fp16ToString(deFloat16 val)
15982 {
15983         return tcu::toHex<4>(val).toString() + " (" + de::floatToString(tcu::Float16(val).asFloat(), 10) + ")";
15984 }
15985
15986 template <size_t RES_COMPONENTS, size_t ARG0_COMPONENTS, size_t ARG1_COMPONENTS, size_t ARG2_COMPONENTS, class TestedArithmeticFunction>
15987 bool compareFP16ArithmeticFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
15988 {
15989         if (inputs.size() < 1 || inputs.size() > 3 || outputAllocs.size() != 1 || expectedOutputs.size() != 1)
15990                 return false;
15991
15992         const size_t    resultStep                      = (RES_COMPONENTS == 3) ? 4 : RES_COMPONENTS;
15993         const size_t    iterationsCount         = expectedOutputs[0].getByteSize() / (sizeof(deFloat16) * resultStep);
15994         const size_t    inputsSteps[3]          =
15995         {
15996                 (ARG0_COMPONENTS == 3) ? 4 : ARG0_COMPONENTS,
15997                 (ARG1_COMPONENTS == 3) ? 4 : ARG1_COMPONENTS,
15998                 (ARG2_COMPONENTS == 3) ? 4 : ARG2_COMPONENTS,
15999         };
16000
16001         DE_ASSERT(expectedOutputs[0].getByteSize() > 0);
16002         DE_ASSERT(expectedOutputs[0].getByteSize() == sizeof(deFloat16) * iterationsCount * resultStep);
16003
16004         for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
16005         {
16006                 DE_ASSERT(inputs[inputNdx].getByteSize() > 0);
16007                 DE_ASSERT(inputs[inputNdx].getByteSize() == sizeof(deFloat16) * iterationsCount * inputsSteps[inputNdx]);
16008         }
16009
16010         const deFloat16* const          outputAsFP16                                    = (const deFloat16*)outputAllocs[0]->getHostPtr();
16011         TestedArithmeticFunction        func;
16012
16013         func.setOutCompCount(RES_COMPONENTS);
16014         func.setArgCompCount(0, ARG0_COMPONENTS);
16015         func.setArgCompCount(1, ARG1_COMPONENTS);
16016         func.setArgCompCount(2, ARG2_COMPONENTS);
16017
16018         const bool                                      callOncePerComponent                    = func.callOncePerComponent();
16019         const deUint32                          componentValidityMask                   = func.getComponentValidity();
16020         const size_t                            denormModesCount                                = 2;
16021         const char*                                     denormModes[denormModesCount]   = { "keep denormal numbers", "flush to zero" };
16022         const size_t                            successfulRunsPerComponent              = denormModesCount * func.getFlavorCount();
16023         bool                                            success                                                 = true;
16024         size_t                                          validatedCount                                  = 0;
16025
16026         vector<deUint8> inputBytes[3];
16027
16028         for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
16029                 inputs[inputNdx].getBytes(inputBytes[inputNdx]);
16030
16031         const deFloat16* const                  inputsAsFP16[3]                 =
16032         {
16033                 inputs.size() >= 1 ? (const deFloat16*)&inputBytes[0][0] : DE_NULL,
16034                 inputs.size() >= 2 ? (const deFloat16*)&inputBytes[1][0] : DE_NULL,
16035                 inputs.size() >= 3 ? (const deFloat16*)&inputBytes[2][0] : DE_NULL,
16036         };
16037
16038         for (size_t idx = 0; idx < iterationsCount; ++idx)
16039         {
16040                 std::vector<size_t>                     successfulRuns          (RES_COMPONENTS, successfulRunsPerComponent);
16041                 std::vector<std::string>        errors                          (RES_COMPONENTS);
16042                 bool                                            iterationValidated      (true);
16043
16044                 for (size_t denormNdx = 0; denormNdx < 2; ++denormNdx)
16045                 {
16046                         for (size_t flavorNdx = 0; flavorNdx < func.getFlavorCount(); ++flavorNdx)
16047                         {
16048                                 func.setFlavor(flavorNdx);
16049
16050                                 const deFloat16*                        iterationOutputFP16             = &outputAsFP16[idx * resultStep];
16051                                 vector<deFloat16>                       iterationCalculatedFP16 (resultStep, 0);
16052                                 vector<double>                          iterationEdgeMin                (resultStep, 0.0);
16053                                 vector<double>                          iterationEdgeMax                (resultStep, 0.0);
16054                                 vector<const deFloat16*>        arguments;
16055
16056                                 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
16057                                 {
16058                                         std::string     error;
16059                                         bool            reportError = false;
16060
16061                                         if (callOncePerComponent || componentNdx == 0)
16062                                         {
16063                                                 bool funcCallResult;
16064
16065                                                 arguments.clear();
16066
16067                                                 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
16068                                                         arguments.push_back(&inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + componentNdx]);
16069
16070                                                 if (denormNdx == 0)
16071                                                         funcCallResult = func.template calc<tcu::Float16>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
16072                                                 else
16073                                                         funcCallResult = func.template calc<tcu::Float16Denormless>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
16074
16075                                                 if (!funcCallResult)
16076                                                 {
16077                                                         iterationValidated = false;
16078
16079                                                         if (callOncePerComponent)
16080                                                                 continue;
16081                                                         else
16082                                                                 break;
16083                                                 }
16084                                         }
16085
16086                                         if ((componentValidityMask != 0) && (componentValidityMask & (1<<componentNdx)) == 0)
16087                                                 continue;
16088
16089                                         reportError = !compare16BitFloat(iterationCalculatedFP16[componentNdx], iterationOutputFP16[componentNdx], error);
16090
16091                                         if (reportError)
16092                                         {
16093                                                 tcu::Float16 expected   (iterationCalculatedFP16[componentNdx]);
16094                                                 tcu::Float16 outputted  (iterationOutputFP16[componentNdx]);
16095
16096                                                 if (reportError && expected.isNaN())
16097                                                         reportError = false;
16098
16099                                                 if (reportError && !expected.isNaN() && !outputted.isNaN())
16100                                                 {
16101                                                         if (reportError && !expected.isInf() && !outputted.isInf())
16102                                                         {
16103                                                                 // Ignore rounding
16104                                                                 if (expected.bits() == outputted.bits() + 1 || expected.bits() + 1 == outputted.bits())
16105                                                                         reportError = false;
16106                                                         }
16107
16108                                                         if (reportError && expected.isInf())
16109                                                         {
16110                                                                 // RTZ rounding mode returns +/-65504 instead of Inf on overflow
16111                                                                 if (expected.sign() == 1 && outputted.bits() == 0x7bff && iterationEdgeMin[componentNdx] <= std::numeric_limits<double>::max())
16112                                                                         reportError = false;
16113                                                                 else if (expected.sign() == -1 && outputted.bits() == 0xfbff && iterationEdgeMax[componentNdx] >= -std::numeric_limits<double>::max())
16114                                                                         reportError = false;
16115                                                         }
16116
16117                                                         if (reportError)
16118                                                         {
16119                                                                 const double    outputtedDouble = outputted.asDouble();
16120
16121                                                                 DE_ASSERT(iterationEdgeMin[componentNdx] <= iterationEdgeMax[componentNdx]);
16122
16123                                                                 if (de::inRange(outputtedDouble, iterationEdgeMin[componentNdx], iterationEdgeMax[componentNdx]))
16124                                                                         reportError = false;
16125                                                         }
16126                                                 }
16127
16128                                                 if (reportError)
16129                                                 {
16130                                                         const size_t            inputsComps[3]  =
16131                                                         {
16132                                                                 ARG0_COMPONENTS,
16133                                                                 ARG1_COMPONENTS,
16134                                                                 ARG2_COMPONENTS,
16135                                                         };
16136                                                         string                          inputsValues    ("Inputs:");
16137                                                         string                          flavorName              (func.getFlavorCount() == 1 ? "" : string(" flavor ") + de::toString(flavorNdx) + " (" + func.getCurrentFlavorName() + ")");
16138                                                         std::stringstream       errStream;
16139
16140                                                         for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
16141                                                         {
16142                                                                 const size_t    inputCompsCount = inputsComps[inputNdx];
16143
16144                                                                 inputsValues += " [" + de::toString(inputNdx) + "]=(";
16145
16146                                                                 for (size_t compNdx = 0; compNdx < inputCompsCount; ++compNdx)
16147                                                                 {
16148                                                                         const deFloat16 inputComponentValue = inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + compNdx];
16149
16150                                                                         inputsValues += fp16ToString(inputComponentValue) + ((compNdx + 1 == inputCompsCount) ? ")": ", ");
16151                                                                 }
16152                                                         }
16153
16154                                                         errStream       << "At"
16155                                                                                 << " iteration " << de::toString(idx)
16156                                                                                 << " component " << de::toString(componentNdx)
16157                                                                                 << " denormMode " << de::toString(denormNdx)
16158                                                                                 << " (" << denormModes[denormNdx] << ")"
16159                                                                                 << " " << flavorName
16160                                                                                 << " " << inputsValues
16161                                                                                 << " outputted:" + fp16ToString(iterationOutputFP16[componentNdx])
16162                                                                                 << " expected:" + fp16ToString(iterationCalculatedFP16[componentNdx])
16163                                                                                 << " or in range: [" << iterationEdgeMin[componentNdx] << ", " << iterationEdgeMax[componentNdx] << "]."
16164                                                                                 << " " << error << "."
16165                                                                                 << std::endl;
16166
16167                                                         errors[componentNdx] += errStream.str();
16168
16169                                                         successfulRuns[componentNdx]--;
16170                                                 }
16171                                         }
16172                                 }
16173                         }
16174                 }
16175
16176                 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
16177                 {
16178                         // Check if any component has total failure
16179                         if (successfulRuns[componentNdx] == 0)
16180                         {
16181                                 // Test failed in all denorm modes and all flavors for certain component: dump errors
16182                                 log << TestLog::Message << errors[componentNdx] << TestLog::EndMessage;
16183
16184                                 success = false;
16185                         }
16186                 }
16187
16188                 if (iterationValidated)
16189                         validatedCount++;
16190         }
16191
16192         if (validatedCount < 16)
16193                 TCU_THROW(InternalError, "Too few samples has been validated.");
16194
16195         return success;
16196 }
16197
16198 // IEEE-754 floating point numbers:
16199 // +--------+------+----------+-------------+
16200 // | binary | sign | exponent | significand |
16201 // +--------+------+----------+-------------+
16202 // | 16-bit |  1   |    5     |     10      |
16203 // +--------+------+----------+-------------+
16204 // | 32-bit |  1   |    8     |     23      |
16205 // +--------+------+----------+-------------+
16206 //
16207 // 16-bit floats:
16208 //
// 0   000 00   00 0000 0001 (0x0001: 2^-24:         minimum positive denormalized)
// 0   000 00   11 1111 1111 (0x03ff: 2^-14 - 2^-24: maximum positive denormalized)
// 0   000 01   00 0000 0000 (0x0400: 2^-14:         minimum positive normalized)
// 0   111 10   11 1111 1111 (0x7bff: 65504:         maximum positive normalized)
16213 //
16214 // 0   000 00   00 0000 0000 (0x0000: +0)
16215 // 0   111 11   00 0000 0000 (0x7c00: +Inf)
16216 // 0   000 00   11 1111 0000 (0x03f0: +Denorm)
16217 // 0   000 01   00 0000 0001 (0x0401: +Norm)
16218 // 0   111 11   00 0000 1111 (0x7c0f: +SNaN)
16219 // 0   111 11   11 1111 0000 (0x7ff0: +QNaN)
16220 // Generate and return 16-bit floats and their corresponding 32-bit values.
16221 //
16222 // The first 14 number pairs are manually picked, while the rest are randomly generated.
16223 // Expected count to be at least 14 (numPicks).
16224 vector<deFloat16> getFloat16a (de::Random& rnd, deUint32 count)
16225 {
16226         vector<deFloat16>       float16;
16227
16228         float16.reserve(count);
16229
16230         // Zero
16231         float16.push_back(deUint16(0x0000));
16232         float16.push_back(deUint16(0x8000));
16233         // Infinity
16234         float16.push_back(deUint16(0x7c00));
16235         float16.push_back(deUint16(0xfc00));
16236         // Normalized
16237         float16.push_back(deUint16(0x0401));
16238         float16.push_back(deUint16(0x8401));
16239         // Some normal number
16240         float16.push_back(deUint16(0x14cb));
16241         float16.push_back(deUint16(0x94cb));
16242         // Min/max positive normal
16243         float16.push_back(deUint16(0x0400));
16244         float16.push_back(deUint16(0x7bff));
16245         // Min/max negative normal
16246         float16.push_back(deUint16(0x8400));
16247         float16.push_back(deUint16(0xfbff));
16248         // PI
16249         float16.push_back(deUint16(0x4248)); // 3.140625
16250         float16.push_back(deUint16(0xb248)); // -3.140625
16251         // PI/2
16252         float16.push_back(deUint16(0x3e48)); // 1.5703125
16253         float16.push_back(deUint16(0xbe48)); // -1.5703125
16254         float16.push_back(deUint16(0x3c00)); // 1.0
16255         float16.push_back(deUint16(0x3800)); // 0.5
16256         // Some useful constants
16257         float16.push_back(tcu::Float16(-2.5f).bits());
16258         float16.push_back(tcu::Float16(-1.0f).bits());
16259         float16.push_back(tcu::Float16( 0.4f).bits());
16260         float16.push_back(tcu::Float16( 2.5f).bits());
16261
16262         const deUint32          numPicks        = static_cast<deUint32>(float16.size());
16263
16264         DE_ASSERT(count >= numPicks);
16265         count -= numPicks;
16266
16267         for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
16268         {
16269                 int                     sign            = (rnd.getUint16() % 2 == 0) ? +1 : -1;
16270                 int                     exponent        = (rnd.getUint16() % 29) - 14 + 1;
16271                 deUint16        mantissa        = static_cast<deUint16>(2 * (rnd.getUint16() % 512));
16272
16273                 // Exclude power of -14 to avoid denorms
16274                 DE_ASSERT(de::inRange(exponent, -13, 15));
16275
16276                 float16.push_back(tcu::Float16::constructBits(sign, exponent, mantissa).bits());
16277         }
16278
16279         return float16;
16280 }
16281
16282 static inline vector<deFloat16> getInputData1 (deUint32 seed, size_t count, size_t argNo)
16283 {
16284         DE_UNREF(argNo);
16285
16286         de::Random      rnd(seed);
16287
16288         return getFloat16a(rnd, static_cast<deUint32>(count));
16289 }
16290
16291 static inline vector<deFloat16> getInputData2 (deUint32 seed, size_t count, size_t argNo)
16292 {
16293         de::Random      rnd             (seed);
16294         size_t          newCount = static_cast<size_t>(deSqrt(double(count)));
16295
16296         DE_ASSERT(newCount * newCount == count);
16297
16298         vector<deFloat16>       float16 = getFloat16a(rnd, static_cast<deUint32>(newCount));
16299
16300         return squarize(float16, static_cast<deUint32>(argNo));
16301 }
16302
16303 static inline vector<deFloat16> getInputData3 (deUint32 seed, size_t count, size_t argNo)
16304 {
16305         if (argNo == 0 || argNo == 1)
16306                 return getInputData2(seed, count, argNo);
16307         else
16308                 return getInputData1(seed<<argNo, count, argNo);
16309 }
16310
16311 vector<deFloat16> getInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
16312 {
16313         DE_UNREF(stride);
16314
16315         vector<deFloat16>       result;
16316
16317         switch (argCount)
16318         {
16319                 case 1:result = getInputData1(seed, count, argNo); break;
16320                 case 2:result = getInputData2(seed, count, argNo); break;
16321                 case 3:result = getInputData3(seed, count, argNo); break;
16322                 default: TCU_THROW(InternalError, "Invalid argument count specified");
16323         }
16324
16325         if (compCount == 3)
16326         {
16327                 const size_t            newCount = (3 * count) / 4;
16328                 vector<deFloat16>       newResult;
16329
16330                 newResult.reserve(result.size());
16331
16332                 for (size_t ndx = 0; ndx < newCount; ++ndx)
16333                 {
16334                         newResult.push_back(result[ndx]);
16335
16336                         if (ndx % 3 == 2)
16337                                 newResult.push_back(0);
16338                 }
16339
16340                 result = newResult;
16341         }
16342
16343         DE_ASSERT(result.size() == count);
16344
16345         return result;
16346 }
16347
16348 // Generator for functions requiring data in range [1, inf]
16349 vector<deFloat16> getInputDataAC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
16350 {
16351         vector<deFloat16>       result;
16352
16353         result = getInputData(seed, count, compCount, stride, argCount, argNo);
16354
16355         // Filter out values below 1.0 from upper half of numbers
16356         for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
16357         {
16358                 const float f = tcu::Float16(result[idx]).asFloat();
16359
16360                 if (f < 1.0f)
16361                         result[idx] = tcu::Float16(1.0f - f).bits();
16362         }
16363
16364         return result;
16365 }
16366
16367 // Generator for functions requiring data in range [-1, 1]
16368 vector<deFloat16> getInputDataA (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
16369 {
16370         vector<deFloat16>       result;
16371
16372         result = getInputData(seed, count, compCount, stride, argCount, argNo);
16373
16374         for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
16375         {
16376                 const float f = tcu::Float16(result[idx]).asFloat();
16377
16378                 if (!de::inRange(f, -1.0f, 1.0f))
16379                         result[idx] = tcu::Float16(deFloatFrac(f)).bits();
16380         }
16381
16382         return result;
16383 }
16384
16385 // Generator for functions requiring data in range [-pi, pi]
16386 vector<deFloat16> getInputDataPI (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
16387 {
16388         vector<deFloat16>       result;
16389
16390         result = getInputData(seed, count, compCount, stride, argCount, argNo);
16391
16392         for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
16393         {
16394                 const float f = tcu::Float16(result[idx]).asFloat();
16395
16396                 if (!de::inRange(f, -DE_PI, DE_PI))
16397                         result[idx] = tcu::Float16(fmodf(f, DE_PI)).bits();
16398         }
16399
16400         return result;
16401 }
16402
16403 // Generator for functions requiring data in range [0, inf]
16404 vector<deFloat16> getInputDataP (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
16405 {
16406         vector<deFloat16>       result;
16407
16408         result = getInputData(seed, count, compCount, stride, argCount, argNo);
16409
16410         if (argNo == 0)
16411         {
16412                 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
16413                         result[idx] &= static_cast<deFloat16>(~0x8000);
16414         }
16415
16416         return result;
16417 }
16418
16419 vector<deFloat16> getInputDataV (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
16420 {
16421         DE_UNREF(stride);
16422         DE_UNREF(argCount);
16423
16424         vector<deFloat16>       result;
16425
16426         if (argNo == 0)
16427                 result = getInputData2(seed, count, argNo);
16428         else
16429         {
16430                 const size_t            alignedCount    = (compCount == 3) ? 4 : compCount;
16431                 const size_t            newCountX               = static_cast<size_t>(deSqrt(double(count * alignedCount)));
16432                 const size_t            newCountY               = count / newCountX;
16433                 de::Random                      rnd                             (seed);
16434                 vector<deFloat16>       float16                 = getFloat16a(rnd, static_cast<deUint32>(newCountX));
16435
16436                 DE_ASSERT(newCountX * newCountX == alignedCount * count);
16437
16438                 for (size_t numIdx = 0; numIdx < newCountX; ++numIdx)
16439                 {
16440                         const vector<deFloat16> tmp(newCountY, float16[numIdx]);
16441
16442                         result.insert(result.end(), tmp.begin(), tmp.end());
16443                 }
16444         }
16445
16446         DE_ASSERT(result.size() == count);
16447
16448         return result;
16449 }
16450
16451 vector<deFloat16> getInputDataM (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
16452 {
16453         DE_UNREF(compCount);
16454         DE_UNREF(stride);
16455         DE_UNREF(argCount);
16456
16457         de::Random                      rnd             (seed << argNo);
16458         vector<deFloat16>       result;
16459
16460         result = getFloat16a(rnd, static_cast<deUint32>(count));
16461
16462         DE_ASSERT(result.size() == count);
16463
16464         return result;
16465 }
16466
16467 vector<deFloat16> getInputDataD (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
16468 {
16469         DE_UNREF(compCount);
16470         DE_UNREF(argCount);
16471
16472         de::Random                      rnd             (seed << argNo);
16473         vector<deFloat16>       result;
16474
16475         for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
16476         {
16477                 int num = (rnd.getUint16() % 16) - 8;
16478
16479                 result.push_back(tcu::Float16(float(num)).bits());
16480         }
16481
16482         result[0 * stride] = deUint16(0x7c00); // +Inf
16483         result[1 * stride] = deUint16(0xfc00); // -Inf
16484
16485         DE_ASSERT(result.size() == count);
16486
16487         return result;
16488 }
16489
16490 // Generator for smoothstep function
16491 vector<deFloat16> getInputDataSS (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
16492 {
16493         vector<deFloat16>       result;
16494
16495         result = getInputDataD(seed, count, compCount, stride, argCount, argNo);
16496
16497         if (argNo == 0)
16498         {
16499                 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
16500                 {
16501                         const float f = tcu::Float16(result[idx]).asFloat();
16502
16503                         if (f > 4.0f)
16504                                 result[idx] = tcu::Float16(-f).bits();
16505                 }
16506         }
16507
16508         if (argNo == 1)
16509         {
16510                 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
16511                 {
16512                         const float f = tcu::Float16(result[idx]).asFloat();
16513
16514                         if (f < 4.0f)
16515                                 result[idx] = tcu::Float16(-f).bits();
16516                 }
16517         }
16518
16519         return result;
16520 }
16521
16522 // Generates normalized vectors for arguments 0 and 1
16523 vector<deFloat16> getInputDataN (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
16524 {
16525         DE_UNREF(compCount);
16526         DE_UNREF(argCount);
16527
16528         de::Random                      rnd             (seed << argNo);
16529         vector<deFloat16>       result;
16530
16531         if (argNo == 0 || argNo == 1)
16532         {
16533                 // The input parameters for the incident vector I and the surface normal N must already be normalized
16534                 for (size_t numIdx = 0; numIdx < count; numIdx += stride)
16535                 {
16536                         vector <float>  unnormolized;
16537                         float                   sum                             = 0;
16538
16539                         for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
16540                                 unnormolized.push_back(float((rnd.getUint16() % 16) - 8));
16541
16542                         for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
16543                                 sum += unnormolized[compIdx] * unnormolized[compIdx];
16544
16545                         sum = deFloatSqrt(sum);
16546                         if (sum == 0.0f)
16547                                 unnormolized[0] = sum = 1.0f;
16548
16549                         for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
16550                                 result.push_back(tcu::Float16(unnormolized[compIdx] / sum).bits());
16551
16552                         for (size_t compIdx = compCount; compIdx < stride; ++compIdx)
16553                                 result.push_back(0);
16554                 }
16555         }
16556         else
16557         {
16558                 // Input parameter eta
16559                 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
16560                 {
16561                         int num = (rnd.getUint16() % 16) - 8;
16562
16563                         result.push_back(tcu::Float16(float(num)).bits());
16564                 }
16565         }
16566
16567         DE_ASSERT(result.size() == count);
16568
16569         return result;
16570 }
16571
16572 // Data generator for complex matrix functions like determinant and inverse
16573 vector<deFloat16> getInputDataC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
16574 {
16575         DE_UNREF(compCount);
16576         DE_UNREF(stride);
16577         DE_UNREF(argCount);
16578
16579         de::Random                      rnd             (seed << argNo);
16580         vector<deFloat16>       result;
16581
16582         for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
16583         {
16584                 int num = (rnd.getUint16() % 16) - 8;
16585
16586                 result.push_back(tcu::Float16(float(num)).bits());
16587         }
16588
16589         DE_ASSERT(result.size() == count);
16590
16591         return result;
16592 }
16593
// Description of one data type used by the 16-bit float arithmetic tests.
// Instances are aggregate-initialized, so the order of the members must not change.
struct Math16TestType
{
	// Prefix of the type name, e.g. "v2" or "m2x2"; empty for scalars
	const char*		typePrefix;

	// Number of components of a scalar or a vector
	const size_t	typeComponents;

	// Strides, expressed in multiples of sizeof(deFloat16) by the users of this struct
	const size_t	typeArrayStride;
	const size_t	typeStructStride;

	// Name of the storage type, e.g. "u32_ndp"
	const char*		storage_type;
};
16602
// Data types known to the 16-bit float arithmetic tests.
// Values of scalar and vector types are equal to their component count; matrix types
// follow consecutively and MATH16_TYPE_LAST is the total number of entries.
enum Math16DataTypes
{
	NONE				= 0,

	// Scalar and vectors
	SCALAR				= 1,
	VEC2				= 2,
	VEC3				= 3,
	VEC4				= 4,

	// Matrices
	MAT2X2				= 5,
	MAT2X3				= 6,
	MAT2X4				= 7,
	MAT3X2				= 8,
	MAT3X3				= 9,
	MAT3X4				= 10,
	MAT4X2				= 11,
	MAT4X3				= 12,
	MAT4X4				= 13,

	MATH16_TYPE_LAST	= 14
};
16621
// Set of text fragments belonging to one function argument.
// NOTE(review): presumably SPIR-V assembly snippets spliced into the shader template by
// the test builder; confirm against the code that fills this structure.
struct Math16ArgFragments
{
	const char*		bodies;			// Fragment for the function body
	const char*		variables;		// Fragment with variable declarations
	const char*		decorations;	// Fragment with decorations
	const char*		funcVariables;	// Fragment with function-local variables
};
16629
16630 typedef vector<deFloat16> Math16GetInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo);
16631
16632 struct Math16TestFunc
16633 {
16634         const char*                                     funcName;
16635         const char*                                     funcSuffix;
16636         size_t                                          funcArgsCount;
16637         size_t                                          typeResult;
16638         size_t                                          typeArg0;
16639         size_t                                          typeArg1;
16640         size_t                                          typeArg2;
16641         Math16GetInputData*                     getInputDataFunc;
16642         VerifyIOFunc                            verifyFunc;
16643 };
16644
16645 template<class SpecResource>
16646 void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGroup& testGroup, const size_t testTypeIdx, const Math16TestFunc& testFunc)
16647 {
16648         const int                                       testSpecificSeed                        = deStringHash(testGroup.getName());
16649         const int                                       seed                                            = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
16650         const size_t                            numDataPointsByAxis                     = 32;
16651         const size_t                            numDataPoints                           = numDataPointsByAxis * numDataPointsByAxis;
16652         const char*                                     componentType                           = "f16";
16653         const Math16TestType            testTypes[MATH16_TYPE_LAST]     =
16654         {
16655                 { "",           0,       0,                                              0,                                             "" },
16656                 { "",           1,       1 * sizeof(deFloat16),  2 * sizeof(deFloat16), "u32_half_ndp" },
16657                 { "v2",         2,       2 * sizeof(deFloat16),  2 * sizeof(deFloat16), "u32_ndp" },
16658                 { "v3",         3,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16), "u32_ndp_2" },
16659                 { "v4",         4,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16), "u32_ndp_2" },
16660                 { "m2x2",       0,       4 * sizeof(deFloat16),  4 * sizeof(deFloat16), "u32_ndp_2" },
16661                 { "m2x3",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_4" },
16662                 { "m2x4",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_4" },
16663                 { "m3x2",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_3" },
16664                 { "m3x3",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6" },
16665                 { "m3x4",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6" },
16666                 { "m4x2",       0,       8 * sizeof(deFloat16),  8 * sizeof(deFloat16), "u32_ndp_4" },
16667                 { "m4x3",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8" },
16668                 { "m4x4",       0,      16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8" },
16669         };
16670
16671         DE_ASSERT(testTypeIdx == testTypes[testTypeIdx].typeComponents);
16672
16673
16674         const StringTemplate preMain
16675         (
16676                 "     %c_i32_ndp  = OpConstant %i32 ${num_data_points}\n"
16677
16678                 "        %f16     = OpTypeFloat 16\n"
16679                 "        %v2f16   = OpTypeVector %f16 2\n"
16680                 "        %v3f16   = OpTypeVector %f16 3\n"
16681                 "        %v4f16   = OpTypeVector %f16 4\n"
16682                 "        %m2x2f16 = OpTypeMatrix %v2f16 2\n"
16683                 "        %m2x3f16 = OpTypeMatrix %v3f16 2\n"
16684                 "        %m2x4f16 = OpTypeMatrix %v4f16 2\n"
16685                 "        %m3x2f16 = OpTypeMatrix %v2f16 3\n"
16686                 "        %m3x3f16 = OpTypeMatrix %v3f16 3\n"
16687                 "        %m3x4f16 = OpTypeMatrix %v4f16 3\n"
16688                 "        %m4x2f16 = OpTypeMatrix %v2f16 4\n"
16689                 "        %m4x3f16 = OpTypeMatrix %v3f16 4\n"
16690                 "        %m4x4f16 = OpTypeMatrix %v4f16 4\n"
16691
16692                 "       %fp_v2i32 = OpTypePointer Function %v2i32\n"
16693                 "       %fp_v3i32 = OpTypePointer Function %v3i32\n"
16694                 "       %fp_v4i32 = OpTypePointer Function %v4i32\n"
16695
16696                 "      %c_u32_ndp = OpConstant %u32 ${num_data_points}\n"
16697                 " %c_u32_half_ndp = OpSpecConstantOp %u32 UDiv %c_i32_ndp %c_u32_2\n"
16698                 "        %c_u32_5 = OpConstant %u32 5\n"
16699                 "        %c_u32_6 = OpConstant %u32 6\n"
16700                 "        %c_u32_7 = OpConstant %u32 7\n"
16701                 "        %c_u32_8 = OpConstant %u32 8\n"
16702                 "        %c_f16_0 = OpConstant %f16 0\n"
16703                 "        %c_f16_1 = OpConstant %f16 1\n"
16704                 "      %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
16705                 "         %up_u32 = OpTypePointer Uniform %u32\n"
16706                 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
16707                 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
16708
16709                 "    %ra_u32_half_ndp = OpTypeArray %u32 %c_u32_half_ndp\n"
16710                 "  %SSBO_u32_half_ndp = OpTypeStruct %ra_u32_half_ndp\n"
16711                 "%up_SSBO_u32_half_ndp = OpTypePointer Uniform %SSBO_u32_half_ndp\n"
16712                 "         %ra_u32_ndp = OpTypeArray %u32 %c_u32_ndp\n"
16713                 "       %SSBO_u32_ndp = OpTypeStruct %ra_u32_ndp\n"
16714                 "    %up_SSBO_u32_ndp = OpTypePointer Uniform %SSBO_u32_ndp\n"
16715                 "           %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
16716                 "        %up_ra_u32_2 = OpTypePointer Uniform %ra_u32_2\n"
16717                 "      %ra_ra_u32_ndp = OpTypeArray %ra_u32_2 %c_u32_ndp\n"
16718                 "     %SSBO_u32_ndp_2 = OpTypeStruct %ra_ra_u32_ndp\n"
16719                 "  %up_SSBO_u32_ndp_2 = OpTypePointer Uniform %SSBO_u32_ndp_2\n"
16720                 "           %ra_u32_4 = OpTypeArray %u32 %c_u32_4\n"
16721                 "        %up_ra_u32_4 = OpTypePointer Uniform %ra_u32_4\n"
16722                 "        %ra_ra_u32_4 = OpTypeArray %ra_u32_4 %c_u32_ndp\n"
16723                 "     %SSBO_u32_ndp_4 = OpTypeStruct %ra_ra_u32_4\n"
16724                 "  %up_SSBO_u32_ndp_4 = OpTypePointer Uniform %SSBO_u32_ndp_4\n"
16725                 "           %ra_u32_3 = OpTypeArray %u32 %c_u32_3\n"
16726                 "        %up_ra_u32_3 = OpTypePointer Uniform %ra_u32_3\n"
16727                 "        %ra_ra_u32_3 = OpTypeArray %ra_u32_3 %c_u32_ndp\n"
16728                 "     %SSBO_u32_ndp_3 = OpTypeStruct %ra_ra_u32_3\n"
16729                 "  %up_SSBO_u32_ndp_3 = OpTypePointer Uniform %SSBO_u32_ndp_3\n"
16730                 "           %ra_u32_6 = OpTypeArray %u32 %c_u32_6\n"
16731                 "        %up_ra_u32_6 = OpTypePointer Uniform %ra_u32_6\n"
16732                 "        %ra_ra_u32_6 = OpTypeArray %ra_u32_6 %c_u32_ndp\n"
16733                 "     %SSBO_u32_ndp_6 = OpTypeStruct %ra_ra_u32_6\n"
16734                 "  %up_SSBO_u32_ndp_6 = OpTypePointer Uniform %SSBO_u32_ndp_6\n"
16735                 "           %ra_u32_8 = OpTypeArray %u32 %c_u32_8\n"
16736                 "        %up_ra_u32_8 = OpTypePointer Uniform %ra_u32_8\n"
16737                 "        %ra_ra_u32_8 = OpTypeArray %ra_u32_8 %c_u32_ndp\n"
16738                 "     %SSBO_u32_ndp_8 = OpTypeStruct %ra_ra_u32_8\n"
16739                 "  %up_SSBO_u32_ndp_8 = OpTypePointer Uniform %SSBO_u32_ndp_8\n"
16740
16741                 "         %f16_i32_fn = OpTypeFunction %f16 %i32\n"
16742                 "       %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
16743                 "       %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
16744                 "       %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
16745                 "     %m2x2f16_i32_fn = OpTypeFunction %m2x2f16 %i32\n"
16746                 "     %m2x3f16_i32_fn = OpTypeFunction %m2x3f16 %i32\n"
16747                 "     %m2x4f16_i32_fn = OpTypeFunction %m2x4f16 %i32\n"
16748                 "     %m3x2f16_i32_fn = OpTypeFunction %m3x2f16 %i32\n"
16749                 "     %m3x3f16_i32_fn = OpTypeFunction %m3x3f16 %i32\n"
16750                 "     %m3x4f16_i32_fn = OpTypeFunction %m3x4f16 %i32\n"
16751                 "     %m4x2f16_i32_fn = OpTypeFunction %m4x2f16 %i32\n"
16752                 "     %m4x3f16_i32_fn = OpTypeFunction %m4x3f16 %i32\n"
16753                 "     %m4x4f16_i32_fn = OpTypeFunction %m4x4f16 %i32\n"
16754                 "    %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
16755                 "  %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
16756                 "  %void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
16757                 "  %void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
16758                 "%void_m2x2f16_i32_fn = OpTypeFunction %void %m2x2f16 %i32\n"
16759                 "%void_m2x3f16_i32_fn = OpTypeFunction %void %m2x3f16 %i32\n"
16760                 "%void_m2x4f16_i32_fn = OpTypeFunction %void %m2x4f16 %i32\n"
16761                 "%void_m3x2f16_i32_fn = OpTypeFunction %void %m3x2f16 %i32\n"
16762                 "%void_m3x3f16_i32_fn = OpTypeFunction %void %m3x3f16 %i32\n"
16763                 "%void_m3x4f16_i32_fn = OpTypeFunction %void %m3x4f16 %i32\n"
16764                 "%void_m4x2f16_i32_fn = OpTypeFunction %void %m4x2f16 %i32\n"
16765                 "%void_m4x3f16_i32_fn = OpTypeFunction %void %m4x3f16 %i32\n"
16766                 "%void_m4x4f16_i32_fn = OpTypeFunction %void %m4x4f16 %i32\n"
16767                 "${arg_vars}"
16768         );
16769
16770         const StringTemplate decoration
16771         (
16772                 "OpDecorate %ra_u32_half_ndp ArrayStride 4\n"
16773                 "OpMemberDecorate %SSBO_u32_half_ndp 0 Offset 0\n"
16774                 "OpDecorate %SSBO_u32_half_ndp BufferBlock\n"
16775
16776                 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
16777                 "OpMemberDecorate %SSBO_u32_ndp 0 Offset 0\n"
16778                 "OpDecorate %SSBO_u32_ndp BufferBlock\n"
16779
16780                 "OpDecorate %ra_u32_2 ArrayStride 4\n"
16781                 "OpDecorate %ra_ra_u32_ndp ArrayStride 8\n"
16782                 "OpMemberDecorate %SSBO_u32_ndp_2 0 Offset 0\n"
16783                 "OpDecorate %SSBO_u32_ndp_2 BufferBlock\n"
16784
16785                 "OpDecorate %ra_u32_4 ArrayStride 4\n"
16786                 "OpDecorate %ra_ra_u32_4 ArrayStride 16\n"
16787                 "OpMemberDecorate %SSBO_u32_ndp_4 0 Offset 0\n"
16788                 "OpDecorate %SSBO_u32_ndp_4 BufferBlock\n"
16789
16790                 "OpDecorate %ra_u32_3 ArrayStride 4\n"
16791                 "OpDecorate %ra_ra_u32_3 ArrayStride 16\n"
16792                 "OpMemberDecorate %SSBO_u32_ndp_3 0 Offset 0\n"
16793                 "OpDecorate %SSBO_u32_ndp_3 BufferBlock\n"
16794
16795                 "OpDecorate %ra_u32_6 ArrayStride 4\n"
16796                 "OpDecorate %ra_ra_u32_6 ArrayStride 32\n"
16797                 "OpMemberDecorate %SSBO_u32_ndp_6 0 Offset 0\n"
16798                 "OpDecorate %SSBO_u32_ndp_6 BufferBlock\n"
16799
16800                 "OpDecorate %ra_u32_8 ArrayStride 4\n"
16801                 "OpDecorate %ra_ra_u32_8 ArrayStride 32\n"
16802                 "OpMemberDecorate %SSBO_u32_ndp_8 0 Offset 0\n"
16803                 "OpDecorate %SSBO_u32_ndp_8 BufferBlock\n"
16804
16805                 "${arg_decorations}"
16806         );
16807
16808         const StringTemplate testFun
16809         (
16810                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
16811                 "    %param = OpFunctionParameter %v4f32\n"
16812                 "    %entry = OpLabel\n"
16813
16814                 "        %i = OpVariable %fp_i32 Function\n"
16815                 "${arg_infunc_vars}"
16816                 "             OpStore %i %c_i32_0\n"
16817                 "             OpBranch %loop\n"
16818
16819                 "     %loop = OpLabel\n"
16820                 "    %i_cmp = OpLoad %i32 %i\n"
16821                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
16822                 "             OpLoopMerge %merge %next None\n"
16823                 "             OpBranchConditional %lt %write %merge\n"
16824
16825                 "    %write = OpLabel\n"
16826                 "      %ndx = OpLoad %i32 %i\n"
16827
16828                 "${arg_func_call}"
16829
16830                 "             OpBranch %next\n"
16831
16832                 "     %next = OpLabel\n"
16833                 "    %i_cur = OpLoad %i32 %i\n"
16834                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
16835                 "             OpStore %i %i_new\n"
16836                 "             OpBranch %loop\n"
16837
16838                 "    %merge = OpLabel\n"
16839                 "             OpReturnValue %param\n"
16840                 "             OpFunctionEnd\n"
16841         );
16842
16843         const Math16ArgFragments        argFragment1    =
16844         {
16845                 "     %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
16846                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0\n"
16847                 "     %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
16848                 "",
16849                 "",
16850                 "",
16851         };
16852
16853         const Math16ArgFragments        argFragment2    =
16854         {
16855                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
16856                 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
16857                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1\n"
16858                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
16859                 "",
16860                 "",
16861                 "",
16862         };
16863
16864         const Math16ArgFragments        argFragment3    =
16865         {
16866                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
16867                 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
16868                 " %val_src2 = OpFunctionCall %${t2} %ld_arg_ssbo_src2 %ndx\n"
16869                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1 %val_src2\n"
16870                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
16871                 "",
16872                 "",
16873                 "",
16874         };
16875
16876         const Math16ArgFragments        argFragmentLdExp        =
16877         {
16878                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
16879                 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
16880                 "%val_src1i = OpConvertFToS %${dr}i32 %val_src1\n"
16881                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1i\n"
16882                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
16883
16884                 "",
16885
16886                 "",
16887
16888                 "",
16889         };
16890
16891         const Math16ArgFragments        argFragmentModfFrac     =
16892         {
16893                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
16894                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
16895                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
16896
16897                 "   %fp_tmp = OpTypePointer Function %${tr}\n",
16898
16899                 "",
16900
16901                 "      %tmp = OpVariable %fp_tmp Function\n",
16902         };
16903
16904         const Math16ArgFragments        argFragmentModfInt      =
16905         {
16906                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
16907                 "%val_dummy = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
16908                 "     %tmp0 = OpAccessChain %fp_tmp %tmp\n"
16909                 "  %val_dst = OpLoad %${tr} %tmp0\n"
16910                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
16911
16912                 "   %fp_tmp = OpTypePointer Function %${tr}\n",
16913
16914                 "",
16915
16916                 "      %tmp = OpVariable %fp_tmp Function\n",
16917         };
16918
16919         const Math16ArgFragments        argFragmentModfStruct   =
16920         {
16921                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
16922                 "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
16923                 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
16924                 "             OpStore %tmp_ptr_s %val_tmp\n"
16925                 "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_${struct_member}\n"
16926                 "  %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
16927                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
16928
16929                 "  %fp_${tr} = OpTypePointer Function %${tr}\n"
16930                 "   %st_tmp = OpTypeStruct %${tr} %${tr}\n"
16931                 "   %fp_tmp = OpTypePointer Function %st_tmp\n"
16932                 "   %c_frac = OpConstant %i32 0\n"
16933                 "    %c_int = OpConstant %i32 1\n",
16934
16935                 "OpMemberDecorate %st_tmp 0 Offset 0\n"
16936                 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
16937
16938                 "      %tmp = OpVariable %fp_tmp Function\n",
16939         };
16940
16941         const Math16ArgFragments        argFragmentFrexpStructS =
16942         {
16943                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
16944                 "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
16945                 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
16946                 "             OpStore %tmp_ptr_s %val_tmp\n"
16947                 "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_i32_0\n"
16948                 "  %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
16949                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
16950
16951                 "  %fp_${tr} = OpTypePointer Function %${tr}\n"
16952                 "   %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
16953                 "   %fp_tmp = OpTypePointer Function %st_tmp\n",
16954
16955                 "OpMemberDecorate %st_tmp 0 Offset 0\n"
16956                 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
16957
16958                 "      %tmp = OpVariable %fp_tmp Function\n",
16959         };
16960
16961         const Math16ArgFragments        argFragmentFrexpStructE =
16962         {
16963                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
16964                 "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
16965                 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
16966                 "             OpStore %tmp_ptr_s %val_tmp\n"
16967                 "%tmp_ptr_l = OpAccessChain %fp_${dr}i32 %tmp %c_i32_1\n"
16968                 "%val_dst_i = OpLoad %${dr}i32 %tmp_ptr_l\n"
16969                 "  %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
16970                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
16971
16972                 "   %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
16973                 "   %fp_tmp = OpTypePointer Function %st_tmp\n",
16974
16975                 "OpMemberDecorate %st_tmp 0 Offset 0\n"
16976                 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
16977
16978                 "      %tmp = OpVariable %fp_tmp Function\n",
16979         };
16980
16981         const Math16ArgFragments        argFragmentFrexpS               =
16982         {
16983                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
16984                 "  %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
16985                 "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
16986                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
16987
16988                 "",
16989
16990                 "",
16991
16992                 "      %tmp = OpVariable %fp_${dr}i32 Function\n",
16993         };
16994
16995         const Math16ArgFragments        argFragmentFrexpE               =
16996         {
16997                 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
16998                 "  %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
16999                 "%val_dummy = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
17000                 "%val_dst_i = OpLoad %${dr}i32 %out_exp\n"
17001                 "  %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
17002                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
17003
17004                 "",
17005
17006                 "",
17007
17008                 "      %tmp = OpVariable %fp_${dr}i32 Function\n",
17009         };
17010
17011         string load_funcs[MATH16_TYPE_LAST];
17012         load_funcs[SCALAR] = loadScalarF16FromUint;
17013         load_funcs[VEC2]   = loadV2F16FromUint;
17014         load_funcs[VEC3]   = loadV3F16FromUints;
17015         load_funcs[VEC4]   = loadV4F16FromUints;
17016         load_funcs[MAT2X2] = loadM2x2F16FromUints;
17017         load_funcs[MAT2X3] = loadM2x3F16FromUints;
17018         load_funcs[MAT2X4] = loadM2x4F16FromUints;
17019         load_funcs[MAT3X2] = loadM3x2F16FromUints;
17020         load_funcs[MAT3X3] = loadM3x3F16FromUints;
17021         load_funcs[MAT3X4] = loadM3x4F16FromUints;
17022         load_funcs[MAT4X2] = loadM4x2F16FromUints;
17023         load_funcs[MAT4X3] = loadM4x3F16FromUints;
17024         load_funcs[MAT4X4] = loadM4x4F16FromUints;
17025
17026         string store_funcs[MATH16_TYPE_LAST];
17027         store_funcs[SCALAR] = storeScalarF16AsUint;
17028         store_funcs[VEC2]   = storeV2F16AsUint;
17029         store_funcs[VEC3]   = storeV3F16AsUints;
17030         store_funcs[VEC4]   = storeV4F16AsUints;
17031         store_funcs[MAT2X2] = storeM2x2F16AsUints;
17032         store_funcs[MAT2X3] = storeM2x3F16AsUints;
17033         store_funcs[MAT2X4] = storeM2x4F16AsUints;
17034         store_funcs[MAT3X2] = storeM3x2F16AsUints;
17035         store_funcs[MAT3X3] = storeM3x3F16AsUints;
17036         store_funcs[MAT3X4] = storeM3x4F16AsUints;
17037         store_funcs[MAT4X2] = storeM4x2F16AsUints;
17038         store_funcs[MAT4X3] = storeM4x3F16AsUints;
17039         store_funcs[MAT4X4] = storeM4x4F16AsUints;
17040
17041         const Math16TestType&           testType                                = testTypes[testTypeIdx];
17042         const string                            funcNameString                  = string(testFunc.funcName) + string(testFunc.funcSuffix);
17043         const string                            testName                                = de::toLower(funcNameString);
17044         const Math16ArgFragments*       argFragments                    = DE_NULL;
17045         const size_t                            typeStructStride                = testType.typeStructStride;
17046         const bool                                      extInst                                 = !(testFunc.funcName[0] == 'O' && testFunc.funcName[1] == 'p');
17047         const size_t                            numFloatsPerArg0Type    = testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16);
17048         const size_t                            iterations                              = numDataPoints / numFloatsPerArg0Type;
17049         const size_t                            numFloatsPerResultType  = testTypes[testFunc.typeResult].typeArrayStride / sizeof(deFloat16);
17050         const vector<deFloat16>         float16DummyOutput              (iterations * numFloatsPerResultType, 0);
17051         VulkanFeatures                          features;
17052         SpecResource                            specResource;
17053         map<string, string>                     specs;
17054         map<string, string>                     fragments;
17055         vector<string>                          extensions;
17056         string                                          funcCall;
17057         string                                          funcVariables;
17058         string                                          variables;
17059         string                                          declarations;
17060         string                                          decorations;
17061         string                                          functions;
17062
17063         switch (testFunc.funcArgsCount)
17064         {
17065                 case 1:
17066                 {
17067                         argFragments = &argFragment1;
17068
17069                         if (funcNameString == "ModfFrac")               argFragments = &argFragmentModfFrac;
17070                         if (funcNameString == "ModfInt")                argFragments = &argFragmentModfInt;
17071                         if (funcNameString == "ModfStructFrac") argFragments = &argFragmentModfStruct;
17072                         if (funcNameString == "ModfStructInt")  argFragments = &argFragmentModfStruct;
17073                         if (funcNameString == "FrexpS")                 argFragments = &argFragmentFrexpS;
17074                         if (funcNameString == "FrexpE")                 argFragments = &argFragmentFrexpE;
17075                         if (funcNameString == "FrexpStructS")   argFragments = &argFragmentFrexpStructS;
17076                         if (funcNameString == "FrexpStructE")   argFragments = &argFragmentFrexpStructE;
17077
17078                         break;
17079                 }
17080                 case 2:
17081                 {
17082                         argFragments = &argFragment2;
17083
17084                         if (funcNameString == "Ldexp")                  argFragments = &argFragmentLdExp;
17085
17086                         break;
17087                 }
17088                 case 3:
17089                 {
17090                         argFragments = &argFragment3;
17091
17092                         break;
17093                 }
17094                 default:
17095                 {
17096                         TCU_THROW(InternalError, "Invalid number of arguments");
17097                 }
17098         }
17099
17100         functions = StringTemplate(store_funcs[testFunc.typeResult]).specialize({{"var", "ssbo_dst"}});
17101         if (testFunc.funcArgsCount == 1)
17102         {
17103                 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
17104                 variables +=
17105                         " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
17106                         "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
17107
17108                 decorations +=
17109                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
17110                         "OpDecorate %ssbo_src0 Binding 0\n"
17111                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
17112                         "OpDecorate %ssbo_dst Binding 1\n";
17113         }
17114         else if (testFunc.funcArgsCount == 2)
17115         {
17116                 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
17117                 functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
17118                 variables +=
17119                         " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
17120                         " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
17121                         "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
17122
17123                 decorations +=
17124                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
17125                         "OpDecorate %ssbo_src0 Binding 0\n"
17126                         "OpDecorate %ssbo_src1 DescriptorSet 0\n"
17127                         "OpDecorate %ssbo_src1 Binding 1\n"
17128                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
17129                         "OpDecorate %ssbo_dst Binding 2\n";
17130         }
17131         else if (testFunc.funcArgsCount == 3)
17132         {
17133                 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
17134                 functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
17135                 functions += StringTemplate(load_funcs[testFunc.typeArg2]).specialize({{"var", "ssbo_src2"}});
17136                 variables +=
17137                         " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
17138                         " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
17139                         " %ssbo_src2 = OpVariable %up_SSBO_${store_t2} Uniform\n"
17140                         "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
17141
17142                 decorations +=
17143                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
17144                         "OpDecorate %ssbo_src0 Binding 0\n"
17145                         "OpDecorate %ssbo_src1 DescriptorSet 0\n"
17146                         "OpDecorate %ssbo_src1 Binding 1\n"
17147                         "OpDecorate %ssbo_src2 DescriptorSet 0\n"
17148                         "OpDecorate %ssbo_src2 Binding 2\n"
17149                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
17150                         "OpDecorate %ssbo_dst Binding 3\n";
17151         }
17152         else
17153         {
17154                 TCU_THROW(InternalError, "Invalid number of function arguments");
17155         }
17156
17157         variables       += argFragments->variables;
17158         decorations     += argFragments->decorations;
17159
17160         specs["dr"]                                     = testTypes[testFunc.typeResult].typePrefix;
17161         specs["d0"]                                     = testTypes[testFunc.typeArg0].typePrefix;
17162         specs["d1"]                                     = testTypes[testFunc.typeArg1].typePrefix;
17163         specs["d2"]                                     = testTypes[testFunc.typeArg2].typePrefix;
17164         specs["tr"]                                     = string(testTypes[testFunc.typeResult].typePrefix) + componentType;
17165         specs["t0"]                                     = string(testTypes[testFunc.typeArg0].typePrefix) + componentType;
17166         specs["t1"]                                     = string(testTypes[testFunc.typeArg1].typePrefix) + componentType;
17167         specs["t2"]                                     = string(testTypes[testFunc.typeArg2].typePrefix) + componentType;
17168         specs["store_tr"]                       = string(testTypes[testFunc.typeResult].storage_type);
17169         specs["store_t0"]                       = string(testTypes[testFunc.typeArg0].storage_type);
17170         specs["store_t1"]                       = string(testTypes[testFunc.typeArg1].storage_type);
17171         specs["store_t2"]                       = string(testTypes[testFunc.typeArg2].storage_type);
17172         specs["struct_stride"]          = de::toString(typeStructStride);
17173         specs["op"]                                     = extInst ? "OpExtInst" : testFunc.funcName;
17174         specs["ext_inst"]                       = extInst ? string("%ext_import ") + testFunc.funcName : "";
17175         specs["struct_member"]          = de::toLower(testFunc.funcSuffix);
17176
17177         variables                                       = StringTemplate(variables).specialize(specs);
17178         decorations                                     = StringTemplate(decorations).specialize(specs);
17179         funcVariables                           = StringTemplate(argFragments->funcVariables).specialize(specs);
17180         funcCall                                        = StringTemplate(argFragments->bodies).specialize(specs);
17181
17182         specs["num_data_points"]        = de::toString(iterations);
17183         specs["arg_vars"]                       = variables;
17184         specs["arg_decorations"]        = decorations;
17185         specs["arg_infunc_vars"]        = funcVariables;
17186         specs["arg_func_call"]          = funcCall;
17187
17188         fragments["extension"]          = "%ext_import = OpExtInstImport \"GLSL.std.450\"";
17189         fragments["capability"]         = "OpCapability Matrix\nOpCapability Float16\n";
17190         fragments["decoration"]         = decoration.specialize(specs);
17191         fragments["pre_main"]           = preMain.specialize(specs) + functions;
17192         fragments["testfun"]            = testFun.specialize(specs);
17193
17194         for (size_t inputArgNdx = 0; inputArgNdx < testFunc.funcArgsCount; ++inputArgNdx)
17195         {
17196                 const size_t                    numFloatsPerItem        = (inputArgNdx == 0) ? testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16)
17197                                                                                                         : (inputArgNdx == 1) ? testTypes[testFunc.typeArg1].typeArrayStride / sizeof(deFloat16)
17198                                                                                                         : (inputArgNdx == 2) ? testTypes[testFunc.typeArg2].typeArrayStride / sizeof(deFloat16)
17199                                                                                                         : -1;
17200                 const vector<deFloat16> inputData                       = testFunc.getInputDataFunc(seed, numFloatsPerItem * iterations, testTypeIdx, numFloatsPerItem, testFunc.funcArgsCount, inputArgNdx);
17201
17202                 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
17203         }
17204
17205         specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16DummyOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
17206         specResource.verifyIO = testFunc.verifyFunc;
17207
17208         extensions.push_back("VK_KHR_shader_float16_int8");
17209
17210         features.extFloat16Int8         = EXTFLOAT16INT8FEATURES_FLOAT16;
17211
17212         finalizeTestsCreation(specResource, fragments, testCtx, testGroup, testName, features, extensions, IVec3(1, 1, 1));
17213 }
17214
17215 template<size_t C, class SpecResource>
      // Builds the "arithmetic_<C>" test group: one test case per row of the testFuncs
      // table below (minus the rows skipped in the loop), each registered through
      // createFloat16ArithmeticFuncTest<SpecResource>() with C passed as its third argument.
      // C is checked at compile time to be in [1, 4]; it presumably is the number of
      // float16 components per operand (scalar .. 4-component vector) -- TODO confirm
      // against the type table used by createFloat16ArithmeticFuncTest.
      // Returns the group as a raw pointer obtained via MovePtr::release().
17216 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
17217 {
17218         DE_STATIC_ASSERT(C >= 1 && C <= 4);
17219
17220         const std::string                               testGroupName   (string("arithmetic_") + de::toString(C));
17221         de::MovePtr<tcu::TestCaseGroup> testGroup               (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
              // One row per instruction variant under test. Column meaning is defined by
              // Math16TestFunc (declared outside this block); presumably: instruction name,
              // name suffix, argument count, result size, sizes of arguments 0..2, input data
              // generator, verification function -- TODO confirm against the struct.
              // In every row the four compareFP16ArithmeticFunc template arguments repeat the
              // four size columns of that same row; keep them in sync when editing a row.
17222         const Math16TestFunc                    testFuncs[]             =
17223         {
17224                 {       "OpFNegate",                    "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16OpFNegate>                                       },
17225                 {       "Round",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Round>                                           },
17226                 {       "RoundEven",                    "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16RoundEven>                                       },
17227                 {       "Trunc",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Trunc>                                           },
17228                 {       "FAbs",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FAbs>                                            },
17229                 {       "FSign",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FSign>                                           },
17230                 {       "Floor",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Floor>                                           },
17231                 {       "Ceil",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Ceil>                                            },
17232                 {       "Fract",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Fract>                                           },
17233                 {       "Radians",                              "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Radians>                                         },
17234                 {       "Degrees",                              "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Degrees>                                         },
17235                 {       "Sin",                                  "",                     1,      C,              C,              0,              0, &getInputDataPI,     compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sin>                                                     },
17236                 {       "Cos",                                  "",                     1,      C,              C,              0,              0, &getInputDataPI,     compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Cos>                                                     },
17237                 {       "Tan",                                  "",                     1,      C,              C,              0,              0, &getInputDataPI,     compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Tan>                                                     },
17238                 {       "Asin",                                 "",                     1,      C,              C,              0,              0, &getInputDataA,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Asin>                                            },
17239                 {       "Acos",                                 "",                     1,      C,              C,              0,              0, &getInputDataA,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Acos>                                            },
17240                 {       "Atan",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Atan>                                            },
17241                 {       "Sinh",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sinh>                                            },
17242                 {       "Cosh",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Cosh>                                            },
17243                 {       "Tanh",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Tanh>                                            },
17244                 {       "Asinh",                                "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Asinh>                                           },
17245                 {       "Acosh",                                "",                     1,      C,              C,              0,              0, &getInputDataAC,     compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Acosh>                                           },
17246                 {       "Atanh",                                "",                     1,      C,              C,              0,              0, &getInputDataA,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Atanh>                                           },
17247                 {       "Exp",                                  "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Exp>                                                     },
17248                 {       "Log",                                  "",                     1,      C,              C,              0,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Log>                                                     },
17249                 {       "Exp2",                                 "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Exp2>                                            },
17250                 {       "Log2",                                 "",                     1,      C,              C,              0,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Log2>                                            },
17251                 {       "Sqrt",                                 "",                     1,      C,              C,              0,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sqrt>                                            },
17252                 {       "InverseSqrt",                  "",                     1,      C,              C,              0,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16InverseSqrt>                                     },
17253                 {       "Modf",                                 "Frac",         1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfFrac>                                        },
17254                 {       "Modf",                                 "Int",          1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfInt>                                         },
17255                 {       "ModfStruct",                   "Frac",         1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfFrac>                                        },
17256                 {       "ModfStruct",                   "Int",          1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfInt>                                         },
17257                 {       "Frexp",                                "S",            1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpS>                                          },
17258                 {       "Frexp",                                "E",            1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpE>                                          },
17259                 {       "FrexpStruct",                  "S",            1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpS>                                          },
17260                 {       "FrexpStruct",                  "E",            1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpE>                                          },
17261                 {       "OpFAdd",                               "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFAdd>                                          },
17262                 {       "OpFSub",                               "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFSub>                                          },
17263                 {       "OpFMul",                               "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFMul>                                          },
17264                 {       "OpFDiv",                               "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFDiv>                                          },
17265                 {       "Atan2",                                "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Atan2>                                           },
17266                 {       "Pow",                                  "",                     2,      C,              C,              C,              0, &getInputDataP,      compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Pow>                                                     },
17267                 {       "FMin",                                 "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16FMin>                                            },
17268                 {       "FMax",                                 "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16FMax>                                            },
17269                 {       "Step",                                 "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Step>                                            },
17270                 {       "Ldexp",                                "",                     2,      C,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Ldexp>                                           },
17271                 {       "FClamp",                               "",                     3,      C,              C,              C,              C, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FClamp>                                          },
17272                 {       "FMix",                                 "",                     3,      C,              C,              C,              C, &getInputDataD,      compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FMix>                                            },
17273                 {       "SmoothStep",                   "",                     3,      C,              C,              C,              C, &getInputDataSS,     compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16SmoothStep>                                      },
17274                 {       "Fma",                                  "",                     3,      C,              C,              C,              C, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16Fma>                                                     },
17275                 {       "Length",                               "",                     1,      1,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  1,  C,  0,  0, fp16Length>                                          },
17276                 {       "Distance",                             "",                     2,      1,              C,              C,              0, &getInputData,       compareFP16ArithmeticFunc<  1,  C,  C,  0, fp16Distance>                                        },
17277                 {       "Cross",                                "",                     2,      C,              C,              C,              0, &getInputDataD,      compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Cross>                                           },
17278                 {       "Normalize",                    "",                     1,      C,              C,              0,              0, &getInputData,       compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Normalize>                                       },
17279                 {       "FaceForward",                  "",                     3,      C,              C,              C,              C, &getInputDataD,      compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FaceForward>                                     },
17280                 {       "Reflect",                              "",                     2,      C,              C,              C,              0, &getInputDataD,      compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Reflect>                                         },
17281                 {       "Refract",                              "",                     3,      C,              C,              C,              1, &getInputDataN,      compareFP16ArithmeticFunc<  C,  C,  C,  1, fp16Refract>                                         },
17282                 {       "OpDot",                                "",                     2,      1,              C,              C,              0, &getInputDataD,      compareFP16ArithmeticFunc<  1,  C,  C,  0, fp16Dot>                                                     },
17283                 {       "OpVectorTimesScalar",  "",                     2,      C,              C,              1,              0, &getInputDataV,      compareFP16ArithmeticFunc<  C,  C,  1,  0, fp16VectorTimesScalar>                       },
17284         };
17285
              // Register every table row, except the instructions that are not applicable
              // to this instantiation's C (filtered by instruction name below).
17286         for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
17287         {
17288                 const Math16TestFunc&   testFunc                = testFuncs[testFuncIdx];
17289                 const string                    funcNameString  = testFunc.funcName;
17290
                      // "Cross" is only generated for C == 3: the GLSL.std.450 Cross instruction
                      // is defined for 3-component vectors.
17291                 if ((C != 3) && funcNameString == "Cross")
17292                         continue;
17293
                      // "OpDot" is skipped for C == 1 (the only value below 2 that the static
                      // assert allows): SPIR-V OpDot takes vector operands.
17294                 if ((C < 2) && funcNameString == "OpDot")
17295                         continue;
17296
                      // Same restriction for "OpVectorTimesScalar", whose first operand is a vector.
17297                 if ((C < 2) && funcNameString == "OpVectorTimesScalar")
17298                         continue;
17299
17300                 createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), C, testFunc);
17301         }
17302
              // release() detaches the group from the MovePtr; the returned raw pointer is
              // no longer managed here (presumably the caller adds it to a parent group).
17303         return testGroup.release();
17304 }
17305
17306 template<class SpecResource>
17307 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
17308 {
17309         const std::string                               testGroupName   ("arithmetic");
17310         de::MovePtr<tcu::TestCaseGroup> testGroup               (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
17311         const Math16TestFunc                    testFuncs[]             =
17312         {
17313                 {       "OpTranspose",                  "2x2",          1,      MAT2X2, MAT2X2, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  4,  4,  0,  0, fp16Transpose<2,2> >                         },
17314                 {       "OpTranspose",                  "3x2",          1,      MAT2X3, MAT3X2, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<3,2> >                         },
17315                 {       "OpTranspose",                  "4x2",          1,      MAT2X4, MAT4X2, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<4,2> >                         },
17316                 {       "OpTranspose",                  "2x3",          1,      MAT3X2, MAT2X3, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<2,3> >                         },
17317                 {       "OpTranspose",                  "3x3",          1,      MAT3X3, MAT3X3, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<3,3> >                         },
17318                 {       "OpTranspose",                  "4x3",          1,      MAT3X4, MAT4X3, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<4,3> >                         },
17319                 {       "OpTranspose",                  "2x4",          1,      MAT4X2, MAT2X4, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<2,4> >                         },
17320                 {       "OpTranspose",                  "3x4",          1,      MAT4X3, MAT3X4, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<3,4> >                         },
17321                 {       "OpTranspose",                  "4x4",          1,      MAT4X4, MAT4X4, 0,              0, &getInputDataM,      compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<4,4> >                         },
17322                 {       "OpMatrixTimesScalar",  "2x2",          2,      MAT2X2, MAT2X2, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  4,  1,  0, fp16MatrixTimesScalar<2,2> >         },
17323                 {       "OpMatrixTimesScalar",  "2x3",          2,      MAT2X3, MAT2X3, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<2,3> >         },
17324                 {       "OpMatrixTimesScalar",  "2x4",          2,      MAT2X4, MAT2X4, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<2,4> >         },
17325                 {       "OpMatrixTimesScalar",  "3x2",          2,      MAT3X2, MAT3X2, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<3,2> >         },
17326                 {       "OpMatrixTimesScalar",  "3x3",          2,      MAT3X3, MAT3X3, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<3,3> >         },
17327                 {       "OpMatrixTimesScalar",  "3x4",          2,      MAT3X4, MAT3X4, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<3,4> >         },
17328                 {       "OpMatrixTimesScalar",  "4x2",          2,      MAT4X2, MAT4X2, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<4,2> >         },
17329                 {       "OpMatrixTimesScalar",  "4x3",          2,      MAT4X3, MAT4X3, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<4,3> >         },
17330                 {       "OpMatrixTimesScalar",  "4x4",          2,      MAT4X4, MAT4X4, 1,              0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<4,4> >         },
17331                 {       "OpVectorTimesMatrix",  "2x2",          2,      VEC2,   VEC2,   MAT2X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  2,  4,  0, fp16VectorTimesMatrix<2,2> >         },
17332                 {       "OpVectorTimesMatrix",  "2x3",          2,      VEC2,   VEC3,   MAT2X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  3,  8,  0, fp16VectorTimesMatrix<2,3> >         },
17333                 {       "OpVectorTimesMatrix",  "2x4",          2,      VEC2,   VEC4,   MAT2X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  4,  8,  0, fp16VectorTimesMatrix<2,4> >         },
17334                 {       "OpVectorTimesMatrix",  "3x2",          2,      VEC3,   VEC2,   MAT3X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  3,  2,  8,  0, fp16VectorTimesMatrix<3,2> >         },
17335                 {       "OpVectorTimesMatrix",  "3x3",          2,      VEC3,   VEC3,   MAT3X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  3,  3, 16,  0, fp16VectorTimesMatrix<3,3> >         },
17336                 {       "OpVectorTimesMatrix",  "3x4",          2,      VEC3,   VEC4,   MAT3X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  3,  4, 16,  0, fp16VectorTimesMatrix<3,4> >         },
17337                 {       "OpVectorTimesMatrix",  "4x2",          2,      VEC4,   VEC2,   MAT4X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  2,  8,  0, fp16VectorTimesMatrix<4,2> >         },
17338                 {       "OpVectorTimesMatrix",  "4x3",          2,      VEC4,   VEC3,   MAT4X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  3, 16,  0, fp16VectorTimesMatrix<4,3> >         },
17339                 {       "OpVectorTimesMatrix",  "4x4",          2,      VEC4,   VEC4,   MAT4X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  4, 16,  0, fp16VectorTimesMatrix<4,4> >         },
17340                 {       "OpMatrixTimesVector",  "2x2",          2,      VEC2,   MAT2X2, VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  4,  2,  0, fp16MatrixTimesVector<2,2> >         },
17341                 {       "OpMatrixTimesVector",  "2x3",          2,      VEC3,   MAT2X3, VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  3,  8,  2,  0, fp16MatrixTimesVector<2,3> >         },
17342                 {       "OpMatrixTimesVector",  "2x4",          2,      VEC4,   MAT2X4, VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  8,  2,  0, fp16MatrixTimesVector<2,4> >         },
17343                 {       "OpMatrixTimesVector",  "3x2",          2,      VEC2,   MAT3X2, VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  8,  3,  0, fp16MatrixTimesVector<3,2> >         },
17344                 {       "OpMatrixTimesVector",  "3x3",          2,      VEC3,   MAT3X3, VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc<  3, 16,  3,  0, fp16MatrixTimesVector<3,3> >         },
17345                 {       "OpMatrixTimesVector",  "3x4",          2,      VEC4,   MAT3X4, VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc<  4, 16,  3,  0, fp16MatrixTimesVector<3,4> >         },
17346                 {       "OpMatrixTimesVector",  "4x2",          2,      VEC2,   MAT4X2, VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc<  2,  8,  4,  0, fp16MatrixTimesVector<4,2> >         },
17347                 {       "OpMatrixTimesVector",  "4x3",          2,      VEC3,   MAT4X3, VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc<  3, 16,  4,  0, fp16MatrixTimesVector<4,3> >         },
17348                 {       "OpMatrixTimesVector",  "4x4",          2,      VEC4,   MAT4X4, VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc<  4, 16,  4,  0, fp16MatrixTimesVector<4,4> >         },
17349                 {       "OpMatrixTimesMatrix",  "2x2_2x2",      2,      MAT2X2, MAT2X2, MAT2X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  4,  4,  0, fp16MatrixTimesMatrix<2,2,2,2> >     },
17350                 {       "OpMatrixTimesMatrix",  "2x2_3x2",      2,      MAT3X2, MAT2X2, MAT3X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  4,  8,  0, fp16MatrixTimesMatrix<2,2,3,2> >     },
17351                 {       "OpMatrixTimesMatrix",  "2x2_4x2",      2,      MAT4X2, MAT2X2, MAT4X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  4,  8,  0, fp16MatrixTimesMatrix<2,2,4,2> >     },
17352                 {       "OpMatrixTimesMatrix",  "2x3_2x2",      2,      MAT2X3, MAT2X3, MAT2X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  4,  0, fp16MatrixTimesMatrix<2,3,2,2> >     },
17353                 {       "OpMatrixTimesMatrix",  "2x3_3x2",      2,      MAT3X3, MAT2X3, MAT3X2, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,3,3,2> >     },
17354                 {       "OpMatrixTimesMatrix",  "2x3_4x2",      2,      MAT4X3, MAT2X3, MAT4X2, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,3,4,2> >     },
17355                 {       "OpMatrixTimesMatrix",  "2x4_2x2",      2,      MAT2X4, MAT2X4, MAT2X2, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8,  4,  0, fp16MatrixTimesMatrix<2,4,2,2> >     },
17356                 {       "OpMatrixTimesMatrix",  "2x4_3x2",      2,      MAT3X4, MAT2X4, MAT3X2, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,4,3,2> >     },
17357                 {       "OpMatrixTimesMatrix",  "2x4_4x2",      2,      MAT4X4, MAT2X4, MAT4X2, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,4,4,2> >     },
17358                 {       "OpMatrixTimesMatrix",  "3x2_2x3",      2,      MAT2X2, MAT3X2, MAT2X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  8,  8,  0, fp16MatrixTimesMatrix<3,2,2,3> >     },
17359                 {       "OpMatrixTimesMatrix",  "3x2_3x3",      2,      MAT3X2, MAT3X2, MAT3X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<3,2,3,3> >     },
17360                 {       "OpMatrixTimesMatrix",  "3x2_4x3",      2,      MAT4X2, MAT3X2, MAT4X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<3,2,4,3> >     },
17361                 {       "OpMatrixTimesMatrix",  "3x3_2x3",      2,      MAT2X3, MAT3X3, MAT2X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<3,3,2,3> >     },
17362                 {       "OpMatrixTimesMatrix",  "3x3_3x3",      2,      MAT3X3, MAT3X3, MAT3X3, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,3,3,3> >     },
17363                 {       "OpMatrixTimesMatrix",  "3x3_4x3",      2,      MAT4X3, MAT3X3, MAT4X3, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,3,4,3> >     },
17364                 {       "OpMatrixTimesMatrix",  "3x4_2x3",      2,      MAT2X4, MAT3X4, MAT2X3, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<3,4,2,3> >     },
17365                 {       "OpMatrixTimesMatrix",  "3x4_3x3",      2,      MAT3X4, MAT3X4, MAT3X3, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,4,3,3> >     },
17366                 {       "OpMatrixTimesMatrix",  "3x4_4x3",      2,      MAT4X4, MAT3X4, MAT4X3, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,4,4,3> >     },
17367                 {       "OpMatrixTimesMatrix",  "4x2_2x4",      2,      MAT2X2, MAT4X2, MAT2X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  8,  8,  0, fp16MatrixTimesMatrix<4,2,2,4> >     },
17368                 {       "OpMatrixTimesMatrix",  "4x2_3x4",      2,      MAT3X2, MAT4X2, MAT3X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<4,2,3,4> >     },
17369                 {       "OpMatrixTimesMatrix",  "4x2_4x4",      2,      MAT4X2, MAT4X2, MAT4X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<4,2,4,4> >     },
17370                 {       "OpMatrixTimesMatrix",  "4x3_2x4",      2,      MAT2X3, MAT4X3, MAT2X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<4,3,2,4> >     },
17371                 {       "OpMatrixTimesMatrix",  "4x3_3x4",      2,      MAT3X3, MAT4X3, MAT3X4, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,3,3,4> >     },
17372                 {       "OpMatrixTimesMatrix",  "4x3_4x4",      2,      MAT4X3, MAT4X3, MAT4X4, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,3,4,4> >     },
17373                 {       "OpMatrixTimesMatrix",  "4x4_2x4",      2,      MAT2X4, MAT4X4, MAT2X4, 0, &getInputDataD,      compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<4,4,2,4> >     },
17374                 {       "OpMatrixTimesMatrix",  "4x4_3x4",      2,      MAT3X4, MAT4X4, MAT3X4, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,4,3,4> >     },
17375                 {       "OpMatrixTimesMatrix",  "4x4_4x4",      2,      MAT4X4, MAT4X4, MAT4X4, 0, &getInputDataD,      compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,4,4,4> >     },
17376                 {       "OpOuterProduct",               "2x2",          2,      MAT2X2, VEC2,   VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  4,  2,  2,  0, fp16OuterProduct<2,2> >                      },
17377                 {       "OpOuterProduct",               "2x3",          2,      MAT2X3, VEC3,   VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  3,  2,  0, fp16OuterProduct<2,3> >                      },
17378                 {       "OpOuterProduct",               "2x4",          2,      MAT2X4, VEC4,   VEC2,   0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  4,  2,  0, fp16OuterProduct<2,4> >                      },
17379                 {       "OpOuterProduct",               "3x2",          2,      MAT3X2, VEC2,   VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  2,  3,  0, fp16OuterProduct<3,2> >                      },
17380                 {       "OpOuterProduct",               "3x3",          2,      MAT3X3, VEC3,   VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  3,  3,  0, fp16OuterProduct<3,3> >                      },
17381                 {       "OpOuterProduct",               "3x4",          2,      MAT3X4, VEC4,   VEC3,   0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  4,  3,  0, fp16OuterProduct<3,4> >                      },
17382                 {       "OpOuterProduct",               "4x2",          2,      MAT4X2, VEC2,   VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc<  8,  2,  4,  0, fp16OuterProduct<4,2> >                      },
17383                 {       "OpOuterProduct",               "4x3",          2,      MAT4X3, VEC3,   VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  3,  4,  0, fp16OuterProduct<4,3> >                      },
17384                 {       "OpOuterProduct",               "4x4",          2,      MAT4X4, VEC4,   VEC4,   0, &getInputDataD,      compareFP16ArithmeticFunc< 16,  4,  4,  0, fp16OuterProduct<4,4> >                      },
17385                 {       "Determinant",                  "2x2",          1,      SCALAR, MAT2X2, NONE,   0, &getInputDataC,      compareFP16ArithmeticFunc<  1,  4,  0,  0, fp16Determinant<2> >                         },
17386                 {       "Determinant",                  "3x3",          1,      SCALAR, MAT3X3, NONE,   0, &getInputDataC,      compareFP16ArithmeticFunc<  1, 16,  0,  0, fp16Determinant<3> >                         },
17387                 {       "Determinant",                  "4x4",          1,      SCALAR, MAT4X4, NONE,   0, &getInputDataC,      compareFP16ArithmeticFunc<  1, 16,  0,  0, fp16Determinant<4> >                         },
17388                 {       "MatrixInverse",                "2x2",          1,      MAT2X2, MAT2X2, NONE,   0, &getInputDataC,      compareFP16ArithmeticFunc<  4,  4,  0,  0, fp16Inverse<2> >                                     },
17389         };
17390
17391         for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
17392         {
17393                 const Math16TestFunc&   testFunc        = testFuncs[testFuncIdx];
17394
17395                 createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), 0, testFunc);
17396         }
17397
17398         return testGroup.release();
17399 }
17400
17401 const string getNumberTypeName (const NumberType type)
17402 {
17403         if (type == NUMBERTYPE_INT32)
17404         {
17405                 return "int";
17406         }
17407         else if (type == NUMBERTYPE_UINT32)
17408         {
17409                 return "uint";
17410         }
17411         else if (type == NUMBERTYPE_FLOAT32)
17412         {
17413                 return "float";
17414         }
17415         else
17416         {
17417                 DE_ASSERT(false);
17418                 return "";
17419         }
17420 }
17421
17422 deInt32 getInt(de::Random& rnd)
17423 {
17424         return rnd.getInt(std::numeric_limits<int>::min(), std::numeric_limits<int>::max());
17425 }
17426
// Returns str concatenated with itself 'times' times.
// A zero or negative count produces an empty string.
const string repeatString (const string& str, int times)
{
	string repeated;

	for (int remaining = times; remaining > 0; --remaining)
		repeated.append(str);

	return repeated;
}
17436
17437 const string getRandomConstantString (const NumberType type, de::Random& rnd)
17438 {
17439         if (type == NUMBERTYPE_INT32)
17440         {
17441                 return numberToString<deInt32>(getInt(rnd));
17442         }
17443         else if (type == NUMBERTYPE_UINT32)
17444         {
17445                 return numberToString<deUint32>(rnd.getUint32());
17446         }
17447         else if (type == NUMBERTYPE_FLOAT32)
17448         {
17449                 return numberToString<float>(rnd.getFloat());
17450         }
17451         else
17452         {
17453                 DE_ASSERT(false);
17454                 return "";
17455         }
17456 }
17457
17458 void createVectorCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
17459 {
17460         map<string, string> params;
17461
17462         // Vec2 to Vec4
17463         for (int width = 2; width <= 4; ++width)
17464         {
17465                 const string randomConst = numberToString(getInt(rnd));
17466                 const string widthStr = numberToString(width);
17467                 const string composite_type = "${customType}vec" + widthStr;
17468                 const int index = rnd.getInt(0, width-1);
17469
17470                 params["type"]                  = "vec";
17471                 params["name"]                  = params["type"] + "_" + widthStr;
17472                 params["compositeDecl"]         = composite_type + " = OpTypeVector ${customType} " + widthStr +"\n";
17473                 params["compositeType"]         = composite_type;
17474                 params["filler"]                = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
17475                 params["compositeConstruct"]    = "%instance  = OpCompositeConstruct " + composite_type + repeatString(" %filler", width) + "\n";
17476                 params["indexes"]               = numberToString(index);
17477                 testCases.push_back(params);
17478         }
17479 }
17480
17481 void createArrayCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
17482 {
17483         const int limit = 10;
17484         map<string, string> params;
17485
17486         for (int width = 2; width <= limit; ++width)
17487         {
17488                 string randomConst = numberToString(getInt(rnd));
17489                 string widthStr = numberToString(width);
17490                 int index = rnd.getInt(0, width-1);
17491
17492                 params["type"]                  = "array";
17493                 params["name"]                  = params["type"] + "_" + widthStr;
17494                 params["compositeDecl"]         = string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
17495                                                                                         +        "%composite = OpTypeArray ${customType} %arraywidth\n";
17496                 params["compositeType"]         = "%composite";
17497                 params["filler"]                = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
17498                 params["compositeConstruct"]    = "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
17499                 params["indexes"]               = numberToString(index);
17500                 testCases.push_back(params);
17501         }
17502 }
17503
17504 void createStructCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
17505 {
17506         const int limit = 10;
17507         map<string, string> params;
17508
17509         for (int width = 2; width <= limit; ++width)
17510         {
17511                 string randomConst = numberToString(getInt(rnd));
17512                 int index = rnd.getInt(0, width-1);
17513
17514                 params["type"]                  = "struct";
17515                 params["name"]                  = params["type"] + "_" + numberToString(width);
17516                 params["compositeDecl"]         = "%composite = OpTypeStruct" + repeatString(" ${customType}", width) + "\n";
17517                 params["compositeType"]         = "%composite";
17518                 params["filler"]                = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
17519                 params["compositeConstruct"]    = "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
17520                 params["indexes"]               = numberToString(index);
17521                 testCases.push_back(params);
17522         }
17523 }
17524
17525 void createMatrixCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
17526 {
17527         map<string, string> params;
17528
17529         // Vec2 to Vec4
17530         for (int width = 2; width <= 4; ++width)
17531         {
17532                 string widthStr = numberToString(width);
17533
17534                 for (int column = 2 ; column <= 4; ++column)
17535                 {
17536                         int index_0 = rnd.getInt(0, column-1);
17537                         int index_1 = rnd.getInt(0, width-1);
17538                         string columnStr = numberToString(column);
17539
17540                         params["type"]          = "matrix";
17541                         params["name"]          = params["type"] + "_" + widthStr + "x" + columnStr;
17542                         params["compositeDecl"] = string("%vectype   = OpTypeVector ${customType} " + widthStr + "\n")
17543                                                                                                 +        "%composite = OpTypeMatrix %vectype " + columnStr + "\n";
17544                         params["compositeType"] = "%composite";
17545
17546                         params["filler"]        = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n"
17547                                                                                                 +        "%fillerVec = OpConstantComposite %vectype" + repeatString(" %filler", width) + "\n";
17548
17549                         params["compositeConstruct"]    = "%instance  = OpCompositeConstruct %composite" + repeatString(" %fillerVec", column) + "\n";
17550                         params["indexes"]       = numberToString(index_0) + " " + numberToString(index_1);
17551                         testCases.push_back(params);
17552                 }
17553         }
17554 }
17555
17556 void createCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
17557 {
17558         createVectorCompositeCases(testCases, rnd, type);
17559         createArrayCompositeCases(testCases, rnd, type);
17560         createStructCompositeCases(testCases, rnd, type);
17561         // Matrix only supports float types
17562         if (type == NUMBERTYPE_FLOAT32)
17563         {
17564                 createMatrixCompositeCases(testCases, rnd, type);
17565         }
17566 }
17567
17568 const string getAssemblyTypeDeclaration (const NumberType type)
17569 {
17570         switch (type)
17571         {
17572                 case NUMBERTYPE_INT32:          return "OpTypeInt 32 1";
17573                 case NUMBERTYPE_UINT32:         return "OpTypeInt 32 0";
17574                 case NUMBERTYPE_FLOAT32:        return "OpTypeFloat 32";
17575                 default:                        DE_ASSERT(false); return "";
17576         }
17577 }
17578
17579 const string getAssemblyTypeName (const NumberType type)
17580 {
17581         switch (type)
17582         {
17583                 case NUMBERTYPE_INT32:          return "%i32";
17584                 case NUMBERTYPE_UINT32:         return "%u32";
17585                 case NUMBERTYPE_FLOAT32:        return "%f32";
17586                 default:                        DE_ASSERT(false); return "";
17587         }
17588 }
17589
17590 const string specializeCompositeInsertShaderTemplate (const NumberType type, const map<string, string>& params)
17591 {
17592         map<string, string>     parameters(params);
17593
17594         const string customType = getAssemblyTypeName(type);
17595         map<string, string> substCustomType;
17596         substCustomType["customType"] = customType;
17597         parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
17598         parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
17599         parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
17600         parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
17601         parameters["customType"] = customType;
17602         parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
17603
17604         if (parameters.at("compositeType") != "%u32vec3")
17605         {
17606                 parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
17607         }
17608
17609         return StringTemplate(
17610                 "OpCapability Shader\n"
17611                 "OpCapability Matrix\n"
17612                 "OpMemoryModel Logical GLSL450\n"
17613                 "OpEntryPoint GLCompute %main \"main\" %id\n"
17614                 "OpExecutionMode %main LocalSize 1 1 1\n"
17615
17616                 "OpSource GLSL 430\n"
17617                 "OpName %main           \"main\"\n"
17618                 "OpName %id             \"gl_GlobalInvocationID\"\n"
17619
17620                 // Decorators
17621                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
17622                 "OpDecorate %buf BufferBlock\n"
17623                 "OpDecorate %indata DescriptorSet 0\n"
17624                 "OpDecorate %indata Binding 0\n"
17625                 "OpDecorate %outdata DescriptorSet 0\n"
17626                 "OpDecorate %outdata Binding 1\n"
17627                 "OpDecorate %customarr ArrayStride 4\n"
17628                 "${compositeDecorator}"
17629                 "OpMemberDecorate %buf 0 Offset 0\n"
17630
17631                 // General types
17632                 "%void      = OpTypeVoid\n"
17633                 "%voidf     = OpTypeFunction %void\n"
17634                 "%u32       = OpTypeInt 32 0\n"
17635                 "%i32       = OpTypeInt 32 1\n"
17636                 "%f32       = OpTypeFloat 32\n"
17637
17638                 // Composite declaration
17639                 "${compositeDecl}"
17640
17641                 // Constants
17642                 "${filler}"
17643
17644                 "${u32vec3Decl:opt}"
17645                 "%uvec3ptr  = OpTypePointer Input %u32vec3\n"
17646
17647                 // Inherited from custom
17648                 "%customptr = OpTypePointer Uniform ${customType}\n"
17649                 "%customarr = OpTypeRuntimeArray ${customType}\n"
17650                 "%buf       = OpTypeStruct %customarr\n"
17651                 "%bufptr    = OpTypePointer Uniform %buf\n"
17652
17653                 "%indata    = OpVariable %bufptr Uniform\n"
17654                 "%outdata   = OpVariable %bufptr Uniform\n"
17655
17656                 "%id        = OpVariable %uvec3ptr Input\n"
17657                 "%zero      = OpConstant %i32 0\n"
17658
17659                 "%main      = OpFunction %void None %voidf\n"
17660                 "%label     = OpLabel\n"
17661                 "%idval     = OpLoad %u32vec3 %id\n"
17662                 "%x         = OpCompositeExtract %u32 %idval 0\n"
17663
17664                 "%inloc     = OpAccessChain %customptr %indata %zero %x\n"
17665                 "%outloc    = OpAccessChain %customptr %outdata %zero %x\n"
17666                 // Read the input value
17667                 "%inval     = OpLoad ${customType} %inloc\n"
17668                 // Create the composite and fill it
17669                 "${compositeConstruct}"
17670                 // Insert the input value to a place
17671                 "%instance2 = OpCompositeInsert ${compositeType} %inval %instance ${indexes}\n"
17672                 // Read back the value from the position
17673                 "%out_val   = OpCompositeExtract ${customType} %instance2 ${indexes}\n"
17674                 // Store it in the output position
17675                 "             OpStore %outloc %out_val\n"
17676                 "             OpReturn\n"
17677                 "             OpFunctionEnd\n"
17678         ).specialize(parameters);
17679 }
17680
17681 template<typename T>
17682 BufferSp createCompositeBuffer(T number)
17683 {
17684         return BufferSp(new Buffer<T>(vector<T>(1, number)));
17685 }
17686
17687 tcu::TestCaseGroup* createOpCompositeInsertGroup (tcu::TestContext& testCtx)
17688 {
17689         de::MovePtr<tcu::TestCaseGroup> group   (new tcu::TestCaseGroup(testCtx, "opcompositeinsert", "Test the OpCompositeInsert instruction"));
17690         de::Random                                              rnd             (deStringHash(group->getName()));
17691
17692         for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
17693         {
17694                 NumberType                                              numberType              = NumberType(type);
17695                 const string                                    typeName                = getNumberTypeName(numberType);
17696                 const string                                    description             = "Test the OpCompositeInsert instruction with " + typeName + "s";
17697                 de::MovePtr<tcu::TestCaseGroup> subGroup                (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
17698                 vector<map<string, string> >    testCases;
17699
17700                 createCompositeCases(testCases, rnd, numberType);
17701
17702                 for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
17703                 {
17704                         ComputeShaderSpec       spec;
17705
17706                         spec.assembly = specializeCompositeInsertShaderTemplate(numberType, *test);
17707
17708                         switch (numberType)
17709                         {
17710                                 case NUMBERTYPE_INT32:
17711                                 {
17712                                         deInt32 number = getInt(rnd);
17713                                         spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
17714                                         spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
17715                                         break;
17716                                 }
17717                                 case NUMBERTYPE_UINT32:
17718                                 {
17719                                         deUint32 number = rnd.getUint32();
17720                                         spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
17721                                         spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
17722                                         break;
17723                                 }
17724                                 case NUMBERTYPE_FLOAT32:
17725                                 {
17726                                         float number = rnd.getFloat();
17727                                         spec.inputs.push_back(createCompositeBuffer<float>(number));
17728                                         spec.outputs.push_back(createCompositeBuffer<float>(number));
17729                                         break;
17730                                 }
17731                                 default:
17732                                         DE_ASSERT(false);
17733                         }
17734
17735                         spec.numWorkGroups = IVec3(1, 1, 1);
17736                         subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpCompositeInsert test", spec));
17737                 }
17738                 group->addChild(subGroup.release());
17739         }
17740         return group.release();
17741 }
17742
17743 struct AssemblyStructInfo
17744 {
17745         AssemblyStructInfo (const deUint32 comp, const deUint32 idx)
17746         : components    (comp)
17747         , index                 (idx)
17748         {}
17749
17750         deUint32 components;
17751         deUint32 index;
17752 };
17753
17754 const string specializeInBoundsShaderTemplate (const NumberType type, const AssemblyStructInfo& structInfo, const map<string, string>& params)
17755 {
17756         // Create the full index string
17757         string                          fullIndex       = numberToString(structInfo.index) + " " + params.at("indexes");
17758         // Convert it to list of indexes
17759         vector<string>          indexes         = de::splitString(fullIndex, ' ');
17760
17761         map<string, string>     parameters      (params);
17762         parameters["structType"]        = repeatString(" ${compositeType}", structInfo.components);
17763         parameters["structConstruct"]   = repeatString(" %instance", structInfo.components);
17764         parameters["insertIndexes"]     = fullIndex;
17765
17766         // In matrix cases the last two index is the CompositeExtract indexes
17767         const deUint32 extractIndexes = (parameters["type"] == "matrix") ? 2 : 1;
17768
17769         // Construct the extractIndex
17770         for (vector<string>::const_iterator index = indexes.end() - extractIndexes; index != indexes.end(); ++index)
17771         {
17772                 parameters["extractIndexes"] += " " + *index;
17773         }
17774
17775         // Remove the last 1 or 2 element depends on matrix case or not
17776         indexes.erase(indexes.end() - extractIndexes, indexes.end());
17777
17778         deUint32 id = 0;
17779         // Generate AccessChain index expressions (except for the last one, because we use ptr to the composite)
17780         for (vector<string>::const_iterator index = indexes.begin(); index != indexes.end(); ++index)
17781         {
17782                 string indexId = "%index_" + numberToString(id++);
17783                 parameters["accessChainConstDeclaration"] += indexId + "   = OpConstant %u32 " + *index + "\n";
17784                 parameters["accessChainIndexes"] += " " + indexId;
17785         }
17786
17787         parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
17788
17789         const string customType = getAssemblyTypeName(type);
17790         map<string, string> substCustomType;
17791         substCustomType["customType"] = customType;
17792         parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
17793         parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
17794         parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
17795         parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
17796         parameters["customType"] = customType;
17797
17798         const string compositeType = parameters.at("compositeType");
17799         map<string, string> substCompositeType;
17800         substCompositeType["compositeType"] = compositeType;
17801         parameters["structType"] = StringTemplate(parameters.at("structType")).specialize(substCompositeType);
17802         if (compositeType != "%u32vec3")
17803         {
17804                 parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
17805         }
17806
17807         return StringTemplate(
17808                 "OpCapability Shader\n"
17809                 "OpCapability Matrix\n"
17810                 "OpMemoryModel Logical GLSL450\n"
17811                 "OpEntryPoint GLCompute %main \"main\" %id\n"
17812                 "OpExecutionMode %main LocalSize 1 1 1\n"
17813
17814                 "OpSource GLSL 430\n"
17815                 "OpName %main           \"main\"\n"
17816                 "OpName %id             \"gl_GlobalInvocationID\"\n"
17817                 // Decorators
17818                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
17819                 "OpDecorate %buf BufferBlock\n"
17820                 "OpDecorate %indata DescriptorSet 0\n"
17821                 "OpDecorate %indata Binding 0\n"
17822                 "OpDecorate %outdata DescriptorSet 0\n"
17823                 "OpDecorate %outdata Binding 1\n"
17824                 "OpDecorate %customarr ArrayStride 4\n"
17825                 "${compositeDecorator}"
17826                 "OpMemberDecorate %buf 0 Offset 0\n"
17827                 // General types
17828                 "%void      = OpTypeVoid\n"
17829                 "%voidf     = OpTypeFunction %void\n"
17830                 "%i32       = OpTypeInt 32 1\n"
17831                 "%u32       = OpTypeInt 32 0\n"
17832                 "%f32       = OpTypeFloat 32\n"
17833                 // Custom types
17834                 "${compositeDecl}"
17835                 // %u32vec3 if not already declared in ${compositeDecl}
17836                 "${u32vec3Decl:opt}"
17837                 "%uvec3ptr  = OpTypePointer Input %u32vec3\n"
17838                 // Inherited from composite
17839                 "%composite_p = OpTypePointer Function ${compositeType}\n"
17840                 "%struct_t  = OpTypeStruct${structType}\n"
17841                 "%struct_p  = OpTypePointer Function %struct_t\n"
17842                 // Constants
17843                 "${filler}"
17844                 "${accessChainConstDeclaration}"
17845                 // Inherited from custom
17846                 "%customptr = OpTypePointer Uniform ${customType}\n"
17847                 "%customarr = OpTypeRuntimeArray ${customType}\n"
17848                 "%buf       = OpTypeStruct %customarr\n"
17849                 "%bufptr    = OpTypePointer Uniform %buf\n"
17850                 "%indata    = OpVariable %bufptr Uniform\n"
17851                 "%outdata   = OpVariable %bufptr Uniform\n"
17852
17853                 "%id        = OpVariable %uvec3ptr Input\n"
17854                 "%zero      = OpConstant %u32 0\n"
17855                 "%main      = OpFunction %void None %voidf\n"
17856                 "%label     = OpLabel\n"
17857                 "%struct_v  = OpVariable %struct_p Function\n"
17858                 "%idval     = OpLoad %u32vec3 %id\n"
17859                 "%x         = OpCompositeExtract %u32 %idval 0\n"
17860                 // Create the input/output type
17861                 "%inloc     = OpInBoundsAccessChain %customptr %indata %zero %x\n"
17862                 "%outloc    = OpInBoundsAccessChain %customptr %outdata %zero %x\n"
17863                 // Read the input value
17864                 "%inval     = OpLoad ${customType} %inloc\n"
17865                 // Create the composite and fill it
17866                 "${compositeConstruct}"
17867                 // Create the struct and fill it with the composite
17868                 "%struct    = OpCompositeConstruct %struct_t${structConstruct}\n"
17869                 // Insert the value
17870                 "%comp_obj  = OpCompositeInsert %struct_t %inval %struct ${insertIndexes}\n"
17871                 // Store the object
17872                 "             OpStore %struct_v %comp_obj\n"
17873                 // Get deepest possible composite pointer
17874                 "%inner_ptr = OpInBoundsAccessChain %composite_p %struct_v${accessChainIndexes}\n"
17875                 "%read_obj  = OpLoad ${compositeType} %inner_ptr\n"
17876                 // Read back the stored value
17877                 "%read_val  = OpCompositeExtract ${customType} %read_obj${extractIndexes}\n"
17878                 "             OpStore %outloc %read_val\n"
17879                 "             OpReturn\n"
17880                 "             OpFunctionEnd\n"
17881         ).specialize(parameters);
17882 }
17883
17884 tcu::TestCaseGroup* createOpInBoundsAccessChainGroup (tcu::TestContext& testCtx)
17885 {
17886         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "opinboundsaccesschain", "Test the OpInBoundsAccessChain instruction"));
17887         de::Random                                              rnd                             (deStringHash(group->getName()));
17888
17889         for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
17890         {
17891                 NumberType                                              numberType      = NumberType(type);
17892                 const string                                    typeName        = getNumberTypeName(numberType);
17893                 const string                                    description     = "Test the OpInBoundsAccessChain instruction with " + typeName + "s";
17894                 de::MovePtr<tcu::TestCaseGroup> subGroup        (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
17895
17896                 vector<map<string, string> >    testCases;
17897                 createCompositeCases(testCases, rnd, numberType);
17898
17899                 for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
17900                 {
17901                         ComputeShaderSpec       spec;
17902
17903                         // Number of components inside of a struct
17904                         deUint32 structComponents = rnd.getInt(2, 8);
17905                         // Component index value
17906                         deUint32 structIndex = rnd.getInt(0, structComponents - 1);
17907                         AssemblyStructInfo structInfo(structComponents, structIndex);
17908
17909                         spec.assembly = specializeInBoundsShaderTemplate(numberType, structInfo, *test);
17910
17911                         switch (numberType)
17912                         {
17913                                 case NUMBERTYPE_INT32:
17914                                 {
17915                                         deInt32 number = getInt(rnd);
17916                                         spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
17917                                         spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
17918                                         break;
17919                                 }
17920                                 case NUMBERTYPE_UINT32:
17921                                 {
17922                                         deUint32 number = rnd.getUint32();
17923                                         spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
17924                                         spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
17925                                         break;
17926                                 }
17927                                 case NUMBERTYPE_FLOAT32:
17928                                 {
17929                                         float number = rnd.getFloat();
17930                                         spec.inputs.push_back(createCompositeBuffer<float>(number));
17931                                         spec.outputs.push_back(createCompositeBuffer<float>(number));
17932                                         break;
17933                                 }
17934                                 default:
17935                                         DE_ASSERT(false);
17936                         }
17937                         spec.numWorkGroups = IVec3(1, 1, 1);
17938                         subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpInBoundsAccessChain test", spec));
17939                 }
17940                 group->addChild(subGroup.release());
17941         }
17942         return group.release();
17943 }
17944
17945 // If the params missing, uninitialized case
17946 const string specializeDefaultOutputShaderTemplate (const NumberType type, const map<string, string>& params = map<string, string>())
17947 {
17948         map<string, string> parameters(params);
17949
17950         parameters["customType"]        = getAssemblyTypeName(type);
17951
17952         // Declare the const value, and use it in the initializer
17953         if (params.find("constValue") != params.end())
17954         {
17955                 parameters["variableInitializer"]       = " %const";
17956         }
17957         // Uninitialized case
17958         else
17959         {
17960                 parameters["commentDecl"]       = ";";
17961         }
17962
17963         return StringTemplate(
17964                 "OpCapability Shader\n"
17965                 "OpMemoryModel Logical GLSL450\n"
17966                 "OpEntryPoint GLCompute %main \"main\" %id\n"
17967                 "OpExecutionMode %main LocalSize 1 1 1\n"
17968                 "OpSource GLSL 430\n"
17969                 "OpName %main           \"main\"\n"
17970                 "OpName %id             \"gl_GlobalInvocationID\"\n"
17971                 // Decorators
17972                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
17973                 "OpDecorate %indata DescriptorSet 0\n"
17974                 "OpDecorate %indata Binding 0\n"
17975                 "OpDecorate %outdata DescriptorSet 0\n"
17976                 "OpDecorate %outdata Binding 1\n"
17977                 "OpDecorate %in_arr ArrayStride 4\n"
17978                 "OpDecorate %in_buf BufferBlock\n"
17979                 "OpMemberDecorate %in_buf 0 Offset 0\n"
17980                 // Base types
17981                 "%void       = OpTypeVoid\n"
17982                 "%voidf      = OpTypeFunction %void\n"
17983                 "%u32        = OpTypeInt 32 0\n"
17984                 "%i32        = OpTypeInt 32 1\n"
17985                 "%f32        = OpTypeFloat 32\n"
17986                 "%uvec3      = OpTypeVector %u32 3\n"
17987                 "%uvec3ptr   = OpTypePointer Input %uvec3\n"
17988                 "${commentDecl:opt}%const      = OpConstant ${customType} ${constValue:opt}\n"
17989                 // Derived types
17990                 "%in_ptr     = OpTypePointer Uniform ${customType}\n"
17991                 "%in_arr     = OpTypeRuntimeArray ${customType}\n"
17992                 "%in_buf     = OpTypeStruct %in_arr\n"
17993                 "%in_bufptr  = OpTypePointer Uniform %in_buf\n"
17994                 "%indata     = OpVariable %in_bufptr Uniform\n"
17995                 "%outdata    = OpVariable %in_bufptr Uniform\n"
17996                 "%id         = OpVariable %uvec3ptr Input\n"
17997                 "%var_ptr    = OpTypePointer Function ${customType}\n"
17998                 // Constants
17999                 "%zero       = OpConstant %i32 0\n"
18000                 // Main function
18001                 "%main       = OpFunction %void None %voidf\n"
18002                 "%label      = OpLabel\n"
18003                 "%out_var    = OpVariable %var_ptr Function${variableInitializer:opt}\n"
18004                 "%idval      = OpLoad %uvec3 %id\n"
18005                 "%x          = OpCompositeExtract %u32 %idval 0\n"
18006                 "%inloc      = OpAccessChain %in_ptr %indata %zero %x\n"
18007                 "%outloc     = OpAccessChain %in_ptr %outdata %zero %x\n"
18008
18009                 "%outval     = OpLoad ${customType} %out_var\n"
18010                 "              OpStore %outloc %outval\n"
18011                 "              OpReturn\n"
18012                 "              OpFunctionEnd\n"
18013         ).specialize(parameters);
18014 }
18015
18016 bool compareFloats (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
18017 {
18018         DE_ASSERT(outputAllocs.size() != 0);
18019         DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
18020
18021         // Use custom epsilon because of the float->string conversion
18022         const float     epsilon = 0.00001f;
18023
18024         for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
18025         {
18026                 vector<deUint8> expectedBytes;
18027                 float                   expected;
18028                 float                   actual;
18029
18030                 expectedOutputs[outputNdx].getBytes(expectedBytes);
18031                 memcpy(&expected, &expectedBytes.front(), expectedBytes.size());
18032                 memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size());
18033
18034                 // Test with epsilon
18035                 if (fabs(expected - actual) > epsilon)
18036                 {
18037                         log << TestLog::Message << "Error: The actual and expected values not matching."
18038                                 << " Expected: " << expected << " Actual: " << actual << " Epsilon: " << epsilon << TestLog::EndMessage;
18039                         return false;
18040                 }
18041         }
18042         return true;
18043 }
18044
18045 // Checks if the driver crash with uninitialized cases
18046 bool passthruVerify (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
18047 {
18048         DE_ASSERT(outputAllocs.size() != 0);
18049         DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
18050
18051         // Copy and discard the result.
18052         for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
18053         {
18054                 vector<deUint8> expectedBytes;
18055                 expectedOutputs[outputNdx].getBytes(expectedBytes);
18056
18057                 const size_t    width                   = expectedBytes.size();
18058                 vector<char>    data                    (width);
18059
18060                 memcpy(&data[0], outputAllocs[outputNdx]->getHostPtr(), width);
18061         }
18062         return true;
18063 }
18064
18065 tcu::TestCaseGroup* createShaderDefaultOutputGroup (tcu::TestContext& testCtx)
18066 {
18067         de::MovePtr<tcu::TestCaseGroup> group   (new tcu::TestCaseGroup(testCtx, "shader_default_output", "Test shader default output."));
18068         de::Random                                              rnd             (deStringHash(group->getName()));
18069
18070         for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
18071         {
18072                 NumberType                                              numberType      = NumberType(type);
18073                 const string                                    typeName        = getNumberTypeName(numberType);
18074                 const string                                    description     = "Test the OpVariable initializer with " + typeName + ".";
18075                 de::MovePtr<tcu::TestCaseGroup> subGroup        (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
18076
18077                 // 2 similar subcases (initialized and uninitialized)
18078                 for (int subCase = 0; subCase < 2; ++subCase)
18079                 {
18080                         ComputeShaderSpec spec;
18081                         spec.numWorkGroups = IVec3(1, 1, 1);
18082
18083                         map<string, string>                             params;
18084
18085                         switch (numberType)
18086                         {
18087                                 case NUMBERTYPE_INT32:
18088                                 {
18089                                         deInt32 number = getInt(rnd);
18090                                         spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
18091                                         spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
18092                                         params["constValue"] = numberToString(number);
18093                                         break;
18094                                 }
18095                                 case NUMBERTYPE_UINT32:
18096                                 {
18097                                         deUint32 number = rnd.getUint32();
18098                                         spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
18099                                         spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
18100                                         params["constValue"] = numberToString(number);
18101                                         break;
18102                                 }
18103                                 case NUMBERTYPE_FLOAT32:
18104                                 {
18105                                         float number = rnd.getFloat();
18106                                         spec.inputs.push_back(createCompositeBuffer<float>(number));
18107                                         spec.outputs.push_back(createCompositeBuffer<float>(number));
18108                                         spec.verifyIO = &compareFloats;
18109                                         params["constValue"] = numberToString(number);
18110                                         break;
18111                                 }
18112                                 default:
18113                                         DE_ASSERT(false);
18114                         }
18115
18116                         // Initialized subcase
18117                         if (!subCase)
18118                         {
18119                                 spec.assembly = specializeDefaultOutputShaderTemplate(numberType, params);
18120                                 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "initialized", "OpVariable initializer tests.", spec));
18121                         }
18122                         // Uninitialized subcase
18123                         else
18124                         {
18125                                 spec.assembly = specializeDefaultOutputShaderTemplate(numberType);
18126                                 spec.verifyIO = &passthruVerify;
18127                                 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "uninitialized", "OpVariable initializer tests.", spec));
18128                         }
18129                 }
18130                 group->addChild(subGroup.release());
18131         }
18132         return group.release();
18133 }
18134
18135 tcu::TestCaseGroup* createOpNopTests (tcu::TestContext& testCtx)
18136 {
18137         de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opnop", "Test OpNop"));
18138         RGBA                                                    defaultColors[4];
18139         map<string, string>                             opNopFragments;
18140
18141         getDefaultColors(defaultColors);
18142
18143         opNopFragments["testfun"]               =
18144                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
18145                 "%param1 = OpFunctionParameter %v4f32\n"
18146                 "%label_testfun = OpLabel\n"
18147                 "OpNop\n"
18148                 "OpNop\n"
18149                 "OpNop\n"
18150                 "OpNop\n"
18151                 "OpNop\n"
18152                 "OpNop\n"
18153                 "OpNop\n"
18154                 "OpNop\n"
18155                 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
18156                 "%b = OpFAdd %f32 %a %a\n"
18157                 "OpNop\n"
18158                 "%c = OpFSub %f32 %b %a\n"
18159                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
18160                 "OpNop\n"
18161                 "OpNop\n"
18162                 "OpReturnValue %ret\n"
18163                 "OpFunctionEnd\n";
18164
18165         createTestsForAllStages("opnop", defaultColors, defaultColors, opNopFragments, testGroup.get());
18166
18167         return testGroup.release();
18168 }
18169
18170 tcu::TestCaseGroup* createOpNameTests (tcu::TestContext& testCtx)
18171 {
18172         de::MovePtr<tcu::TestCaseGroup> testGroup       (new tcu::TestCaseGroup(testCtx, "opname","Test OpName"));
18173         RGBA                                                    defaultColors[4];
18174         map<string, string>                             opNameFragments;
18175
18176         getDefaultColors(defaultColors);
18177
18178         opNameFragments["testfun"] =
18179                 "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
18180                 "%param1     = OpFunctionParameter %v4f32\n"
18181                 "%label_func = OpLabel\n"
18182                 "%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
18183                 "%b          = OpFAdd %f32 %a %a\n"
18184                 "%c          = OpFSub %f32 %b %a\n"
18185                 "%ret        = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
18186                 "OpReturnValue %ret\n"
18187                 "OpFunctionEnd\n";
18188
18189         opNameFragments["debug"] =
18190                 "OpName %BP_main \"not_main\"";
18191
18192         createTestsForAllStages("opname", defaultColors, defaultColors, opNameFragments, testGroup.get());
18193
18194         return testGroup.release();
18195 }
18196
18197 tcu::TestCaseGroup* createFloat16Tests (tcu::TestContext& testCtx)
18198 {
18199         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
18200
18201         testGroup->addChild(createOpConstantFloat16Tests(testCtx));
18202         testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITH_NAN));
18203         testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITHOUT_NAN));
18204         testGroup->addChild(createFloat16FuncSet<GraphicsResources>(testCtx));
18205         testGroup->addChild(createFloat16VectorExtractSet<GraphicsResources>(testCtx));
18206         testGroup->addChild(createFloat16VectorInsertSet<GraphicsResources>(testCtx));
18207         testGroup->addChild(createFloat16VectorShuffleSet<GraphicsResources>(testCtx));
18208         testGroup->addChild(createFloat16CompositeConstructSet<GraphicsResources>(testCtx));
18209         testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeExtract"));
18210         testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeInsert"));
18211         testGroup->addChild(createFloat16ArithmeticSet<GraphicsResources>(testCtx));
18212         testGroup->addChild(createFloat16ArithmeticSet<1, GraphicsResources>(testCtx));
18213         testGroup->addChild(createFloat16ArithmeticSet<2, GraphicsResources>(testCtx));
18214         testGroup->addChild(createFloat16ArithmeticSet<3, GraphicsResources>(testCtx));
18215         testGroup->addChild(createFloat16ArithmeticSet<4, GraphicsResources>(testCtx));
18216
18217         return testGroup.release();
18218 }
18219
18220 tcu::TestCaseGroup* createFloat16Group (tcu::TestContext& testCtx)
18221 {
18222         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
18223
18224         testGroup->addChild(createFloat16OpConstantCompositeGroup(testCtx));
18225         testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITH_NAN));
18226         testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITHOUT_NAN));
18227         testGroup->addChild(createFloat16FuncSet<ComputeShaderSpec>(testCtx));
18228         testGroup->addChild(createFloat16VectorExtractSet<ComputeShaderSpec>(testCtx));
18229         testGroup->addChild(createFloat16VectorInsertSet<ComputeShaderSpec>(testCtx));
18230         testGroup->addChild(createFloat16VectorShuffleSet<ComputeShaderSpec>(testCtx));
18231         testGroup->addChild(createFloat16CompositeConstructSet<ComputeShaderSpec>(testCtx));
18232         testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeExtract"));
18233         testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeInsert"));
18234         testGroup->addChild(createFloat16ArithmeticSet<ComputeShaderSpec>(testCtx));
18235         testGroup->addChild(createFloat16ArithmeticSet<1, ComputeShaderSpec>(testCtx));
18236         testGroup->addChild(createFloat16ArithmeticSet<2, ComputeShaderSpec>(testCtx));
18237         testGroup->addChild(createFloat16ArithmeticSet<3, ComputeShaderSpec>(testCtx));
18238         testGroup->addChild(createFloat16ArithmeticSet<4, ComputeShaderSpec>(testCtx));
18239
18240         return testGroup.release();
18241 }
18242
18243 tcu::TestCaseGroup* createBoolMixedBitSizeGroup (tcu::TestContext& testCtx)
18244 {
18245         de::MovePtr<tcu::TestCaseGroup> group                   (new tcu::TestCaseGroup(testCtx, "mixed_bitsize", "Tests boolean operands produced from instructions of different bit-sizes"));
18246
18247         de::Random                                              rnd                             (deStringHash(group->getName()));
18248         const int               numElements             = 100;
18249         vector<float>   inputData               (numElements, 0);
18250         vector<float>   outputData              (numElements, 0);
18251         fillRandomScalars(rnd, 0.0f, 100.0f, &inputData[0], 100);
18252
18253         const StringTemplate                    shaderTemplate  (
18254                 "${CAPS}\n"
18255                 "OpMemoryModel Logical GLSL450\n"
18256                 "OpEntryPoint GLCompute %main \"main\" %id\n"
18257                 "OpExecutionMode %main LocalSize 1 1 1\n"
18258                 "OpSource GLSL 430\n"
18259                 "OpName %main           \"main\"\n"
18260                 "OpName %id             \"gl_GlobalInvocationID\"\n"
18261
18262                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
18263
18264                 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
18265
18266                 "%id        = OpVariable %uvec3ptr Input\n"
18267                 "${CONST}\n"
18268                 "%main      = OpFunction %void None %voidf\n"
18269                 "%label     = OpLabel\n"
18270                 "%idval     = OpLoad %uvec3 %id\n"
18271                 "%x         = OpCompositeExtract %u32 %idval 0\n"
18272                 "%inloc     = OpAccessChain %f32ptr %indata %c0i32 %x\n"
18273
18274                 "${TEST}\n"
18275
18276                 "%outloc    = OpAccessChain %f32ptr %outdata %c0i32 %x\n"
18277                 "             OpStore %outloc %res\n"
18278                 "             OpReturn\n"
18279                 "             OpFunctionEnd\n"
18280         );
18281
18282         // Each test case produces 4 boolean values, and we want each of these values
18283         // to come froma different combination of the available bit-sizes, so compute
18284         // all possible combinations here.
18285         vector<deUint32>        widths;
18286         widths.push_back(32);
18287         widths.push_back(16);
18288         widths.push_back(8);
18289
18290         vector<IVec4>   cases;
18291         for (size_t width0 = 0; width0 < widths.size(); width0++)
18292         {
18293                 for (size_t width1 = 0; width1 < widths.size(); width1++)
18294                 {
18295                         for (size_t width2 = 0; width2 < widths.size(); width2++)
18296                         {
18297                                 for (size_t width3 = 0; width3 < widths.size(); width3++)
18298                                 {
18299                                         cases.push_back(IVec4(widths[width0], widths[width1], widths[width2], widths[width3]));
18300                                 }
18301                         }
18302                 }
18303         }
18304
18305         for (size_t caseNdx = 0; caseNdx < cases.size(); caseNdx++)
18306         {
18307                 /// Skip cases where all bitsizes are the same, we are only interested in testing booleans produced from instructions with different native bit-sizes
18308                 if (cases[caseNdx][0] == cases[caseNdx][1] && cases[caseNdx][0] == cases[caseNdx][2] && cases[caseNdx][0] == cases[caseNdx][3])
18309                         continue;
18310
18311                 map<string, string>     specializations;
18312                 ComputeShaderSpec       spec;
18313
18314                 // Inject appropriate capabilities and reference constants depending
18315                 // on the bit-sizes required by this test case
18316                 bool hasFloat32 = cases[caseNdx][0] == 32 || cases[caseNdx][1] == 32 || cases[caseNdx][2] == 32 || cases[caseNdx][3] == 32;
18317                 bool hasFloat16 = cases[caseNdx][0] == 16 || cases[caseNdx][1] == 16 || cases[caseNdx][2] == 16 || cases[caseNdx][3] == 16;
18318                 bool hasInt8    = cases[caseNdx][0] == 8 || cases[caseNdx][1] == 8 || cases[caseNdx][2] == 8 || cases[caseNdx][3] == 8;
18319
18320                 string capsStr  = "OpCapability Shader\n";
18321                 string constStr =
18322                         "%c0i32     = OpConstant %i32 0\n"
18323                         "%c1f32     = OpConstant %f32 1.0\n"
18324                         "%c0f32     = OpConstant %f32 0.0\n";
18325
18326                 if (hasFloat32)
18327                 {
18328                         constStr        +=
18329                                 "%c10f32    = OpConstant %f32 10.0\n"
18330                                 "%c25f32    = OpConstant %f32 25.0\n"
18331                                 "%c50f32    = OpConstant %f32 50.0\n"
18332                                 "%c90f32    = OpConstant %f32 90.0\n";
18333                 }
18334
18335                 if (hasFloat16)
18336                 {
18337                         capsStr         += "OpCapability Float16\n";
18338                         constStr        +=
18339                                 "%f16       = OpTypeFloat 16\n"
18340                                 "%c10f16    = OpConstant %f16 10.0\n"
18341                                 "%c25f16    = OpConstant %f16 25.0\n"
18342                                 "%c50f16    = OpConstant %f16 50.0\n"
18343                                 "%c90f16    = OpConstant %f16 90.0\n";
18344                 }
18345
18346                 if (hasInt8)
18347                 {
18348                         capsStr         += "OpCapability Int8\n";
18349                         constStr        +=
18350                                 "%i8        = OpTypeInt 8 1\n"
18351                                 "%c10i8     = OpConstant %i8 10\n"
18352                                 "%c25i8     = OpConstant %i8 25\n"
18353                                 "%c50i8     = OpConstant %i8 50\n"
18354                                 "%c90i8     = OpConstant %i8 90\n";
18355                 }
18356
18357                 // Each invocation reads a different float32 value as input. Depending on
18358                 // the bit-sizes required by the particular test case, we also produce
18359                 // float16 and/or and int8 values by converting from the 32-bit float.
18360                 string testStr  = "";
18361                 testStr                 += "%inval32   = OpLoad %f32 %inloc\n";
18362                 if (hasFloat16)
18363                         testStr         += "%inval16   = OpFConvert %f16 %inval32\n";
18364                 if (hasInt8)
18365                         testStr         += "%inval8    = OpConvertFToS %i8 %inval32\n";
18366
18367                 // Because conversions from Float to Int round towards 0 we want our "greater" comparisons to be >=,
18368                 // that way a float32/float16 comparison such as 50.6f >= 50.0f will preserve its result
18369                 // when converted to int8, since FtoS(50.6f) results in 50. For "less" comparisons, it is the
18370                 // other way around, so in this case we want < instead of <=.
18371                 if (cases[caseNdx][0] == 32)
18372                         testStr         += "%cmp1      = OpFOrdGreaterThanEqual %bool %inval32 %c25f32\n";
18373                 else if (cases[caseNdx][0] == 16)
18374                         testStr         += "%cmp1      = OpFOrdGreaterThanEqual %bool %inval16 %c25f16\n";
18375                 else
18376                         testStr         += "%cmp1      = OpSGreaterThanEqual %bool %inval8 %c25i8\n";
18377
18378                 if (cases[caseNdx][1] == 32)
18379                         testStr         += "%cmp2      = OpFOrdLessThan %bool %inval32 %c50f32\n";
18380                 else if (cases[caseNdx][1] == 16)
18381                         testStr         += "%cmp2      = OpFOrdLessThan %bool %inval16 %c50f16\n";
18382                 else
18383                         testStr         += "%cmp2      = OpSLessThan %bool %inval8 %c50i8\n";
18384
18385                 if (cases[caseNdx][2] == 32)
18386                         testStr         += "%cmp3      = OpFOrdLessThan %bool %inval32 %c10f32\n";
18387                 else if (cases[caseNdx][2] == 16)
18388                         testStr         += "%cmp3      = OpFOrdLessThan %bool %inval16 %c10f16\n";
18389                 else
18390                         testStr         += "%cmp3      = OpSLessThan %bool %inval8 %c10i8\n";
18391
18392                 if (cases[caseNdx][3] == 32)
18393                         testStr         += "%cmp4      = OpFOrdGreaterThanEqual %bool %inval32 %c90f32\n";
18394                 else if (cases[caseNdx][3] == 16)
18395                         testStr         += "%cmp4      = OpFOrdGreaterThanEqual %bool %inval16 %c90f16\n";
18396                 else
18397                         testStr         += "%cmp4      = OpSGreaterThanEqual %bool %inval8 %c90i8\n";
18398
18399                 testStr                 += "%and1      = OpLogicalAnd %bool %cmp1 %cmp2\n";
18400                 testStr                 += "%or1       = OpLogicalOr %bool %cmp3 %cmp4\n";
18401                 testStr                 += "%or2       = OpLogicalOr %bool %and1 %or1\n";
18402                 testStr                 += "%not1      = OpLogicalNot %bool %or2\n";
18403                 testStr                 += "%res       = OpSelect %f32 %not1 %c1f32 %c0f32\n";
18404
18405                 specializations["CAPS"]         = capsStr;
18406                 specializations["CONST"]        = constStr;
18407                 specializations["TEST"]         = testStr;
18408
18409                 // Compute expected result by evaluating the boolean expression computed in the shader for each input value
18410                 for (size_t ndx = 0; ndx < numElements; ++ndx)
18411                         outputData[ndx] = !((inputData[ndx] >= 25.0f && inputData[ndx] < 50.0f) || (inputData[ndx] < 10.0f || inputData[ndx] >= 90.0f));
18412
18413                 spec.assembly = shaderTemplate.specialize(specializations);
18414                 spec.inputs.push_back(BufferSp(new Float32Buffer(inputData)));
18415                 spec.outputs.push_back(BufferSp(new Float32Buffer(outputData)));
18416                 spec.numWorkGroups = IVec3(numElements, 1, 1);
18417                 if (hasFloat16)
18418                         spec.requestedVulkanFeatures.extFloat16Int8 |= EXTFLOAT16INT8FEATURES_FLOAT16;
18419                 if (hasInt8)
18420                         spec.requestedVulkanFeatures.extFloat16Int8 |= EXTFLOAT16INT8FEATURES_INT8;
18421                 spec.extensions.push_back("VK_KHR_shader_float16_int8");
18422
18423                 string testName = "b" + de::toString(cases[caseNdx][0]) + "b" + de::toString(cases[caseNdx][1]) + "b" + de::toString(cases[caseNdx][2]) + "b" + de::toString(cases[caseNdx][3]);
18424                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", spec));
18425         }
18426
18427         return group.release();
18428 }
18429
18430 tcu::TestCaseGroup* createBoolGroup (tcu::TestContext& testCtx)
18431 {
18432         de::MovePtr<tcu::TestCaseGroup>         testGroup                       (new tcu::TestCaseGroup(testCtx, "bool", "Boolean tests"));
18433
18434         testGroup->addChild(createBoolMixedBitSizeGroup(testCtx));
18435
18436         return testGroup.release();
18437 }
18438
18439 tcu::TestCaseGroup* createOpNameAbuseTests (tcu::TestContext& testCtx)
18440 {
18441         de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opname_abuse", "OpName abuse tests"));
18442         vector<CaseParameter>                   abuseCases;
18443         RGBA                                                    defaultColors[4];
18444         map<string, string>                             opNameFragments;
18445
18446         getOpNameAbuseCases(abuseCases);
18447         getDefaultColors(defaultColors);
18448
18449         opNameFragments["testfun"] =
18450                 "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
18451                 "%param1     = OpFunctionParameter %v4f32\n"
18452                 "%label_func = OpLabel\n"
18453                 "%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
18454                 "%b          = OpFAdd %f32 %a %a\n"
18455                 "%c          = OpFSub %f32 %b %a\n"
18456                 "%ret        = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
18457                 "OpReturnValue %ret\n"
18458                 "OpFunctionEnd\n";
18459
18460         for (unsigned int i = 0; i < abuseCases.size(); i++)
18461         {
18462                 string casename;
18463                 casename = string("main") + abuseCases[i].name;
18464
18465                 opNameFragments["debug"] =
18466                         "OpName %BP_main \"" + abuseCases[i].param + "\"";
18467
18468                 createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
18469         }
18470
18471         for (unsigned int i = 0; i < abuseCases.size(); i++)
18472         {
18473                 string casename;
18474                 casename = string("b") + abuseCases[i].name;
18475
18476                 opNameFragments["debug"] =
18477                         "OpName %b \"" + abuseCases[i].param + "\"";
18478
18479                 createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
18480         }
18481
18482         {
18483                 opNameFragments["debug"] =
18484                         "OpName %test_code \"name1\"\n"
18485                         "OpName %param1    \"name2\"\n"
18486                         "OpName %a         \"name3\"\n"
18487                         "OpName %b         \"name4\"\n"
18488                         "OpName %c         \"name5\"\n"
18489                         "OpName %ret       \"name6\"\n";
18490
18491                 createTestsForAllStages("everything_named", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
18492         }
18493
18494         {
18495                 opNameFragments["debug"] =
18496                         "OpName %test_code \"the_same\"\n"
18497                         "OpName %param1    \"the_same\"\n"
18498                         "OpName %a         \"the_same\"\n"
18499                         "OpName %b         \"the_same\"\n"
18500                         "OpName %c         \"the_same\"\n"
18501                         "OpName %ret       \"the_same\"\n";
18502
18503                 createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
18504         }
18505
18506         {
18507                 opNameFragments["debug"] =
18508                         "OpName %BP_main \"to_be\"\n"
18509                         "OpName %BP_main \"or_not\"\n"
18510                         "OpName %BP_main \"to_be\"\n";
18511
18512                 createTestsForAllStages("main_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
18513         }
18514
18515         {
18516                 opNameFragments["debug"] =
18517                         "OpName %b \"to_be\"\n"
18518                         "OpName %b \"or_not\"\n"
18519                         "OpName %b \"to_be\"\n";
18520
18521                 createTestsForAllStages("b_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
18522         }
18523
18524         return abuseGroup.release();
18525 }
18526
18527
18528 tcu::TestCaseGroup* createOpMemberNameAbuseTests (tcu::TestContext& testCtx)
18529 {
18530         de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opmembername_abuse", "OpName abuse tests"));
18531         vector<CaseParameter>                   abuseCases;
18532         RGBA                                                    defaultColors[4];
18533         map<string, string>                             opMemberNameFragments;
18534
18535         getOpNameAbuseCases(abuseCases);
18536         getDefaultColors(defaultColors);
18537
18538         opMemberNameFragments["pre_main"] =
18539                 "%f3str = OpTypeStruct %f32 %f32 %f32\n";
18540
18541         opMemberNameFragments["testfun"] =
18542                 "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
18543                 "%param1     = OpFunctionParameter %v4f32\n"
18544                 "%label_func = OpLabel\n"
18545                 "%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
18546                 "%b          = OpFAdd %f32 %a %a\n"
18547                 "%c          = OpFSub %f32 %b %a\n"
18548                 "%cstr       = OpCompositeConstruct %f3str %c %c %c\n"
18549                 "%d          = OpCompositeExtract %f32 %cstr 0\n"
18550                 "%ret        = OpVectorInsertDynamic %v4f32 %param1 %d %c_i32_0\n"
18551                 "OpReturnValue %ret\n"
18552                 "OpFunctionEnd\n";
18553
18554         for (unsigned int i = 0; i < abuseCases.size(); i++)
18555         {
18556                 string casename;
18557                 casename = string("f3str_x") + abuseCases[i].name;
18558
18559                 opMemberNameFragments["debug"] =
18560                         "OpMemberName %f3str 0 \"" + abuseCases[i].param + "\"";
18561
18562                 createTestsForAllStages(casename, defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
18563         }
18564
18565         {
18566                 opMemberNameFragments["debug"] =
18567                         "OpMemberName %f3str 0 \"name1\"\n"
18568                         "OpMemberName %f3str 1 \"name2\"\n"
18569                         "OpMemberName %f3str 2 \"name3\"\n";
18570
18571                 createTestsForAllStages("everything_named", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
18572         }
18573
18574         {
18575                 opMemberNameFragments["debug"] =
18576                         "OpMemberName %f3str 0 \"the_same\"\n"
18577                         "OpMemberName %f3str 1 \"the_same\"\n"
18578                         "OpMemberName %f3str 2 \"the_same\"\n";
18579
18580                 createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
18581         }
18582
18583         {
18584                 opMemberNameFragments["debug"] =
18585                         "OpMemberName %f3str 0 \"to_be\"\n"
18586                         "OpMemberName %f3str 1 \"or_not\"\n"
18587                         "OpMemberName %f3str 0 \"to_be\"\n"
18588                         "OpMemberName %f3str 2 \"makes_no\"\n"
18589                         "OpMemberName %f3str 0 \"difference\"\n"
18590                         "OpMemberName %f3str 0 \"to_me\"\n";
18591
18592
18593                 createTestsForAllStages("f3str_x_has_multiple_names", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
18594         }
18595
18596         return abuseGroup.release();
18597 }
18598
18599 vector<deUint32> getSparseIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
18600 {
18601         vector<deUint32>        result;
18602         de::Random                      rnd             (seed);
18603
18604         result.reserve(numDataPoints);
18605
18606         for (deUint32 dataPointNdx = 0; dataPointNdx < numDataPoints; ++dataPointNdx)
18607                 result.push_back(rnd.getUint32());
18608
18609         return result;
18610 }
18611
18612 vector<deUint32> getSparseIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2)
18613 {
18614         vector<deUint32>        result;
18615
18616         result.reserve(inData1.size());
18617
18618         for (size_t dataPointNdx = 0; dataPointNdx < inData1.size(); ++dataPointNdx)
18619                 result.push_back(inData1[dataPointNdx] + inData2[dataPointNdx]);
18620
18621         return result;
18622 }
18623
18624 template<class SpecResource>
18625 void createSparseIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
18626 {
18627         const deUint32                  numDataPoints   = 16;
18628         const std::string               testName                ("sparse_ids");
18629         const deUint32                  seed                    (deStringHash(testName.c_str()));
18630         const vector<deUint32>  inData1                 (getSparseIdsAbuseData(numDataPoints, seed + 1));
18631         const vector<deUint32>  inData2                 (getSparseIdsAbuseData(numDataPoints, seed + 2));
18632         const vector<deUint32>  outData                 (getSparseIdsAbuseResults(inData1, inData2));
18633         const StringTemplate    preMain
18634         (
18635                 "%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
18636                 "   %up_u32 = OpTypePointer Uniform %u32\n"
18637                 "   %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
18638                 "   %SSBO32 = OpTypeStruct %ra_u32\n"
18639                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
18640                 "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
18641                 "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
18642                 " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
18643         );
18644         const StringTemplate    decoration
18645         (
18646                 "OpDecorate %ra_u32 ArrayStride 4\n"
18647                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
18648                 "OpDecorate %SSBO32 BufferBlock\n"
18649                 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18650                 "OpDecorate %ssbo_src0 Binding 0\n"
18651                 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
18652                 "OpDecorate %ssbo_src1 Binding 1\n"
18653                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
18654                 "OpDecorate %ssbo_dst Binding 2\n"
18655         );
18656         const StringTemplate    testFun
18657         (
18658                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
18659                 "    %param = OpFunctionParameter %v4f32\n"
18660
18661                 "    %entry = OpLabel\n"
18662                 "        %i = OpVariable %fp_i32 Function\n"
18663                 "             OpStore %i %c_i32_0\n"
18664                 "             OpBranch %loop\n"
18665
18666                 "     %loop = OpLabel\n"
18667                 "    %i_cmp = OpLoad %i32 %i\n"
18668                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
18669                 "             OpLoopMerge %merge %next None\n"
18670                 "             OpBranchConditional %lt %write %merge\n"
18671
18672                 "    %write = OpLabel\n"
18673                 "      %ndx = OpLoad %i32 %i\n"
18674
18675                 "      %127 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
18676                 "      %128 = OpLoad %u32 %127\n"
18677
18678                 // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
18679                 "  %4194000 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
18680                 "  %4194001 = OpLoad %u32 %4194000\n"
18681
18682                 "  %2097151 = OpIAdd %u32 %128 %4194001\n"
18683                 "  %2097152 = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
18684                 "             OpStore %2097152 %2097151\n"
18685                 "             OpBranch %next\n"
18686
18687                 "     %next = OpLabel\n"
18688                 "    %i_cur = OpLoad %i32 %i\n"
18689                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
18690                 "             OpStore %i %i_new\n"
18691                 "             OpBranch %loop\n"
18692
18693                 "    %merge = OpLabel\n"
18694                 "             OpReturnValue %param\n"
18695
18696                 "             OpFunctionEnd\n"
18697         );
18698         SpecResource                    specResource;
18699         map<string, string>             specs;
18700         VulkanFeatures                  features;
18701         map<string, string>             fragments;
18702         vector<string>                  extensions;
18703
18704         specs["num_data_points"]        = de::toString(numDataPoints);
18705
18706         fragments["decoration"]         = decoration.specialize(specs);
18707         fragments["pre_main"]           = preMain.specialize(specs);
18708         fragments["testfun"]            = testFun.specialize(specs);
18709
18710         specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18711         specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18712         specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18713
18714         features.coreFeatures.vertexPipelineStoresAndAtomics    = true;
18715         features.coreFeatures.fragmentStoresAndAtomics                  = true;
18716
18717         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
18718 }
18719
18720 vector<deUint32> getLotsIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
18721 {
18722         vector<deUint32>        result;
18723         de::Random                      rnd             (seed);
18724
18725         result.reserve(numDataPoints);
18726
18727         // Fixed value
18728         result.push_back(1u);
18729
18730         // Random values
18731         for (deUint32 dataPointNdx = 1; dataPointNdx < numDataPoints; ++dataPointNdx)
18732                 result.push_back(rnd.getUint8());
18733
18734         return result;
18735 }
18736
18737 vector<deUint32> getLotsIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2, const deUint32 count)
18738 {
18739         vector<deUint32>        result;
18740
18741         result.reserve(inData1.size());
18742
18743         for (size_t dataPointNdx = 0; dataPointNdx < inData1.size(); ++dataPointNdx)
18744                 result.push_back(inData1[dataPointNdx] + count * inData2[dataPointNdx]);
18745
18746         return result;
18747 }
18748
18749 template<class SpecResource>
18750 void createLotsIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
18751 {
18752         const deUint32                  numDataPoints   = 16;
18753         const deUint32                  firstNdx                = 100u;
18754         const deUint32                  sequenceCount   = 10000u;
18755         const std::string               testName                ("lots_ids");
18756         const deUint32                  seed                    (deStringHash(testName.c_str()));
18757         const vector<deUint32>  inData1                 (getLotsIdsAbuseData(numDataPoints, seed + 1));
18758         const vector<deUint32>  inData2                 (getLotsIdsAbuseData(numDataPoints, seed + 2));
18759         const vector<deUint32>  outData                 (getLotsIdsAbuseResults(inData1, inData2, sequenceCount));
18760         const StringTemplate preMain
18761         (
18762                 "%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
18763                 "   %up_u32 = OpTypePointer Uniform %u32\n"
18764                 "   %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
18765                 "   %SSBO32 = OpTypeStruct %ra_u32\n"
18766                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
18767                 "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
18768                 "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
18769                 " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
18770         );
18771         const StringTemplate decoration
18772         (
18773                 "OpDecorate %ra_u32 ArrayStride 4\n"
18774                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
18775                 "OpDecorate %SSBO32 BufferBlock\n"
18776                 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18777                 "OpDecorate %ssbo_src0 Binding 0\n"
18778                 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
18779                 "OpDecorate %ssbo_src1 Binding 1\n"
18780                 "OpDecorate %ssbo_dst DescriptorSet 0\n"
18781                 "OpDecorate %ssbo_dst Binding 2\n"
18782         );
18783         const StringTemplate testFun
18784         (
18785                 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
18786                 "    %param = OpFunctionParameter %v4f32\n"
18787
18788                 "    %entry = OpLabel\n"
18789                 "        %i = OpVariable %fp_i32 Function\n"
18790                 "             OpStore %i %c_i32_0\n"
18791                 "             OpBranch %loop\n"
18792
18793                 "     %loop = OpLabel\n"
18794                 "    %i_cmp = OpLoad %i32 %i\n"
18795                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
18796                 "             OpLoopMerge %merge %next None\n"
18797                 "             OpBranchConditional %lt %write %merge\n"
18798
18799                 "    %write = OpLabel\n"
18800                 "      %ndx = OpLoad %i32 %i\n"
18801
18802                 "       %90 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
18803                 "       %91 = OpLoad %u32 %90\n"
18804
18805                 "       %98 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
18806                 "       %${zeroth_id} = OpLoad %u32 %98\n"
18807
18808                 "${seq}\n"
18809
18810                 // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
18811                 "      %dst = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
18812                 "             OpStore %dst %${last_id}\n"
18813                 "             OpBranch %next\n"
18814
18815                 "     %next = OpLabel\n"
18816                 "    %i_cur = OpLoad %i32 %i\n"
18817                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
18818                 "             OpStore %i %i_new\n"
18819                 "             OpBranch %loop\n"
18820
18821                 "    %merge = OpLabel\n"
18822                 "             OpReturnValue %param\n"
18823
18824                 "             OpFunctionEnd\n"
18825         );
18826         deUint32                                lastId                  = firstNdx;
18827         SpecResource                    specResource;
18828         map<string, string>             specs;
18829         VulkanFeatures                  features;
18830         map<string, string>             fragments;
18831         vector<string>                  extensions;
18832         std::string                             sequence;
18833
18834         for (deUint32 sequenceNdx = 0; sequenceNdx < sequenceCount; ++sequenceNdx)
18835         {
18836                 const deUint32          sequenceId              = sequenceNdx + firstNdx;
18837                 const std::string       sequenceIdStr   = de::toString(sequenceId);
18838
18839                 sequence += "%" + sequenceIdStr + " = OpIAdd %u32 %91 %" + de::toString(sequenceId - 1) + "\n";
18840                 lastId = sequenceId;
18841
18842                 if (sequenceNdx == 0)
18843                         sequence.reserve((10 + sequence.length()) * sequenceCount);
18844         }
18845
18846         specs["num_data_points"]        = de::toString(numDataPoints);
18847         specs["zeroth_id"]                      = de::toString(firstNdx - 1);
18848         specs["last_id"]                        = de::toString(lastId);
18849         specs["seq"]                            = sequence;
18850
18851         fragments["decoration"]         = decoration.specialize(specs);
18852         fragments["pre_main"]           = preMain.specialize(specs);
18853         fragments["testfun"]            = testFun.specialize(specs);
18854
18855         specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18856         specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18857         specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18858
18859         features.coreFeatures.vertexPipelineStoresAndAtomics    = true;
18860         features.coreFeatures.fragmentStoresAndAtomics                  = true;
18861
18862         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
18863 }
18864
18865 tcu::TestCaseGroup* createSpirvIdsAbuseTests (tcu::TestContext& testCtx)
18866 {
18867         de::MovePtr<tcu::TestCaseGroup> testGroup       (new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
18868
18869         createSparseIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
18870         createLotsIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
18871
18872         return testGroup.release();
18873 }
18874
18875 tcu::TestCaseGroup* createSpirvIdsAbuseGroup (tcu::TestContext& testCtx)
18876 {
18877         de::MovePtr<tcu::TestCaseGroup> testGroup       (new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
18878
18879         createSparseIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
18880         createLotsIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
18881
18882         return testGroup.release();
18883 }
18884
18885 tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
18886 {
18887         const bool testComputePipeline = true;
18888
18889         de::MovePtr<tcu::TestCaseGroup> instructionTests        (new tcu::TestCaseGroup(testCtx, "instruction", "Instructions with special opcodes/operands"));
18890         de::MovePtr<tcu::TestCaseGroup> computeTests            (new tcu::TestCaseGroup(testCtx, "compute", "Compute Instructions with special opcodes/operands"));
18891         de::MovePtr<tcu::TestCaseGroup> graphicsTests           (new tcu::TestCaseGroup(testCtx, "graphics", "Graphics Instructions with special opcodes/operands"));
18892
18893         computeTests->addChild(createSpivVersionCheckTests(testCtx, testComputePipeline));
18894         computeTests->addChild(createLocalSizeGroup(testCtx));
18895         computeTests->addChild(createOpNopGroup(testCtx));
18896         computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITHOUT_NAN));
18897         computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITH_NAN));
18898         computeTests->addChild(createOpAtomicGroup(testCtx, false));
18899         computeTests->addChild(createOpAtomicGroup(testCtx, true));                                     // Using new StorageBuffer decoration
18900         computeTests->addChild(createOpAtomicGroup(testCtx, false, 1024, true));        // Return value validation
18901         computeTests->addChild(createOpAtomicGroup(testCtx, true, 65536, false, true)); // volatile atomics
18902         computeTests->addChild(createOpLineGroup(testCtx));
18903         computeTests->addChild(createOpModuleProcessedGroup(testCtx));
18904         computeTests->addChild(createOpNoLineGroup(testCtx));
18905         computeTests->addChild(createOpConstantNullGroup(testCtx));
18906         computeTests->addChild(createOpConstantCompositeGroup(testCtx));
18907         computeTests->addChild(createOpConstantUsageGroup(testCtx));
18908         computeTests->addChild(createSpecConstantGroup(testCtx));
18909         computeTests->addChild(createOpSourceGroup(testCtx));
18910         computeTests->addChild(createOpSourceExtensionGroup(testCtx));
18911         computeTests->addChild(createDecorationGroupGroup(testCtx));
18912         computeTests->addChild(createOpPhiGroup(testCtx));
18913         computeTests->addChild(createLoopControlGroup(testCtx));
18914         computeTests->addChild(createFunctionControlGroup(testCtx));
18915         computeTests->addChild(createSelectionControlGroup(testCtx));
18916         computeTests->addChild(createBlockOrderGroup(testCtx));
18917         computeTests->addChild(createMultipleShaderGroup(testCtx));
18918         computeTests->addChild(createMemoryAccessGroup(testCtx));
18919         computeTests->addChild(createOpCopyMemoryGroup(testCtx));
18920         computeTests->addChild(createOpCopyObjectGroup(testCtx));
18921         computeTests->addChild(createNoContractionGroup(testCtx));
18922         computeTests->addChild(createOpUndefGroup(testCtx));
18923         computeTests->addChild(createOpUnreachableGroup(testCtx));
18924         computeTests->addChild(createOpQuantizeToF16Group(testCtx));
18925         computeTests->addChild(createOpFRemGroup(testCtx));
18926         computeTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_PASS));
18927         computeTests->addChild(createOpSRemComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
18928         computeTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_PASS));
18929         computeTests->addChild(createOpSModComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
18930         computeTests->addChild(createConvertComputeTests(testCtx, "OpSConvert", "sconvert"));
18931         computeTests->addChild(createConvertComputeTests(testCtx, "OpUConvert", "uconvert"));
18932         computeTests->addChild(createConvertComputeTests(testCtx, "OpFConvert", "fconvert"));
18933         computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertSToF", "convertstof"));
18934         computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToS", "convertftos"));
18935         computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertUToF", "convertutof"));
18936         computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToU", "convertftou"));
18937         computeTests->addChild(createOpCompositeInsertGroup(testCtx));
18938         computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
18939         computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
18940         computeTests->addChild(createOpNMinGroup(testCtx));
18941         computeTests->addChild(createOpNMaxGroup(testCtx));
18942         computeTests->addChild(createOpNClampGroup(testCtx));
18943         {
18944                 de::MovePtr<tcu::TestCaseGroup> computeAndroidTests     (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
18945
18946                 computeAndroidTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
18947                 computeAndroidTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
18948
18949                 computeTests->addChild(computeAndroidTests.release());
18950         }
18951
18952         computeTests->addChild(create8BitStorageComputeGroup(testCtx));
18953         computeTests->addChild(create16BitStorageComputeGroup(testCtx));
18954         computeTests->addChild(createFloatControlsComputeGroup(testCtx));
18955         computeTests->addChild(createUboMatrixPaddingComputeGroup(testCtx));
18956         computeTests->addChild(createCompositeInsertComputeGroup(testCtx));
18957         computeTests->addChild(createVariableInitComputeGroup(testCtx));
18958         computeTests->addChild(createConditionalBranchComputeGroup(testCtx));
18959         computeTests->addChild(createIndexingComputeGroup(testCtx));
18960         computeTests->addChild(createVariablePointersComputeGroup(testCtx));
18961         computeTests->addChild(createPhysicalPointersComputeGroup(testCtx));
18962         computeTests->addChild(createImageSamplerComputeGroup(testCtx));
18963         computeTests->addChild(createOpNameGroup(testCtx));
18964         computeTests->addChild(createOpMemberNameGroup(testCtx));
18965         computeTests->addChild(createPointerParameterComputeGroup(testCtx));
18966         computeTests->addChild(createFloat16Group(testCtx));
18967         computeTests->addChild(createBoolGroup(testCtx));
18968         computeTests->addChild(createWorkgroupMemoryComputeGroup(testCtx));
18969         computeTests->addChild(createSpirvIdsAbuseGroup(testCtx));
18970         computeTests->addChild(createSignedIntCompareGroup(testCtx));
18971         computeTests->addChild(createUnusedVariableComputeTests(testCtx));
18972         computeTests->addChild(createPtrAccessChainGroup(testCtx));
18973         computeTests->addChild(createHlslComputeGroup(testCtx));
18974         computeTests->addChild(create64bitCompareComputeGroup(testCtx));
18975
18976         graphicsTests->addChild(createCrossStageInterfaceTests(testCtx));
18977         graphicsTests->addChild(createSpivVersionCheckTests(testCtx, !testComputePipeline));
18978         graphicsTests->addChild(createOpNopTests(testCtx));
18979         graphicsTests->addChild(createOpSourceTests(testCtx));
18980         graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
18981         graphicsTests->addChild(createOpModuleProcessedTests(testCtx));
18982         graphicsTests->addChild(createOpLineTests(testCtx));
18983         graphicsTests->addChild(createOpNoLineTests(testCtx));
18984         graphicsTests->addChild(createOpConstantNullTests(testCtx));
18985         graphicsTests->addChild(createOpConstantCompositeTests(testCtx));
18986         graphicsTests->addChild(createMemoryAccessTests(testCtx));
18987         graphicsTests->addChild(createOpUndefTests(testCtx));
18988         graphicsTests->addChild(createSelectionBlockOrderTests(testCtx));
18989         graphicsTests->addChild(createModuleTests(testCtx));
18990         graphicsTests->addChild(createUnusedVariableTests(testCtx));
18991         graphicsTests->addChild(createSwitchBlockOrderTests(testCtx));
18992         graphicsTests->addChild(createOpPhiTests(testCtx));
18993         graphicsTests->addChild(createNoContractionTests(testCtx));
18994         graphicsTests->addChild(createOpQuantizeTests(testCtx));
18995         graphicsTests->addChild(createLoopTests(testCtx));
18996         graphicsTests->addChild(createSpecConstantTests(testCtx));
18997         graphicsTests->addChild(createSpecConstantOpQuantizeToF16Group(testCtx));
18998         graphicsTests->addChild(createBarrierTests(testCtx));
18999         graphicsTests->addChild(createDecorationGroupTests(testCtx));
19000         graphicsTests->addChild(createFRemTests(testCtx));
19001         graphicsTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
19002         graphicsTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
19003
19004         {
19005                 de::MovePtr<tcu::TestCaseGroup> graphicsAndroidTests    (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
19006
19007                 graphicsAndroidTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
19008                 graphicsAndroidTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
19009
19010                 graphicsTests->addChild(graphicsAndroidTests.release());
19011         }
19012         graphicsTests->addChild(createOpNameTests(testCtx));
19013         graphicsTests->addChild(createOpNameAbuseTests(testCtx));
19014         graphicsTests->addChild(createOpMemberNameAbuseTests(testCtx));
19015
19016         graphicsTests->addChild(create8BitStorageGraphicsGroup(testCtx));
19017         graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
19018         graphicsTests->addChild(createFloatControlsGraphicsGroup(testCtx));
19019         graphicsTests->addChild(createUboMatrixPaddingGraphicsGroup(testCtx));
19020         graphicsTests->addChild(createCompositeInsertGraphicsGroup(testCtx));
19021         graphicsTests->addChild(createVariableInitGraphicsGroup(testCtx));
19022         graphicsTests->addChild(createConditionalBranchGraphicsGroup(testCtx));
19023         graphicsTests->addChild(createIndexingGraphicsGroup(testCtx));
19024         graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));
19025         graphicsTests->addChild(createImageSamplerGraphicsGroup(testCtx));
19026         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpSConvert", "sconvert"));
19027         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpUConvert", "uconvert"));
19028         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpFConvert", "fconvert"));
19029         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertSToF", "convertstof"));
19030         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToS", "convertftos"));
19031         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertUToF", "convertutof"));
19032         graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToU", "convertftou"));
19033         graphicsTests->addChild(createPointerParameterGraphicsGroup(testCtx));
19034         graphicsTests->addChild(createVaryingNameGraphicsGroup(testCtx));
19035         graphicsTests->addChild(createFloat16Tests(testCtx));
19036         graphicsTests->addChild(createSpirvIdsAbuseTests(testCtx));
19037         graphicsTests->addChild(create64bitCompareGraphicsGroup(testCtx));
19038
19039         instructionTests->addChild(computeTests.release());
19040         instructionTests->addChild(graphicsTests.release());
19041         instructionTests->addChild(createSpirvVersion1p4Group(testCtx));
19042
19043         return instructionTests.release();
19044 }
19045
19046 } // SpirVAssembly
19047 } // vkt