1 /*-------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2015 Google Inc.
6 * Copyright (c) 2016 The Khronos Group Inc.
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * \brief SPIR-V Assembly Tests for Instructions (special opcode/operand)
23 *//*--------------------------------------------------------------------*/
25 #include "vktSpvAsmInstructionTests.hpp"
26 #include "vktAmberTestCase.hpp"
28 #include "tcuCommandLine.hpp"
29 #include "tcuFormatUtil.hpp"
30 #include "tcuFloat.hpp"
31 #include "tcuFloatFormat.hpp"
32 #include "tcuRGBA.hpp"
33 #include "tcuStringTemplate.hpp"
34 #include "tcuTestLog.hpp"
35 #include "tcuVectorUtil.hpp"
36 #include "tcuInterval.hpp"
39 #include "vkDeviceUtil.hpp"
40 #include "vkMemUtil.hpp"
41 #include "vkPlatform.hpp"
42 #include "vkPrograms.hpp"
43 #include "vkQueryUtil.hpp"
45 #include "vkRefUtil.hpp"
46 #include "vkStrUtil.hpp"
47 #include "vkTypeUtil.hpp"
49 #include "deStringUtil.hpp"
50 #include "deUniquePtr.hpp"
52 #include "deRandom.hpp"
53 #include "tcuStringTemplate.hpp"
55 #include "vktSpvAsmCrossStageInterfaceTests.hpp"
56 #include "vktSpvAsm8bitStorageTests.hpp"
57 #include "vktSpvAsm16bitStorageTests.hpp"
58 #include "vktSpvAsmUboMatrixPaddingTests.hpp"
59 #include "vktSpvAsmConditionalBranchTests.hpp"
60 #include "vktSpvAsmIndexingTests.hpp"
61 #include "vktSpvAsmImageSamplerTests.hpp"
62 #include "vktSpvAsmComputeShaderCase.hpp"
63 #include "vktSpvAsmComputeShaderTestUtil.hpp"
64 #include "vktSpvAsmFloatControlsTests.hpp"
65 #include "vktSpvAsmFromHlslTests.hpp"
66 #include "vktSpvAsmEmptyStructTests.hpp"
67 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
68 #include "vktSpvAsmVariablePointersTests.hpp"
69 #include "vktSpvAsmVariableInitTests.hpp"
70 #include "vktSpvAsmPointerParameterTests.hpp"
71 #include "vktSpvAsmSpirvVersion1p4Tests.hpp"
72 #include "vktSpvAsmSpirvVersionTests.hpp"
73 #include "vktTestCaseUtil.hpp"
74 #include "vktSpvAsmLoopDepLenTests.hpp"
75 #include "vktSpvAsmLoopDepInfTests.hpp"
76 #include "vktSpvAsmCompositeInsertTests.hpp"
77 #include "vktSpvAsmVaryingNameTests.hpp"
78 #include "vktSpvAsmWorkgroupMemoryTests.hpp"
79 #include "vktSpvAsmSignedIntCompareTests.hpp"
80 #include "vktSpvAsmSignedOpTests.hpp"
81 #include "vktSpvAsmPtrAccessChainTests.hpp"
82 #include "vktSpvAsmVectorShuffleTests.hpp"
83 #include "vktSpvAsmFloatControlsExtensionlessTests.hpp"
84 #include "vktSpvAsmNonSemanticInfoTests.hpp"
85 #include "vktSpvAsm64bitCompareTests.hpp"
86 #include "vktSpvAsmTrinaryMinMaxTests.hpp"
87 #include "vktSpvAsmTerminateInvocationTests.hpp"
88 #ifndef CTS_USES_VULKANSC
89 #include "vktSpvAsmIntegerDotProductTests.hpp"
90 #endif // CTS_USES_VULKANSC
91 #include "vktSpvAsmPhysicalStorageBufferPointerTests.hpp"
103 namespace SpirVAssembly
117 using tcu::TestStatus;
120 using tcu::StringTemplate;
// Named boolean flags. Presumably passed to test-case builders to select
// whether NaN inputs are exercised -- TODO confirm at the use sites.
123 const bool TEST_WITH_NAN = true;
124 const bool TEST_WITHOUT_NAN = false;
// SPIR-V assembly text for the helper function %ld_arg_${var}: takes an %i32
// element index and returns a single %f16. Two halves share one 32-bit word,
// so the %u32 word at index / 2 of member 0 of %${var} is loaded, bitcast to
// %v2f16, and the lane selected by (index & 1) is extracted dynamically.
// ${var} is a placeholder for the buffer variable name (presumably expanded
// with tcu::StringTemplate -- confirm at the use site).
126 const string loadScalarF16FromUint =
127 "%ld_arg_${var} = OpFunction %f16 None %f16_i32_fn\n"
128 "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
129 "%ld_arg_${var}_entry = OpLabel\n"
130 "%ld_arg_${var}_conv = OpBitcast %u32 %ld_arg_${var}_param\n"
131 "%ld_arg_${var}_div = OpUDiv %u32 %ld_arg_${var}_conv %c_u32_2\n"
132 "%ld_arg_${var}_and_low = OpBitwiseAnd %u32 %ld_arg_${var}_param %c_u32_1\n"
133 "%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_div\n"
134 "%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
135 "%ld_arg_${var}_unpack = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
136 "%ld_arg_${var}_ex = OpVectorExtractDynamic %f16 %ld_arg_${var}_unpack %ld_arg_${var}_and_low\n"
137 "OpReturnValue %ld_arg_${var}_ex\n"
// SPIR-V assembly text for %ld_arg_${var}: takes an %i32 element index and
// returns a %v2f16. The %u32 word at that index of member 0 of %${var} is
// loaded and bitcast directly to %v2f16 (one word holds exactly two halves).
140 const string loadV2F16FromUint =
141 "%ld_arg_${var} = OpFunction %v2f16 None %v2f16_i32_fn\n"
142 "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
143 "%ld_arg_${var}_entry = OpLabel\n"
144 "%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param\n"
145 "%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
146 "%ld_arg_${var}_cast = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
147 "OpReturnValue %ld_arg_${var}_cast\n"
// SPIR-V assembly text for %ld_arg_${var}: takes an %i32 element index and
// returns a %v3f16. Words 0 and 1 of element [index] of member 0 of %${var}
// are loaded, each is bitcast to %v2f16, and lanes 0 1 2 of the concatenated
// pair are shuffled into the result (the fourth half is dropped).
150 const string loadV3F16FromUints =
151 // Since we allocate a vec4 worth of values, this case is almost the
152 // same as the vec4 case.
153 "%ld_arg_${var} = OpFunction %v3f16 None %v3f16_i32_fn\n"
154 "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
155 "%ld_arg_${var}_entry = OpLabel\n"
156 "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
157 "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
158 "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
159 "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
160 "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
161 "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
162 "%ld_arg_${var}_shuffle = OpVectorShuffle %v3f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2\n"
163 "OpReturnValue %ld_arg_${var}_shuffle\n"
// SPIR-V assembly text for %ld_arg_${var}: takes an %i32 element index and
// returns a %v4f16. Words 0 and 1 of element [index] of member 0 of %${var}
// are loaded, each is bitcast to %v2f16, and the two pairs are concatenated
// with a shuffle selecting lanes 0 1 2 3.
166 const string loadV4F16FromUints =
167 "%ld_arg_${var} = OpFunction %v4f16 None %v4f16_i32_fn\n"
168 "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
169 "%ld_arg_${var}_entry = OpLabel\n"
170 "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
171 "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
172 "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
173 "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
174 "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
175 "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
176 "%ld_arg_${var}_shuffle = OpVectorShuffle %v4f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2 3\n"
177 "OpReturnValue %ld_arg_${var}_shuffle\n"
// SPIR-V assembly text for %ld_arg_${var}: takes an %i32 element index and
// returns a %m2x2f16. Words 0 and 1 of element [index] of member 0 of %${var}
// are loaded; each word is bitcast to one %v2f16 column and the two columns
// are combined with OpCompositeConstruct.
180 const string loadM2x2F16FromUints =
181 "%ld_arg_${var} = OpFunction %m2x2f16 None %m2x2f16_i32_fn\n"
182 "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
183 "%ld_arg_${var}_entry = OpLabel\n"
184 "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
185 "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
186 "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
187 "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
188 "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
189 "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
190 "%ld_arg_${var}_cons = OpCompositeConstruct %m2x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1\n"
191 "OpReturnValue %ld_arg_${var}_cons\n"
// SPIR-V assembly text for %ld_arg_${var}: takes an %i32 element index and
// returns a %m2x3f16. Words 0..3 of element [index] of member 0 of %${var}
// are loaded, two words per column. Each word is bitcast to %v2f16, each
// column is built by shuffling lanes 0 1 2 of its word pair into a %v3f16
// (the fourth half of every pair is dropped), and the two columns are
// combined with OpCompositeConstruct.
194 const string loadM2x3F16FromUints =
195 "%ld_arg_${var} = OpFunction %m2x3f16 None %m2x3f16_i32_fn\n"
196 "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
197 "%ld_arg_${var}_entry = OpLabel\n"
198 "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
199 "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
200 "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
201 "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
202 "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
203 "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
204 "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
205 "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
206 "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
207 "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
208 "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
209 "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
210 "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
211 "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
212 "%ld_arg_${var}_mat = OpCompositeConstruct %m2x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
213 "OpReturnValue %ld_arg_${var}_mat\n"
// SPIR-V assembly text for %ld_arg_${var}: takes an %i32 element index and
// returns a %m2x4f16. Words 0..3 of element [index] of member 0 of %${var}
// are loaded, two words per column. Each word is bitcast to %v2f16, each
// column is the %v4f16 concatenation (lanes 0 1 2 3) of its word pair, and
// the two columns are combined with OpCompositeConstruct.
216 const string loadM2x4F16FromUints =
217 "%ld_arg_${var} = OpFunction %m2x4f16 None %m2x4f16_i32_fn\n"
218 "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
219 "%ld_arg_${var}_entry = OpLabel\n"
220 "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
221 "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
222 "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
223 "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
224 "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
225 "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
226 "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
227 "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
228 "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
229 "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
230 "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
231 "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
232 "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
233 "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
234 "%ld_arg_${var}_mat = OpCompositeConstruct %m2x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
235 "OpReturnValue %ld_arg_${var}_mat\n"
// SPIR-V assembly text for %ld_arg_${var}: takes an %i32 element index and
// returns a %m3x2f16. Words 0..2 of element [index] of member 0 of %${var}
// are loaded; each word is bitcast to one %v2f16 column and the three columns
// are combined with OpCompositeConstruct.
238 const string loadM3x2F16FromUints =
239 "%ld_arg_${var} = OpFunction %m3x2f16 None %m3x2f16_i32_fn\n"
240 "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
241 "%ld_arg_${var}_entry = OpLabel\n"
242 "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
243 "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
244 "%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
245 "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
246 "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
247 "%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
248 "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
249 "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
250 "%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
251 "%ld_arg_${var}_mat = OpCompositeConstruct %m3x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2\n"
252 "OpReturnValue %ld_arg_${var}_mat\n"
// SPIR-V assembly text for %ld_arg_${var}: takes an %i32 element index and
// returns a %m3x3f16. Words 0..5 of element [index] of member 0 of %${var}
// are loaded, two words per column. Each word is bitcast to %v2f16, each
// column is built by shuffling lanes 0 1 2 of its word pair into a %v3f16
// (the fourth half of every pair is dropped), and the three columns are
// combined with OpCompositeConstruct.
255 const string loadM3x3F16FromUints =
256 "%ld_arg_${var} = OpFunction %m3x3f16 None %m3x3f16_i32_fn\n"
257 "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
258 "%ld_arg_${var}_entry = OpLabel\n"
259 "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
260 "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
261 "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
262 "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
263 "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
264 "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
265 "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
266 "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
267 "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
268 "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
269 "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
270 "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
271 "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
272 "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
273 "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
274 "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
275 "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
276 "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
277 "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
278 "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
279 "%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
280 "%ld_arg_${var}_mat = OpCompositeConstruct %m3x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
281 "OpReturnValue %ld_arg_${var}_mat\n"
// SPIR-V assembly text for %ld_arg_${var}: takes an %i32 element index and
// returns a %m3x4f16. Words 0..5 of element [index] of member 0 of %${var}
// are loaded, two words per column. Each word is bitcast to %v2f16, each
// column is the %v4f16 concatenation (lanes 0 1 2 3) of its word pair, and
// the three columns are combined with OpCompositeConstruct.
284 const string loadM3x4F16FromUints =
285 "%ld_arg_${var} = OpFunction %m3x4f16 None %m3x4f16_i32_fn\n"
286 "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
287 "%ld_arg_${var}_entry = OpLabel\n"
288 "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
289 "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
290 "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
291 "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
292 "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
293 "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
294 "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
295 "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
296 "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
297 "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
298 "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
299 "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
300 "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
301 "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
302 "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
303 "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
304 "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
305 "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
306 "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
307 "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
308 "%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
309 "%ld_arg_${var}_mat = OpCompositeConstruct %m3x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
310 "OpReturnValue %ld_arg_${var}_mat\n"
// SPIR-V assembly text for %ld_arg_${var}: takes an %i32 element index and
// returns a %m4x2f16. Words 0..3 of element [index] of member 0 of %${var}
// are loaded; each word is bitcast to one %v2f16 column and the four columns
// are combined with OpCompositeConstruct.
313 const string loadM4x2F16FromUints =
314 "%ld_arg_${var} = OpFunction %m4x2f16 None %m4x2f16_i32_fn\n"
315 "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
316 "%ld_arg_${var}_entry = OpLabel\n"
317 "%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
318 "%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
319 "%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
320 "%ld_arg_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
321 "%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
322 "%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
323 "%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
324 "%ld_arg_${var}_ld3 = OpLoad %u32 %ld_arg_${var}_gep3\n"
325 "%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
326 "%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
327 "%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
328 "%ld_arg_${var}_bc3 = OpBitcast %v2f16 %ld_arg_${var}_ld3\n"
329 "%ld_arg_${var}_mat = OpCompositeConstruct %m4x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2 %ld_arg_${var}_bc3\n"
330 "OpReturnValue %ld_arg_${var}_mat\n"
// SPIR-V assembly text for %ld_arg_${var}: takes an %i32 element index and
// returns a %m4x3f16. Words 0..7 of element [index] of member 0 of %${var}
// are loaded, two words per column. Each word is bitcast to %v2f16, each
// column is built by shuffling lanes 0 1 2 of its word pair into a %v3f16
// (the fourth half of every pair is dropped), and the four columns are
// combined with OpCompositeConstruct.
333 const string loadM4x3F16FromUints =
334 "%ld_arg_${var} = OpFunction %m4x3f16 None %m4x3f16_i32_fn\n"
335 "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
336 "%ld_arg_${var}_entry = OpLabel\n"
337 "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
338 "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
339 "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
340 "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
341 "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
342 "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
343 "%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
344 "%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
345 "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
346 "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
347 "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
348 "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
349 "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
350 "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
351 "%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
352 "%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
353 "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
354 "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
355 "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
356 "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
357 "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
358 "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
359 "%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
360 "%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
361 "%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
362 "%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
363 "%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
364 "%ld_arg_${var}_vec3 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2\n"
365 "%ld_arg_${var}_mat = OpCompositeConstruct %m4x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
366 "OpReturnValue %ld_arg_${var}_mat\n"
// SPIR-V assembly text for %ld_arg_${var}: takes an %i32 element index and
// returns a %m4x4f16. Words 0..7 of element [index] of member 0 of %${var}
// are loaded, two words per column. Each word is bitcast to %v2f16, each
// column is the %v4f16 concatenation (lanes 0 1 2 3) of its word pair, and
// the four columns are combined with OpCompositeConstruct.
369 const string loadM4x4F16FromUints =
370 "%ld_arg_${var} = OpFunction %m4x4f16 None %m4x4f16_i32_fn\n"
371 "%ld_arg_${var}_param = OpFunctionParameter %i32\n"
372 "%ld_arg_${var}_entry = OpLabel\n"
373 "%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
374 "%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
375 "%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
376 "%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
377 "%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
378 "%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
379 "%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
380 "%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
381 "%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
382 "%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
383 "%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
384 "%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
385 "%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
386 "%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
387 "%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
388 "%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
389 "%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
390 "%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
391 "%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
392 "%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
393 "%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
394 "%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
395 "%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
396 "%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
397 "%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
398 "%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
399 "%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
400 "%ld_arg_${var}_vec3 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2 3\n"
401 "%ld_arg_${var}_mat = OpCompositeConstruct %m4x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
402 "OpReturnValue %ld_arg_${var}_mat\n"
// SPIR-V assembly text for %st_fn_${var}: a %void function taking an %f16
// value (param1) and an %i32 element index (param2). Two halves share one
// 32-bit word, so the value is inserted into a zeroed %v2f16 at lane
// (index & 1), the result is ORed with %c_u32_low_ones or %c_u32_high_ones
// (chosen by index parity), and the word at index / 2 of member 0 of %${var}
// is updated with OpAtomicAnd.
// NOTE(review): the atomic's scope and semantics operands are %c_u32_1 and
// %c_u32_0; their numeric values, and those of the *_ones constants, are
// defined outside this view -- confirm there.
405 const string storeScalarF16AsUint =
406 // This version is sensitive to the initial value in the output buffer.
407 // The infrastructure sets all output buffer bits to one before invoking
408 // the shader so this version uses an atomic and to generate the correct
410 "%st_fn_${var} = OpFunction %void None %void_f16_i32_fn\n"
411 "%st_fn_${var}_param1 = OpFunctionParameter %f16\n"
412 "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
413 "%st_fn_${var}_entry = OpLabel\n"
414 "%st_fn_${var}_and_low = OpBitwiseAnd %u32 %st_fn_${var}_param2 %c_u32_1\n"
415 "%st_fn_${var}_zero_vec = OpBitcast %v2f16 %c_u32_0\n"
416 "%st_fn_${var}_insert = OpVectorInsertDynamic %v2f16 %st_fn_${var}_zero_vec %st_fn_${var}_param1 %st_fn_${var}_and_low\n"
417 "%st_fn_${var}_odd = OpIEqual %bool %st_fn_${var}_and_low %c_u32_1\n"
418 // Or 16 bits of ones into the half that was not populated with the result.
419 "%st_fn_${var}_sel = OpSelect %u32 %st_fn_${var}_odd %c_u32_low_ones %c_u32_high_ones\n"
420 "%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_insert\n"
421 "%st_fn_${var}_or = OpBitwiseOr %u32 %st_fn_${var}_cast %st_fn_${var}_sel\n"
422 "%st_fn_${var}_conv = OpBitcast %u32 %st_fn_${var}_param2\n"
423 "%st_fn_${var}_div = OpUDiv %u32 %st_fn_${var}_conv %c_u32_2\n"
424 "%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_div\n"
425 "%st_fn_${var}_and = OpAtomicAnd %u32 %st_fn_${var}_gep %c_u32_1 %c_u32_0 %st_fn_${var}_or\n"
// SPIR-V assembly text for %st_fn_${var}: a %void function taking a %v2f16
// value (param1) and an %i32 element index (param2). The vector is bitcast to
// a single %u32 and stored at that index of member 0 of %${var}.
429 const string storeV2F16AsUint =
430 "%st_fn_${var} = OpFunction %void None %void_v2f16_i32_fn\n"
431 "%st_fn_${var}_param1 = OpFunctionParameter %v2f16\n"
432 "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
433 "%st_fn_${var}_entry = OpLabel\n"
434 "%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_param1\n"
435 "%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2\n"
436 "OpStore %st_fn_${var}_gep %st_fn_${var}_cast\n"
// SPIR-V assembly text for %st_fn_${var}: a %void function taking a %v3f16
// value (param1) and an %i32 element index (param2). The vector is split into
// two %v2f16 pairs, each bitcast to %u32 and stored to words 0 and 1 of
// element [index] of member 0 of %${var}.
// The shuffles use param1 for both operands, so lane index 3 addresses
// component 0 of the second copy; that lane is the "extra data" mentioned in
// the comment below.
440 const string storeV3F16AsUints =
441 // Since we allocate a vec4 worth of values, this case can be treated
442 // almost the same as a vec4 case. We will store some extra data that
443 // should not be compared.
444 "%st_fn_${var} = OpFunction %void None %void_v3f16_i32_fn\n"
445 "%st_fn_${var}_param1 = OpFunctionParameter %v3f16\n"
446 "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
447 "%st_fn_${var}_entry = OpLabel\n"
448 "%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
449 "%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
450 "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
451 "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
452 "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
453 "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
454 "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
455 "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
// SPIR-V assembly text for %st_fn_${var}: a %void function taking a %v4f16
// value (param1) and an %i32 element index (param2). Lanes (0 1) and (2 3)
// are shuffled out as two %v2f16 pairs, each bitcast to %u32 and stored to
// words 0 and 1 of element [index] of member 0 of %${var}.
459 const string storeV4F16AsUints =
460 "%st_fn_${var} = OpFunction %void None %void_v4f16_i32_fn\n"
461 "%st_fn_${var}_param1 = OpFunctionParameter %v4f16\n"
462 "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
463 "%st_fn_${var}_entry = OpLabel\n"
464 "%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
465 "%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
466 "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
467 "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
468 "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
469 "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
470 "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
471 "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
// SPIR-V assembly text for %st_fn_${var}: a %void function taking a %m2x2f16
// value (param1) and an %i32 element index (param2). Each %v2f16 column is
// extracted, bitcast to %u32 and stored to words 0 and 1 of element [index]
// of member 0 of %${var}.
475 const string storeM2x2F16AsUints =
476 "%st_fn_${var} = OpFunction %void None %void_m2x2f16_i32_fn\n"
477 "%st_fn_${var}_param1 = OpFunctionParameter %m2x2f16\n"
478 "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
479 "%st_fn_${var}_entry = OpLabel\n"
480 "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
481 "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
482 "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
483 "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
484 "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
485 "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
486 "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
487 "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
// SPIR-V assembly text for %st_fn_${var}: a %void function taking a %m2x3f16
// value (param1) and an %i32 element index (param2). Each %v3f16 column is
// extracted and split into two %v2f16 pairs (lanes 0 1 and lanes 2 3), each
// pair is bitcast to %u32, and the four words are stored to words 0..3 of
// element [index] of member 0 of %${var}.
// Each shuffle uses the same column for both operands, so lane index 3
// addresses component 0 of the second copy.
491 const string storeM2x3F16AsUints =
492 // In the extracted elements for 01 and 11 the second element doesn't
494 "%st_fn_${var} = OpFunction %void None %void_m2x3f16_i32_fn\n"
495 "%st_fn_${var}_param1 = OpFunctionParameter %m2x3f16\n"
496 "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
497 "%st_fn_${var}_entry = OpLabel\n"
498 "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
499 "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
500 "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
501 "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
502 "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
503 "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
504 "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
505 "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
506 "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
507 "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
508 "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
509 "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
510 "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
511 "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
512 "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
513 "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
514 "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
515 "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
// SPIR-V assembly text for %st_fn_${var}: a %void function taking a %m2x4f16
// value (param1) and an %i32 element index (param2). Each %v4f16 column is
// extracted and split into two %v2f16 pairs (lanes 0 1 and lanes 2 3), each
// pair is bitcast to %u32, and the four words are stored to words 0..3 of
// element [index] of member 0 of %${var}.
519 const string storeM2x4F16AsUints =
520 "%st_fn_${var} = OpFunction %void None %void_m2x4f16_i32_fn\n"
521 "%st_fn_${var}_param1 = OpFunctionParameter %m2x4f16\n"
522 "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
523 "%st_fn_${var}_entry = OpLabel\n"
524 "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
525 "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
526 "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
527 "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
528 "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
529 "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
530 "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
531 "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
532 "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
533 "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
534 "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
535 "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
536 "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
537 "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
538 "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
539 "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
540 "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
541 "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
// SPIR-V assembly text for %st_fn_${var}: a %void function taking a %m3x2f16
// value (param1) and an %i32 element index (param2). Each %v2f16 column is
// extracted, bitcast to %u32 and stored to words 0..2 of element [index] of
// member 0 of %${var}.
545 const string storeM3x2F16AsUints =
546 "%st_fn_${var} = OpFunction %void None %void_m3x2f16_i32_fn\n"
547 "%st_fn_${var}_param1 = OpFunctionParameter %m3x2f16\n"
548 "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
549 "%st_fn_${var}_entry = OpLabel\n"
550 "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
551 "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
552 "%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
553 "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
554 "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
555 "%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
556 "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
557 "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
558 "%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
559 "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
560 "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
561 "OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
// SPIR-V assembly text for %st_fn_${var}: a %void function taking a %m3x3f16
// value (param1) and an %i32 element index (param2). Each %v3f16 column is
// extracted and split into two %v2f16 pairs (lanes 0 1 and lanes 2 3), each
// pair is bitcast to %u32, and the six words are stored to words 0..5 of
// element [index] of member 0 of %${var}.
// Each shuffle uses the same column for both operands, so lane index 3
// addresses component 0 of the second copy.
565 const string storeM3x3F16AsUints =
566 // The second element of each broken down vec3 doesn't matter.
567 "%st_fn_${var} = OpFunction %void None %void_m3x3f16_i32_fn\n"
568 "%st_fn_${var}_param1 = OpFunctionParameter %m3x3f16\n"
569 "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
570 "%st_fn_${var}_entry = OpLabel\n"
571 "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
572 "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
573 "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
574 "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
575 "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
576 "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
577 "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
578 "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
579 "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
580 "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
581 "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
582 "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
583 "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
584 "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
585 "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
586 "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
587 "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
588 "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
589 "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
590 "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
591 "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
592 "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
593 "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
594 "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
595 "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
596 "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
597 "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
// SPIR-V assembly text for a helper function %st_fn_${var} that takes an m3x4f16 matrix and
// an i32 index and stores the matrix as 32-bit words into %${var}[0][index][0..5].
// Each of the three v4f16 columns is split into components (0, 1) and (2, 3); every v2f16
// half is bitcast to one %u32, giving two words per column.
// ${var} is a placeholder in the text (presumably filled in by a string template at the use site - confirm).
601 const string storeM3x4F16AsUints =
602 "%st_fn_${var} = OpFunction %void None %void_m3x4f16_i32_fn\n"
603 "%st_fn_${var}_param1 = OpFunctionParameter %m3x4f16\n"
604 "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
605 "%st_fn_${var}_entry = OpLabel\n"
// Pull the three columns out of the matrix.
606 "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
607 "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
608 "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
// Split each column into its low and high pair of components.
609 "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
610 "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
611 "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
612 "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
613 "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
614 "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
// Reinterpret each pair as one 32-bit unsigned word.
615 "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
616 "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
617 "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
618 "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
619 "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
620 "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
// Address the six destination words for the matrix selected by param2.
621 "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
622 "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
623 "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
624 "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
625 "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
626 "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
627 "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
628 "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
629 "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
630 "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
631 "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
632 "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
// SPIR-V assembly text for a helper function %st_fn_${var} that takes an m4x2f16 matrix and
// an i32 index and stores the matrix as 32-bit words into %${var}[0][index][0..3].
// Every column is already a v2f16, so it is bitcast to a single %u32 without any shuffle.
// ${var} is a placeholder in the text (presumably filled in by a string template at the use site - confirm).
636 const string storeM4x2F16AsUints =
637 "%st_fn_${var} = OpFunction %void None %void_m4x2f16_i32_fn\n"
638 "%st_fn_${var}_param1 = OpFunctionParameter %m4x2f16\n"
639 "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
640 "%st_fn_${var}_entry = OpLabel\n"
// Pull the four columns out of the matrix.
641 "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
642 "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
643 "%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
644 "%st_fn_${var}_ex3 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 3\n"
// One 32-bit word per column.
645 "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
646 "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
647 "%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
648 "%st_fn_${var}_bc3 = OpBitcast %u32 %st_fn_${var}_ex3\n"
// Address the four destination words for the matrix selected by param2.
649 "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
650 "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
651 "%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
652 "%st_fn_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
653 "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
654 "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
655 "OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
656 "OpStore %st_fn_${var}_gep3 %st_fn_${var}_bc3\n"
// SPIR-V assembly text for a helper function %st_fn_${var} that takes an m4x3f16 matrix and
// an i32 index and stores the matrix as 32-bit words into %${var}[0][index][0..7].
// Each of the four v3f16 columns is split into two v2f16 halves and each half is bitcast to
// one %u32, giving two words per column.
// ${var} is a placeholder in the text (presumably filled in by a string template at the use site - confirm).
660 const string storeM4x3F16AsUints =
661 // The last element of each decomposed vec3 doesn't matter.
662 "%st_fn_${var} = OpFunction %void None %void_m4x3f16_i32_fn\n"
663 "%st_fn_${var}_param1 = OpFunctionParameter %m4x3f16\n"
664 "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
665 "%st_fn_${var}_entry = OpLabel\n"
// Pull the four columns out of the matrix.
666 "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
667 "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
668 "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
669 "%st_fn_${var}_ex3 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 3\n"
// Split every column into components (0, 1) and (2, 3). Both shuffle operands are the same
// v3f16, so shuffle index 3 selects component 0 of the repeated operand.
670 "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
671 "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
672 "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
673 "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
674 "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
675 "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
676 "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
677 "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
// Reinterpret each pair as one 32-bit unsigned word.
678 "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
679 "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
680 "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
681 "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
682 "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
683 "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
684 "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
685 "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
// Address the eight destination words for the matrix selected by param2.
686 "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
687 "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
688 "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
689 "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
690 "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
691 "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
692 "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
693 "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
694 "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
695 "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
696 "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
697 "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
698 "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
699 "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
700 "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
701 "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
// SPIR-V assembly text for a helper function %st_fn_${var} that takes an m4x4f16 matrix and
// an i32 index and stores the matrix as 32-bit words into %${var}[0][index][0..7].
// Each of the four v4f16 columns is split into components (0, 1) and (2, 3); every v2f16
// half is bitcast to one %u32, giving two words per column.
// ${var} is a placeholder in the text (presumably filled in by a string template at the use site - confirm).
705 const string storeM4x4F16AsUints =
706 "%st_fn_${var} = OpFunction %void None %void_m4x4f16_i32_fn\n"
707 "%st_fn_${var}_param1 = OpFunctionParameter %m4x4f16\n"
708 "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
709 "%st_fn_${var}_entry = OpLabel\n"
// Pull the four columns out of the matrix.
710 "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
711 "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
712 "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
713 "%st_fn_${var}_ex3 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 3\n"
// Split each column into its low and high pair of components.
714 "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
715 "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
716 "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
717 "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
718 "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
719 "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
720 "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
721 "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
// Reinterpret each pair as one 32-bit unsigned word.
722 "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
723 "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
724 "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
725 "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
726 "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
727 "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
728 "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
729 "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
// Address the eight destination words for the matrix selected by param2.
730 "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
731 "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
732 "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
733 "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
734 "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
735 "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
736 "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
737 "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
738 "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
739 "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
740 "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
741 "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
742 "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
743 "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
744 "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
745 "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
// Fills numValues consecutive elements of dst, viewed as an array of T and starting at
// element index `offset`, with values obtained from de::randomScalar<T>(rnd, minValue, maxValue).
//   rnd                 random generator that is advanced by every draw
//   minValue, maxValue  bounds forwarded unchanged to de::randomScalar
//   dst                 untyped destination; it is indexed up to offset + numValues - 1 as T
//   numValues           number of elements to write
//   offset              index of the first element written (defaults to 0)
750 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, int offset = 0)
752 T* const typedPtr = (T*)dst;
753 for (int ndx = 0; ndx < numValues; ndx++)
754 typedPtr[offset + ndx] = de::randomScalar<T>(rnd, minValue, maxValue);
// Filtered variant of fillRandomScalars: each destination element is redrawn until the
// filter accepts the candidate, so only accepted values are written.
757 // Filter is a function that returns true if a value should pass, false otherwise.
//   filter  callable invoked as filter(value); drawing repeats while it returns false
//   offset  index of the first element written in dst (defaults to 0)
// The other parameters have the same meaning as in the unfiltered overload.
758 template<typename T, typename FilterT>
759 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, FilterT filter, int offset = 0)
761 T* const typedPtr = (T*)dst;
763 for (int ndx = 0; ndx < numValues; ndx++)
// Rejection sampling: keep drawing until the filter lets the value through.
766 value = de::randomScalar<T>(rnd, minValue, maxValue);
767 while (!filter(value));
769 typedPtr[offset + ndx] = value;
773 // Gets a 64-bit integer with a more logarithmic distribution
// Starts from a full 64-bit random value and keeps only its low k bits, where k is picked by
// rnd.getInt(1, 63); varying the bit width is what spreads results across magnitudes.
774 deInt64 randomInt64LogDistributed (de::Random& rnd)
776 deInt64 val = rnd.getUint64();
// (1ull << k) - 1 is a mask of the k lowest bits.
777 val &= (1ull << rnd.getInt(1, 63)) - 1;
// Overwrites dst[0 .. numValues-1] with values from randomInt64LogDistributed(rnd).
// dst is indexed directly (no resizing happens here), so it must already hold at least
// numValues elements.
783 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues)
785 for (int ndx = 0; ndx < numValues; ndx++)
786 dst[ndx] = randomInt64LogDistributed(rnd);
// Filtered variant: for each of the numValues iterations, values are drawn from
// randomInt64LogDistributed(rnd) until filter(value) returns true.
// NOTE(review): the accepted value is presumably stored in dst[ndx] like the unfiltered
// overload does - confirm against the full function body.
789 template<typename FilterT>
790 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues, FilterT filter)
792 for (int ndx = 0; ndx < numValues; ndx++)
796 value = randomInt64LogDistributed(rnd);
797 } while (!filter(value));
// Predicate over a deInt64; its signature fits the FilterT callable of the filtered fill helpers.
// NOTE(review): going by the name it presumably accepts values >= 0 - confirm against the body.
802 inline bool filterNonNegative (const deInt64 value)
// Predicate over a deInt64; its signature fits the FilterT callable of the filtered fill helpers.
// NOTE(review): going by the name it presumably accepts values > 0 - confirm against the body.
807 inline bool filterPositive (const deInt64 value)
// Predicate over a deInt64; its signature fits the FilterT callable of the filtered fill helpers.
// NOTE(review): going by the name it presumably accepts values != 0 - confirm against the body.
812 inline bool filterNotZero (const deInt64 value)
// Replaces every element of values, in place, with deFloatFloor() of that element.
817 static void floorAll (vector<float>& values)
819 for (size_t i = 0; i < values.size(); i++)
820 values[i] = deFloatFloor(values[i]);
// Vec4 overload: replaces every element of values, in place, with floor() of that vector
// (presumably a component-wise floor - confirm which floor() overload is selected).
823 static void floorAll (vector<Vec4>& values)
825 for (size_t i = 0; i < values.size(); i++)
826 values[i] = floor(values[i]);
// Initializes the `name` member from case_ and the `param` member from param_.
834 CaseParameter (const char* case_, const string& param_) : name(case_), param(param_) {}
837 // Assembly code used for testing LocalSize, OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
841 // layout(std140, set = 0, binding = 0) readonly buffer Input {
844 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
848 // layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
851 // uint x = gl_GlobalInvocationID.x;
852 // output_data.elements[x] = -input_data.elements[x];
// Builds the SPIR-V assembly text of a compute shader that negates one float per invocation
// (output[i] = -input[i]), with the workgroup size declared in a selectable way.
//   useLiteralLocalSize           emit "OpExecutionMode %main LocalSize x y z" with literal sizes
//   useLiteralLocalSizeId         emit "OpExecutionModeId %main LocalSizeId" referring to the
//                                 constants %const_0..%const_2; this variant also lists the
//                                 buffer variables in the entry point interface and uses the
//                                 "Block" / "StorageBuffer" flavour of the shared helpers
//   useSpecConstantWorkgroupSize  declare %gl_WorkGroupSize (BuiltIn WorkgroupSize) as a
//                                 composite of spec constants with SpecId 100, 101 and 102
//   workGroupSize                 sizes written as literals, constants and spec-constant defaults
//   ndx                           component of gl_GlobalInvocationID used as the buffer index
// The text is accumulated in a local std::ostringstream.
855 static string getAsmForLocalSizeTest(bool useLiteralLocalSize, bool useLiteralLocalSizeId, bool useSpecConstantWorkgroupSize, IVec3 workGroupSize, deUint32 ndx)
857 std::ostringstream out;
858 out << "OpCapability Shader\n"
859 "OpMemoryModel Logical GLSL450\n";
861 if (useLiteralLocalSizeId)
// LocalSizeId form: the size operands are ids of constants defined further down.
863 out << "OpEntryPoint GLCompute %main \"main\" %id %indata %outdata\n"
864 "OpExecutionModeId %main LocalSizeId %const_0 %const_1 %const_2\n";
868 out << "OpEntryPoint GLCompute %main \"main\" %id\n";
870 if (useLiteralLocalSize)
// Classic form: the size is given as three literal operands.
872 out << "OpExecutionMode %main LocalSize "
873 << workGroupSize.x() << " " << workGroupSize.y() << " " << workGroupSize.z() << "\n";
877 out << "OpSource GLSL 430\n"
878 "OpName %main \"main\"\n"
879 "OpName %id \"gl_GlobalInvocationID\"\n"
880 "OpDecorate %id BuiltIn GlobalInvocationId\n";
882 if (useSpecConstantWorkgroupSize)
884 out << "OpDecorate %spec_0 SpecId 100\n"
885 "OpDecorate %spec_1 SpecId 101\n"
886 "OpDecorate %spec_2 SpecId 102\n"
887 "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n";
890 if (useLiteralLocalSizeId)
// Buffer declarations for the LocalSizeId variant, followed by the constants that
// OpExecutionModeId refers to.
892 out << getComputeAsmInputOutputBufferTraits("Block")
893 << getComputeAsmCommonTypes("StorageBuffer")
894 << getComputeAsmInputOutputBuffer("StorageBuffer")
895 << "%const_0 = OpConstant %u32 " << workGroupSize.x() << "\n"
896 "%const_1 = OpConstant %u32 " << workGroupSize.y() << "\n"
897 "%const_2 = OpConstant %u32 " << workGroupSize.z() << "\n";
// Same helpers called with their default arguments.
901 out << getComputeAsmInputOutputBufferTraits()
902 << getComputeAsmCommonTypes()
903 << getComputeAsmInputOutputBuffer();
906 out << "%id = OpVariable %uvec3ptr Input\n"
907 "%zero = OpConstant %i32 0 \n";
909 if (useSpecConstantWorkgroupSize)
// The spec constants default to the requested workgroup size.
911 out << "%spec_0 = OpSpecConstant %u32 "<< workGroupSize.x() << "\n"
912 "%spec_1 = OpSpecConstant %u32 "<< workGroupSize.y() << "\n"
913 "%spec_2 = OpSpecConstant %u32 "<< workGroupSize.z() << "\n"
914 "%gl_WorkGroupSize = OpSpecConstantComposite %uvec3 %spec_0 %spec_1 %spec_2\n";
// Shader body: pick component `ndx` of the invocation id, load, negate, store.
917 out << "%main = OpFunction %void None %voidf\n"
919 "%idval = OpLoad %uvec3 %id\n"
920 "%ndx = OpCompositeExtract %u32 %idval " << ndx << "\n"
922 "%inloc = OpAccessChain %f32ptr %indata %zero %ndx\n"
923 "%inval = OpLoad %f32 %inloc\n"
924 "%neg = OpFNegate %f32 %inval\n"
925 "%outloc = OpAccessChain %f32ptr %outdata %zero %ndx\n"
926 " OpStore %outloc %neg\n"
// Creates the "localsize" (useLocalSizeId == false) or "localsize_id" (useLocalSizeId == true)
// test group. Every case runs the negate shader from getAsmForLocalSizeTest over 64 random
// floats and expects the negated values in the output buffer.
// The cases cover a 1x1x1 workgroup dispatched 64 times, and a single dispatch whose
// workgroup spans all 64 elements along x, y or z, each with a literal size, a literal size
// plus spec constants, and (only when useLocalSizeId is false) spec constants alone.
// Returns the group released from its MovePtr, so the caller receives the raw pointer.
933 tcu::TestCaseGroup* createLocalSizeGroup(tcu::TestContext& testCtx, bool useLocalSizeId)
// Indexed by the bool: false -> "localsize", true -> "localsize_id".
935 const char* groupName[]{ "localsize", "localsize_id" };
937 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, groupName[useLocalSizeId], ""));
938 ComputeShaderSpec spec;
// The generator is seeded from the group name.
939 de::Random rnd (deStringHash(group->getName()));
940 const deUint32 numElements = 64u;
941 vector<float> positiveFloats (numElements, 0);
942 vector<float> negativeFloats (numElements, 0);
944 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
// Expected output is the element-wise negation of the input.
946 for (size_t ndx = 0; ndx < numElements; ++ndx)
947 negativeFloats[ndx] = -positiveFloats[ndx];
949 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
950 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
// NOTE(review): these two requirements are presumably applied only for the LocalSizeId
// variant - confirm against the enclosing condition.
954 spec.spirvVersion = SPIRV_VERSION_1_5;
955 spec.extensions.push_back("VK_KHR_maintenance4");
// One workgroup per element, workgroup size 1x1x1.
958 spec.numWorkGroups = IVec3(numElements, 1, 1);
960 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, 1, 1), 0u);
961 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize", "", spec));
963 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, 1, 1), 0u);
964 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize", "", spec));
966 if (!useLocalSizeId) // dont repeat this test when useLocalSizeId is true
968 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, 1, 1), 0u);
969 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize", "", spec));
// From here on a single workgroup covers all elements along one axis.
972 spec.numWorkGroups = IVec3(1, 1, 1);
974 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(numElements, 1, 1), 0u);
975 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_x", "", spec));
977 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(numElements, 1, 1), 0u);
978 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_x", "", spec));
980 if (!useLocalSizeId) // dont repeat this test when useLocalSizeId is true
982 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(numElements, 1, 1), 0u);
983 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_x", "", spec));
// y axis: the shader indexes the buffers with component 1 of the invocation id.
986 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, numElements, 1), 1u);
987 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_y", "", spec));
989 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, numElements, 1), 1u);
990 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_y", "", spec));
992 if (!useLocalSizeId) // dont repeat this test when useLocalSizeId is true
994 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, numElements, 1), 1u);
995 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_y", "", spec));
// z axis: the shader indexes the buffers with component 2 of the invocation id.
998 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, 1, numElements), 2u);
999 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_z", "", spec));
1001 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, 1, numElements), 2u);
1002 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_z", "", spec));
1004 if (!useLocalSizeId) // dont repeat this test when useLocalSizeId is true
1006 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, 1, numElements), 2u);
1007 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_z", "", spec));
1010 return group.release();
// Creates the "opnop" test group with a single case named "all". The compute shader negates
// 100 random floats, one per workgroup, and contains OpNop in its function body; the case
// expects the negated values in the output buffer.
// Returns the group released from its MovePtr.
1013 tcu::TestCaseGroup* createOpNopGroup (tcu::TestContext& testCtx)
1015 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnop", "Test the OpNop instruction"));
1016 ComputeShaderSpec spec;
// The generator is seeded from the group name.
1017 de::Random rnd (deStringHash(group->getName()));
1018 const int numElements = 100;
1019 vector<float> positiveFloats (numElements, 0);
1020 vector<float> negativeFloats (numElements, 0);
1022 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
// Expected output is the element-wise negation of the input.
1024 for (size_t ndx = 0; ndx < numElements; ++ndx)
1025 negativeFloats[ndx] = -positiveFloats[ndx];
// Shader text: shared preamble, debug names, decorations, common types and buffers, then main.
1028 string(getComputeAsmShaderPreamble()) +
1030 "OpSource GLSL 430\n"
1031 "OpName %main \"main\"\n"
1032 "OpName %id \"gl_GlobalInvocationID\"\n"
1034 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1036 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1038 + string(getComputeAsmInputOutputBuffer()) +
1040 "%id = OpVariable %uvec3ptr Input\n"
1041 "%zero = OpConstant %i32 0\n"
1043 "%main = OpFunction %void None %voidf\n"
1044 "%label = OpLabel\n"
1045 "%idval = OpLoad %uvec3 %id\n"
1046 "%x = OpCompositeExtract %u32 %idval 0\n"
1048 " OpNop\n" // Inside a function body
1050 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1051 "%inval = OpLoad %f32 %inloc\n"
1052 "%neg = OpFNegate %f32 %inval\n"
1053 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1054 " OpStore %outloc %neg\n"
1057 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1058 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
// One workgroup per element.
1059 spec.numWorkGroups = IVec3(numElements, 1, 1);
1061 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNop appearing at different places", spec));
1063 return group.release();
// Creates the "unused_variables" test group. Every case runs the usual negate shader over
// 100 random floats, with extra declarations from the getUnused*() helpers added to the
// module; the expected output is the negated input, as in the other negate-shader groups.
// Two kinds of case are generated per tested VariableLocation:
//   "variable_<location>"  adds unused decorations, types/constants and a buffer
//   "function_<location>"  additionally passes getUnusedEntryPoint() to the preamble and
//                          appends getUnusedFunctionBody() after main
// Returns the group released from its MovePtr.
1066 tcu::TestCaseGroup* createUnusedVariableComputeTests (tcu::TestContext& testCtx)
1068 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "unused_variables", "Compute shaders with unused variables"));
// The generator is seeded from the group name.
1069 de::Random rnd (deStringHash(group->getName()));
1070 const int numElements = 100;
1071 vector<float> positiveFloats (numElements, 0);
1072 vector<float> negativeFloats (numElements, 0);
1074 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
// Expected output is the element-wise negation of the input.
1076 for (size_t ndx = 0; ndx < numElements; ++ndx)
1077 negativeFloats[ndx] = -positiveFloats[ndx];
1079 const VariableLocation testLocations[] =
// Unused-variable cases, one per entry of testLocations.
1086 for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
1088 const VariableLocation& location = testLocations[locationNdx];
1092 ComputeShaderSpec spec;
1095 string(getComputeAsmShaderPreamble()) +
1097 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1099 + getUnusedDecorations(location)
1101 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1103 + getUnusedTypesAndConstants()
1105 + string(getComputeAsmInputOutputBuffer())
1107 + getUnusedBuffer() +
1109 "%id = OpVariable %uvec3ptr Input\n"
1110 "%zero = OpConstant %i32 0\n"
1112 "%main = OpFunction %void None %voidf\n"
1113 "%label = OpLabel\n"
1114 "%idval = OpLoad %uvec3 %id\n"
1115 "%x = OpCompositeExtract %u32 %idval 0\n"
1117 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1118 "%inval = OpLoad %f32 %inloc\n"
1119 "%neg = OpFNegate %f32 %inval\n"
1120 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1121 " OpStore %outloc %neg\n"
1124 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1125 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1126 spec.numWorkGroups = IVec3(numElements, 1, 1);
1128 std::string testName = "variable_" + location.toString();
1129 std::string testDescription = "Unused variable test with " + location.toDescription();
1131 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testDescription.c_str(), spec));
// Unused-function cases: same shader, plus an extra entry point and function body.
1136 ComputeShaderSpec spec;
1139 string(getComputeAsmShaderPreamble("", "", "", getUnusedEntryPoint())) +
1141 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1143 + getUnusedDecorations(location)
1145 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1147 + getUnusedTypesAndConstants() +
1149 "%c_i32_0 = OpConstant %i32 0\n"
1150 "%c_i32_1 = OpConstant %i32 1\n"
1152 + string(getComputeAsmInputOutputBuffer())
1154 + getUnusedBuffer() +
1156 "%id = OpVariable %uvec3ptr Input\n"
1157 "%zero = OpConstant %i32 0\n"
1159 "%main = OpFunction %void None %voidf\n"
1160 "%label = OpLabel\n"
1161 "%idval = OpLoad %uvec3 %id\n"
1162 "%x = OpCompositeExtract %u32 %idval 0\n"
1164 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1165 "%inval = OpLoad %f32 %inloc\n"
1166 "%neg = OpFNegate %f32 %inval\n"
1167 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1168 " OpStore %outloc %neg\n"
1172 + getUnusedFunctionBody();
1174 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1175 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1176 spec.numWorkGroups = IVec3(numElements, 1, 1);
1178 std::string testName = "function_" + location.toString();
1179 std::string testDescription = "Unused function test with " + location.toDescription();
1181 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testDescription.c_str(), spec));
1185 return group.release();
// Result checker for the OpFUnord* cases: compares the single output allocation, read as
// deInt32 values, against the expected output buffer element by element.
//   nanSupported     when false, the element check carries an extra condition on NaN inputs
//                    (NOTE(review): such elements are presumably skipped - confirm)
//   inputs           the two float input buffers; inputs[0] and inputs[1] are read
//   outputAllocs     device results; exactly one allocation is expected
//   expectedOutputs  expectedOutputs[0] holds the reference integers
//   log              receives one message per mismatching element
// Every mismatch is logged and clears returnValue, so all failing elements get reported.
1188 template<bool nanSupported>
1189 bool compareFUnord (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
1191 if (outputAllocs.size() != 1)
1194 vector<deUint8> input1Bytes;
1195 vector<deUint8> input2Bytes;
1196 vector<deUint8> expectedBytes;
1198 inputs[0].getBytes(input1Bytes);
1199 inputs[1].getBytes(input2Bytes);
1200 expectedOutputs[0].getBytes(expectedBytes);
// Typed views over the raw byte buffers and the mapped output memory.
1202 const deInt32* const expectedOutputAsInt = reinterpret_cast<const deInt32*>(&expectedBytes.front());
1203 const deInt32* const outputAsInt = static_cast<const deInt32*>(outputAllocs[0]->getHostPtr());
1204 const float* const input1AsFloat = reinterpret_cast<const float*>(&input1Bytes.front());
1205 const float* const input2AsFloat = reinterpret_cast<const float*>(&input2Bytes.front());
1206 bool returnValue = true;
// The element count is derived from the size of the expected buffer.
1208 for (size_t idx = 0; idx < expectedBytes.size() / sizeof(deInt32); ++idx)
1210 if (!nanSupported && (tcu::Float32(input1AsFloat[idx]).isNaN() || tcu::Float32(input2AsFloat[idx]).isNaN()))
1213 if (outputAsInt[idx] != expectedOutputAsInt[idx])
1215 log << TestLog::Message << "ERROR: Sub-case failed. inputs: " << input1AsFloat[idx] << "," << input2AsFloat[idx] << " output: " << outputAsInt[idx]<< " expected output: " << expectedOutputAsInt[idx] << TestLog::EndMessage;
1216 returnValue = false;
// Pointer to a host-side comparison of two floats that yields a VkBool32 result.
1222 typedef VkBool32 (*compareFuncType) (float, float);
// Host-side reference comparison for this case; set from the constructor argument.
1228 compareFuncType compareFunc;
// Constructor taking the case name, the SPIR-V opcode text and the reference comparison.
1230 OpFUnordCase (const char* _name, const char* _opCode, compareFuncType _compareFunc)
1233 , compareFunc (_compareFunc) {}
// Appends one OpFUnordCase to a vector named `cases`, which must be in scope at the point of use.
//   NAME      case name; also used to name a local struct compare_<NAME>
//   OPCODE    SPIR-V opcode string stored in the case
//   OPERATOR  C++ comparison operator; compare_<NAME>::compare(x, y) returns VK_TRUE when
//             (x OPERATOR y) holds and VK_FALSE otherwise
1236 #define ADD_OPFUNORD_CASE(NAME, OPCODE, OPERATOR) \
1238 struct compare_##NAME { static VkBool32 compare(float x, float y) { return (x OPERATOR y) ? VK_TRUE : VK_FALSE; } }; \
1239 cases.push_back(OpFUnordCase(#NAME, OPCODE, compare_##NAME::compare)); \
1240 } while (deGetFalse())
// Creates the "opfunord" (testWithNan == false) or "opfunord_nan" (testWithNan == true) test
// group, with one compute case per OpFUnord* comparison opcode.
// Each case feeds two float buffers of 100 elements to a shader that applies the opcode and
// stores 1 or 0 per element. The reference value is 1 when either input is NaN or when the
// host-side comparison holds.
// With testWithNan the shader also declares SPV_KHR_float_controls, the
// SignedZeroInfNanPreserve capability and the matching 32-bit execution mode, and results
// are checked by compareFUnord<true>; otherwise compareFUnord<false> is used.
// Returns the group released from its MovePtr.
1242 tcu::TestCaseGroup* createOpFUnordGroup (tcu::TestContext& testCtx, const bool testWithNan)
1244 const string nan = testWithNan ? "_nan" : "";
1245 const string groupName = "opfunord" + nan;
1246 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpFUnord* opcodes"));
// The generator is seeded from the group name.
1247 de::Random rnd (deStringHash(group->getName()));
1248 const int numElements = 100;
1249 vector<OpFUnordCase> cases;
// These three strings are empty unless NaN preservation is requested.
1250 string extensions = testWithNan ? "OpExtension \"SPV_KHR_float_controls\"\n" : "";
1251 string capabilities = testWithNan ? "OpCapability SignedZeroInfNanPreserve\n" : "";
1252 string exeModes = testWithNan ? "OpExecutionMode %main SignedZeroInfNanPreserve 32\n" : "";
// Shader template; ${OPCODE} is replaced per case below.
1253 const StringTemplate shaderTemplate (
1254 string(getComputeAsmShaderPreamble(capabilities, extensions, exeModes)) +
1255 "OpSource GLSL 430\n"
1256 "OpName %main \"main\"\n"
1257 "OpName %id \"gl_GlobalInvocationID\"\n"
1259 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1261 "OpDecorate %buf BufferBlock\n"
1262 "OpDecorate %buf2 BufferBlock\n"
1263 "OpDecorate %indata1 DescriptorSet 0\n"
1264 "OpDecorate %indata1 Binding 0\n"
1265 "OpDecorate %indata2 DescriptorSet 0\n"
1266 "OpDecorate %indata2 Binding 1\n"
1267 "OpDecorate %outdata DescriptorSet 0\n"
1268 "OpDecorate %outdata Binding 2\n"
1269 "OpDecorate %f32arr ArrayStride 4\n"
1270 "OpDecorate %i32arr ArrayStride 4\n"
1271 "OpMemberDecorate %buf 0 Offset 0\n"
1272 "OpMemberDecorate %buf2 0 Offset 0\n"
1274 + string(getComputeAsmCommonTypes()) +
1276 "%buf = OpTypeStruct %f32arr\n"
1277 "%bufptr = OpTypePointer Uniform %buf\n"
1278 "%indata1 = OpVariable %bufptr Uniform\n"
1279 "%indata2 = OpVariable %bufptr Uniform\n"
1281 "%buf2 = OpTypeStruct %i32arr\n"
1282 "%buf2ptr = OpTypePointer Uniform %buf2\n"
1283 "%outdata = OpVariable %buf2ptr Uniform\n"
1285 "%id = OpVariable %uvec3ptr Input\n"
1286 "%zero = OpConstant %i32 0\n"
1287 "%consti1 = OpConstant %i32 1\n"
1288 "%constf1 = OpConstant %f32 1.0\n"
1290 "%main = OpFunction %void None %voidf\n"
1291 "%label = OpLabel\n"
1292 "%idval = OpLoad %uvec3 %id\n"
1293 "%x = OpCompositeExtract %u32 %idval 0\n"
1295 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
1296 "%inval1 = OpLoad %f32 %inloc1\n"
1297 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
1298 "%inval2 = OpLoad %f32 %inloc2\n"
1299 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
1301 "%result = ${OPCODE} %bool %inval1 %inval2\n"
1302 "%int_res = OpSelect %i32 %result %consti1 %zero\n"
1303 " OpStore %outloc %int_res\n"
1306 " OpFunctionEnd\n");
// One case per unordered comparison, paired with the matching C++ operator.
1308 ADD_OPFUNORD_CASE(equal, "OpFUnordEqual", ==);
1309 ADD_OPFUNORD_CASE(less, "OpFUnordLessThan", <);
1310 ADD_OPFUNORD_CASE(lessequal, "OpFUnordLessThanEqual", <=);
1311 ADD_OPFUNORD_CASE(greater, "OpFUnordGreaterThan", >);
1312 ADD_OPFUNORD_CASE(greaterequal, "OpFUnordGreaterThanEqual", >=);
1313 ADD_OPFUNORD_CASE(notequal, "OpFUnordNotEqual", !=);
1315 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1317 map<string, string> specializations;
1318 ComputeShaderSpec spec;
1319 const float NaN = std::numeric_limits<float>::quiet_NaN();
1320 vector<float> inputFloats1 (numElements, 0);
1321 vector<float> inputFloats2 (numElements, 0);
1322 vector<deInt32> expectedInts (numElements, 0);
1324 specializations["OPCODE"] = cases[caseNdx].opCode;
1325 spec.assembly = shaderTemplate.specialize(specializations);
1327 fillRandomScalars(rnd, 1.f, 100.f, &inputFloats1[0], numElements);
1328 for (size_t ndx = 0; ndx < numElements; ++ndx)
// Six input patterns: second input larger, smaller, equal, NaN in the second input only,
// NaN in the first input only, NaN in both.
1332 case 0: inputFloats2[ndx] = inputFloats1[ndx] + 1.0f; break;
1333 case 1: inputFloats2[ndx] = inputFloats1[ndx] - 1.0f; break;
1334 case 2: inputFloats2[ndx] = inputFloats1[ndx]; break;
1335 case 3: inputFloats2[ndx] = NaN; break;
1336 case 4: inputFloats2[ndx] = inputFloats1[ndx]; inputFloats1[ndx] = NaN; break;
1337 case 5: inputFloats2[ndx] = NaN; inputFloats1[ndx] = NaN; break;
// Unordered semantics: true if either operand is NaN or the comparison holds.
1339 expectedInts[ndx] = tcu::Float32(inputFloats1[ndx]).isNaN() || tcu::Float32(inputFloats2[ndx]).isNaN() || cases[caseNdx].compareFunc(inputFloats1[ndx], inputFloats2[ndx]);
1342 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1343 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1344 spec.outputs.push_back(BufferSp(new Int32Buffer(expectedInts)));
1345 spec.numWorkGroups = IVec3(numElements, 1, 1);
1346 spec.verifyIO = testWithNan ? &compareFUnord<true> : &compareFUnord<false>;
// NOTE(review): these two requirements are presumably added only when testWithNan is set -
// confirm against the enclosing condition.
1350 spec.extensions.push_back("VK_KHR_shader_float_controls");
1351 spec.requestedVulkanFeatures.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat32 = DE_TRUE;
// The case name doubles as its description.
1354 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1357 return group.release();
// Per-case data for the OpAtomic* tests; each member is set from the constructor argument
// of the same name.
// NOTE(review): the member descriptions below are inferred from the names - confirm where
// they are consumed.
1363 const char* assembly;				// presumably the SPIR-V text performing the atomic operation
1364 const char* retValAssembly;		// presumably extra SPIR-V text used when return values are verified
1365 OpAtomicType opAtomic;			// which atomic operation the case exercises
1366 deInt32 numOutputElements;		// presumably the number of elements in the output buffer
1368 OpAtomicCase(const char* _name, const char* _assembly, const char* _retValAssembly, OpAtomicType _opAtomic, deInt32 _numOutputElements)
1370 , assembly (_assembly)
1371 , retValAssembly (_retValAssembly)
1372 , opAtomic (_opAtomic)
1373 , numOutputElements (_numOutputElements) {}
// Builds the "opatomic" test group: one compute-shader case per OpAtomic* opcode.
//
// useStorageBuffer   - use the StorageBuffer storage class with the Block decoration
//                      (plus SPV_KHR_storage_buffer_storage_class) instead of
//                      Uniform with BufferBlock.
// numElements        - element count of the input buffer and the number of work
//                      groups dispatched (LocalSize is 1 1 1).
// verifyReturnValues - also store each atomic's result into a third buffer
//                      (binding 2) and check it with OpAtomicBuffer::compareWithRetvals.
// volatileAtomic     - build the shader against the Vulkan memory model
//                      (VulkanMemoryModelKHR capability, VulkanKHR memory model).
//
// Returns the newly allocated group; ownership passes to the caller.
1376 tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStorageBuffer, int numElements = 65535, bool verifyReturnValues = false, bool volatileAtomic = false)
// The group name encodes the selected variant.
1378 std::string groupName ("opatomic");
1379 if (useStorageBuffer)
1380 groupName += "_storage_buffer";
1381 if (verifyReturnValues)
1382 groupName += "_return_values";
1384 groupName += "_volatile";
1385 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpAtomic* opcodes"));
1386 vector<OpAtomicCase> cases;
// Shader skeleton shared by all cases; the ${...} placeholders are filled in per case below.
1388 const StringTemplate shaderTemplate (
1390 string("OpCapability Shader\n") +
1391 (volatileAtomic ? "OpCapability VulkanMemoryModelKHR\n" : "") +
1392 (useStorageBuffer ? "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n" : "") +
1393 (volatileAtomic ? "OpExtension \"SPV_KHR_vulkan_memory_model\"\n" : "") +
1394 (volatileAtomic ? "OpMemoryModel Logical VulkanKHR\n" : "OpMemoryModel Logical GLSL450\n") +
1395 "OpEntryPoint GLCompute %main \"main\" %id\n"
1396 "OpExecutionMode %main LocalSize 1 1 1\n" +
1398 "OpSource GLSL 430\n"
1399 "OpName %main \"main\"\n"
1400 "OpName %id \"gl_GlobalInvocationID\"\n"
1402 "OpDecorate %id BuiltIn GlobalInvocationId\n"
// Input buffer: descriptor set 0, binding 0.
1404 "OpDecorate %buf ${BLOCK_DECORATION}\n"
1405 "OpDecorate %indata DescriptorSet 0\n"
1406 "OpDecorate %indata Binding 0\n"
1407 "OpDecorate %i32arr ArrayStride 4\n"
1408 "OpMemberDecorate %buf 0 Offset 0\n"
// Output buffer the atomics operate on: descriptor set 0, binding 1.
1410 "OpDecorate %sumbuf ${BLOCK_DECORATION}\n"
1411 "OpDecorate %sum DescriptorSet 0\n"
1412 "OpDecorate %sum Binding 1\n"
1413 "OpMemberDecorate %sumbuf 0 Offset 0\n"
1415 "${RETVAL_BUF_DECORATE}"
1417 + getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
1419 "%buf = OpTypeStruct %i32arr\n"
1420 "%bufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
1421 "%indata = OpVariable %bufptr ${BLOCK_POINTER_TYPE}\n"
1423 "%sumbuf = OpTypeStruct %i32arr\n"
1424 "%sumbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %sumbuf\n"
1425 "%sum = OpVariable %sumbufptr ${BLOCK_POINTER_TYPE}\n"
1427 "${RETVAL_BUF_DECL}"
// Constants referenced by the per-case assembly and by the ${SCOPE} / ${SEMANTICS} operands.
1429 "%id = OpVariable %uvec3ptr Input\n"
1430 "%minusone = OpConstant %i32 -1\n"
1431 "%zero = OpConstant %i32 0\n"
1432 "%one = OpConstant %u32 1\n"
1433 "%two = OpConstant %i32 2\n"
1434 "%five = OpConstant %i32 5\n"
1435 "%volbit = OpConstant %i32 32768\n"
1437 "%main = OpFunction %void None %voidf\n"
1438 "%label = OpLabel\n"
1439 "%idval = OpLoad %uvec3 %id\n"
1440 "%x = OpCompositeExtract %u32 %idval 0\n"
1442 "%inloc = OpAccessChain %i32ptr %indata %zero %x\n"
1443 "%inval = OpLoad %i32 %inloc\n"
1445 "%outloc = OpAccessChain %i32ptr %sum %zero ${INDEX}\n"
1447 "${RETVAL_ASSEMBLY}"
1450 " OpFunctionEnd\n");
// Helper macros for registering cases: _1 uses a single output element, _N uses numElements.
1452 #define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS) \
1454 cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)); \
1455 } while (deGetFalse())
1456 #define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, 1)
1457 #define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, numElements)
1459 ADD_OPATOMIC_CASE_1(iadd, "%retv = OpAtomicIAdd %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1460 " OpStore %retloc %retv\n", OPATOMIC_IADD );
1461 ADD_OPATOMIC_CASE_1(isub, "%retv = OpAtomicISub %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1462 " OpStore %retloc %retv\n", OPATOMIC_ISUB );
1463 ADD_OPATOMIC_CASE_1(iinc, "%retv = OpAtomicIIncrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1464 " OpStore %retloc %retv\n", OPATOMIC_IINC );
1465 ADD_OPATOMIC_CASE_1(idec, "%retv = OpAtomicIDecrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1466 " OpStore %retloc %retv\n", OPATOMIC_IDEC );
// The load and store cases pass an empty return-value assembly, so they are only
// registered when return values are not being verified.
1467 if (!verifyReturnValues)
1469 ADD_OPATOMIC_CASE_N(load, "%inval2 = OpAtomicLoad %i32 %inloc ${SCOPE} ${SEMANTICS}\n"
1470 " OpStore %outloc %inval2\n", "", OPATOMIC_LOAD );
1471 ADD_OPATOMIC_CASE_N(store, " OpAtomicStore %outloc ${SCOPE} ${SEMANTICS} %inval\n", "", OPATOMIC_STORE );
1474 ADD_OPATOMIC_CASE_N(compex, "%even = OpSMod %i32 %inval %two\n"
1475 " OpStore %outloc %even\n"
1476 "%retv = OpAtomicCompareExchange %i32 %outloc ${SCOPE} ${SEMANTICS} ${SEMANTICS} %minusone %zero\n",
1477 " OpStore %retloc %retv\n", OPATOMIC_COMPEX );
1480 #undef ADD_OPATOMIC_CASE
1481 #undef ADD_OPATOMIC_CASE_1
1482 #undef ADD_OPATOMIC_CASE_N
// Turn every registered case into a SpvAsmComputeShaderCase.
1484 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1486 map<string, string> specializations;
1487 ComputeShaderSpec spec;
1488 vector<deInt32> inputInts (numElements, 0);
1489 vector<deInt32> expected (cases[caseNdx].numOutputElements, -1);
1493 spec.extensions.push_back("VK_KHR_vulkan_memory_model");
1494 spec.requestedVulkanFeatures.extVulkanMemoryModel.vulkanMemoryModel = true;
1496 // volatile, queuefamily scope
1497 specializations["SEMANTICS"] = "%volbit";
1498 specializations["SCOPE"] = "%five";
1502 // non-volatile, device scope
1503 specializations["SEMANTICS"] = "%zero";
1504 specializations["SCOPE"] = "%one";
// Single-element outputs are always addressed at index 0; otherwise each invocation uses its own slot.
1506 specializations["INDEX"] = (cases[caseNdx].numOutputElements == 1) ? "%zero" : "%x";
1507 specializations["INSTRUCTION"] = cases[caseNdx].assembly;
1508 specializations["BLOCK_DECORATION"] = useStorageBuffer ? "Block" : "BufferBlock";
1509 specializations["BLOCK_POINTER_TYPE"] = useStorageBuffer ? "StorageBuffer" : "Uniform";
1511 if (verifyReturnValues)
// Extra buffer at binding 2 that receives the value returned by each atomic.
1513 const StringTemplate blockDecoration (
1515 "OpDecorate %retbuf ${BLOCK_DECORATION}\n"
1516 "OpDecorate %ret DescriptorSet 0\n"
1517 "OpDecorate %ret Binding 2\n"
1518 "OpMemberDecorate %retbuf 0 Offset 0\n\n");
1520 const StringTemplate blockDeclaration (
1522 "%retbuf = OpTypeStruct %i32arr\n"
1523 "%retbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %retbuf\n"
1524 "%ret = OpVariable %retbufptr ${BLOCK_POINTER_TYPE}\n\n");
1526 specializations["RETVAL_ASSEMBLY"] =
1527 "%retloc = OpAccessChain %i32ptr %ret %zero %x\n"
1528 + std::string(cases[caseNdx].retValAssembly);
1530 specializations["RETVAL_BUF_DECORATE"] = blockDecoration.specialize(specializations);
1531 specializations["RETVAL_BUF_DECL"] = blockDeclaration.specialize(specializations);
1535 specializations["RETVAL_ASSEMBLY"] = "";
1536 specializations["RETVAL_BUF_DECORATE"] = "";
1537 specializations["RETVAL_BUF_DECL"] = "";
1540 spec.assembly = shaderTemplate.specialize(specializations);
1542 // Specialize one more time, to catch things that were in a template parameter
1543 const StringTemplate assemblyTemplate(spec.assembly);
1544 spec.assembly = assemblyTemplate.specialize(specializations);
1546 if (useStorageBuffer)
1547 spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
// Input and expected contents are produced by OpAtomicBuffer according to the buffer type tag.
1549 spec.inputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_INPUT)));
1550 spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_EXPECTED)));
1551 if (verifyReturnValues)
1552 spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_ATOMIC_RET)));
1553 spec.numWorkGroups = IVec3(numElements, 1, 1);
// Return-value verification needs the comparator instantiated for the matching opcode.
1555 if (verifyReturnValues)
1557 switch (cases[caseNdx].opAtomic)
1560 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IADD>;
1563 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_ISUB>;
1566 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IINC>;
1569 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IDEC>;
1571 case OPATOMIC_COMPEX:
1572 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_COMPEX>;
1575 DE_FATAL("Unsupported OpAtomic type for return value verification");
1578 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1581 return group.release();
// Builds the "opline" test group. It holds a single case ("all") whose shader
// negates each input float while OpLine instructions are placed at many
// different positions in the module. The expected output is the negated input.
// Returns the newly allocated group; ownership passes to the caller.
1584 tcu::TestCaseGroup* createOpLineGroup (tcu::TestContext& testCtx)
1586 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opline", "Test the OpLine instruction"));
1587 ComputeShaderSpec spec;
// Seeded from the group name, so the generated inputs are reproducible.
1588 de::Random rnd (deStringHash(group->getName()));
1589 const int numElements = 100;
1590 vector<float> positiveFloats (numElements, 0);
1591 vector<float> negativeFloats (numElements, 0);
1593 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
// Expected results: the shader applies OpFNegate to every input.
1595 for (size_t ndx = 0; ndx < numElements; ++ndx)
1596 negativeFloats[ndx] = -positiveFloats[ndx];
1599 string(getComputeAsmShaderPreamble()) +
// Two file-name strings so OpLine can refer to different source files.
1601 "%fname1 = OpString \"negateInputs.comp\"\n"
1602 "%fname2 = OpString \"negateInputs\"\n"
1604 "OpSource GLSL 430\n"
1605 "OpName %main \"main\"\n"
1606 "OpName %id \"gl_GlobalInvocationID\"\n"
1608 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1610 + string(getComputeAsmInputOutputBufferTraits()) +
1612 "OpLine %fname1 0 0\n" // At the earliest possible position
1614 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1616 "OpLine %fname1 0 1\n" // Multiple OpLines in sequence
1617 "OpLine %fname2 1 0\n" // Different filenames
1618 "OpLine %fname1 1000 100000\n"
1620 "%id = OpVariable %uvec3ptr Input\n"
1621 "%zero = OpConstant %i32 0\n"
1623 "OpLine %fname1 1 1\n" // Before a function
1625 "%main = OpFunction %void None %voidf\n"
1626 "%label = OpLabel\n"
1628 "OpLine %fname1 1 1\n" // In a function
1630 "%idval = OpLoad %uvec3 %id\n"
1631 "%x = OpCompositeExtract %u32 %idval 0\n"
1632 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1633 "%inval = OpLoad %f32 %inloc\n"
1634 "%neg = OpFNegate %f32 %inval\n"
1635 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1636 " OpStore %outloc %neg\n"
// One work group per element (the shader indexes by the x component of the invocation id).
1639 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1640 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1641 spec.numWorkGroups = IVec3(numElements, 1, 1);
1643 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpLine appearing at different places", spec));
1645 return group.release();
1648 bool veryfiBinaryShader (const ProgramBinary& binary)
1650 const size_t paternCount = 3u;
1651 bool paternsCheck[paternCount] =
1655 const string patersns[paternCount] =
1661 size_t paternNdx = 0u;
1663 for (size_t ndx = 0u; ndx < binary.getSize(); ++ndx)
1665 if (false == paternsCheck[paternNdx] &&
1666 patersns[paternNdx][0] == static_cast<char>(binary.getBinary()[ndx]) &&
1667 deMemoryEqual((const char*)&binary.getBinary()[ndx], &patersns[paternNdx][0], patersns[paternNdx].length()))
1669 paternsCheck[paternNdx]= true;
1671 if (paternNdx == paternCount)
1676 for (size_t ndx = 0u; ndx < paternCount; ++ndx)
1678 if (!paternsCheck[ndx])
// Builds the "opmoduleprocessed" test group. It holds a single case ("all")
// whose shader negates each input float and carries three OpModuleProcessed
// instructions. Besides the usual output comparison, the compiled binary is
// checked by veryfiBinaryShader.
// Returns the newly allocated group; ownership passes to the caller.
1685 tcu::TestCaseGroup* createOpModuleProcessedGroup (tcu::TestContext& testCtx)
1687 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "Test the OpModuleProcessed instruction"));
1688 ComputeShaderSpec spec;
// Seeded from the group name, so the generated inputs are reproducible.
1689 de::Random rnd (deStringHash(group->getName()));
1690 const int numElements = 10;
1691 vector<float> positiveFloats (numElements, 0);
1692 vector<float> negativeFloats (numElements, 0);
1694 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
// Expected results: the shader applies OpFNegate to every input.
1696 for (size_t ndx = 0; ndx < numElements; ++ndx)
1697 negativeFloats[ndx] = -positiveFloats[ndx];
1700 string(getComputeAsmShaderPreamble()) +
1701 "%fname = OpString \"negateInputs.comp\"\n"
1703 "OpSource GLSL 430\n"
1704 "OpName %main \"main\"\n"
1705 "OpName %id \"gl_GlobalInvocationID\"\n"
// The three strings below are the instructions under test.
1706 "OpModuleProcessed \"VULKAN CTS\"\n" //OpModuleProcessed;
1707 "OpModuleProcessed \"Negative values\"\n"
1708 "OpModuleProcessed \"Date: 2017/09/21\"\n"
1709 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1711 + string(getComputeAsmInputOutputBufferTraits())
1713 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1715 "OpLine %fname 0 1\n"
1717 "OpLine %fname 1000 1\n"
1719 "%id = OpVariable %uvec3ptr Input\n"
1720 "%zero = OpConstant %i32 0\n"
1721 "%main = OpFunction %void None %voidf\n"
1723 "%label = OpLabel\n"
1724 "%idval = OpLoad %uvec3 %id\n"
1725 "%x = OpCompositeExtract %u32 %idval 0\n"
1727 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1728 "%inval = OpLoad %f32 %inloc\n"
1729 "%neg = OpFNegate %f32 %inval\n"
1730 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1731 " OpStore %outloc %neg\n"
1734 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1735 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1736 spec.numWorkGroups = IVec3(numElements, 1, 1);
// Extra check on the compiled binary; the case requests SPIR-V 1.3.
1737 spec.verifyBinary = veryfiBinaryShader;
1738 spec.spirvVersion = SPIRV_VERSION_1_3;
1740 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpModuleProcessed Tests", spec));
1742 return group.release();
// Builds the "opnoline" test group. It holds a single case ("all") whose shader
// negates each input float while OpNoLine instructions are placed at many
// different positions, with and without a preceding OpLine.
// Returns the newly allocated group; ownership passes to the caller.
1745 tcu::TestCaseGroup* createOpNoLineGroup (tcu::TestContext& testCtx)
1747 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnoline", "Test the OpNoLine instruction"));
1748 ComputeShaderSpec spec;
// Seeded from the group name, so the generated inputs are reproducible.
1749 de::Random rnd (deStringHash(group->getName()));
1750 const int numElements = 100;
1751 vector<float> positiveFloats (numElements, 0);
1752 vector<float> negativeFloats (numElements, 0);
1754 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
// Expected results: the shader applies OpFNegate to every input.
1756 for (size_t ndx = 0; ndx < numElements; ++ndx)
1757 negativeFloats[ndx] = -positiveFloats[ndx];
1760 string(getComputeAsmShaderPreamble()) +
1762 "%fname = OpString \"negateInputs.comp\"\n"
1764 "OpSource GLSL 430\n"
1765 "OpName %main \"main\"\n"
1766 "OpName %id \"gl_GlobalInvocationID\"\n"
1768 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1770 + string(getComputeAsmInputOutputBufferTraits()) +
1772 "OpNoLine\n" // At the earliest possible position, without preceding OpLine
1774 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1776 "OpLine %fname 0 1\n"
1777 "OpNoLine\n" // Immediately following a preceding OpLine
1779 "OpLine %fname 1000 1\n"
1781 "%id = OpVariable %uvec3ptr Input\n"
1782 "%zero = OpConstant %i32 0\n"
1784 "OpNoLine\n" // Contents after the previous OpLine
1786 "%main = OpFunction %void None %voidf\n"
1787 "%label = OpLabel\n"
1788 "%idval = OpLoad %uvec3 %id\n"
1789 "%x = OpCompositeExtract %u32 %idval 0\n"
1791 "OpNoLine\n" // Multiple OpNoLine
1795 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1796 "%inval = OpLoad %f32 %inloc\n"
1797 "%neg = OpFNegate %f32 %inval\n"
1798 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1799 " OpStore %outloc %neg\n"
// One work group per element (the shader indexes by the x component of the invocation id).
1802 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1803 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1804 spec.numWorkGroups = IVec3(numElements, 1, 1);
1806 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNoLine appearing at different places", spec));
1808 return group.release();
1811 // Compare instruction for the contraction compute case.
1812 // Returns true if the output is what is expected from the test case.
1813 bool compareNoContractCase(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1815 if (outputAllocs.size() != 1)
1818 // Only size is needed because we are not comparing the exact values.
1819 size_t byteSize = expectedOutputs[0].getByteSize();
1821 const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
1823 for(size_t i = 0; i < byteSize / sizeof(float); ++i) {
1824 if (outputAsFloat[i] != 0.f &&
1825 outputAsFloat[i] != -ldexp(1, -24)) {
// Builds the "nocontraction" test group. Each case decorates the multiply, the
// add, or both with NoContraction and computes inval1 * inval2 + (-1) per
// element. Results are checked by compareNoContractCase rather than by exact
// comparison with the expected buffer.
// Returns the newly allocated group; ownership passes to the caller.
1833 tcu::TestCaseGroup* createNoContractionGroup (tcu::TestContext& testCtx)
1835 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
1836 vector<CaseParameter> cases;
1837 const int numElements = 100;
1838 vector<float> inputFloats1 (numElements, 0);
1839 vector<float> inputFloats2 (numElements, 0);
1840 vector<float> outputFloats (numElements, 0);
1841 const StringTemplate shaderTemplate (
1842 string(getComputeAsmShaderPreamble()) +
1844 "OpName %main \"main\"\n"
1845 "OpName %id \"gl_GlobalInvocationID\"\n"
1847 "OpDecorate %id BuiltIn GlobalInvocationId\n"
// Two input buffers (bindings 0 and 1) and one output buffer (binding 2).
1851 "OpDecorate %buf BufferBlock\n"
1852 "OpDecorate %indata1 DescriptorSet 0\n"
1853 "OpDecorate %indata1 Binding 0\n"
1854 "OpDecorate %indata2 DescriptorSet 0\n"
1855 "OpDecorate %indata2 Binding 1\n"
1856 "OpDecorate %outdata DescriptorSet 0\n"
1857 "OpDecorate %outdata Binding 2\n"
1858 "OpDecorate %f32arr ArrayStride 4\n"
1859 "OpMemberDecorate %buf 0 Offset 0\n"
1861 + string(getComputeAsmCommonTypes()) +
1863 "%buf = OpTypeStruct %f32arr\n"
1864 "%bufptr = OpTypePointer Uniform %buf\n"
1865 "%indata1 = OpVariable %bufptr Uniform\n"
1866 "%indata2 = OpVariable %bufptr Uniform\n"
1867 "%outdata = OpVariable %bufptr Uniform\n"
1869 "%id = OpVariable %uvec3ptr Input\n"
1870 "%zero = OpConstant %i32 0\n"
1871 "%c_f_m1 = OpConstant %f32 -1.\n"
1873 "%main = OpFunction %void None %voidf\n"
1874 "%label = OpLabel\n"
1875 "%idval = OpLoad %uvec3 %id\n"
1876 "%x = OpCompositeExtract %u32 %idval 0\n"
1877 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
1878 "%inval1 = OpLoad %f32 %inloc1\n"
1879 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
1880 "%inval2 = OpLoad %f32 %inloc2\n"
// The multiply-then-add pair that an implementation could otherwise fuse.
1881 "%mul = OpFMul %f32 %inval1 %inval2\n"
1882 "%add = OpFAdd %f32 %mul %c_f_m1\n"
1883 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1884 " OpStore %outloc %add\n"
1886 " OpFunctionEnd\n");
// Each case supplies the decoration text for the "DECORATION" specialization key.
1888 cases.push_back(CaseParameter("multiplication", "OpDecorate %mul NoContraction"));
1889 cases.push_back(CaseParameter("addition", "OpDecorate %add NoContraction"));
1890 cases.push_back(CaseParameter("both", "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"));
1892 for (size_t ndx = 0; ndx < numElements; ++ndx)
1894 inputFloats1[ndx] = 1.f + std::ldexp(1.f, -23); // 1 + 2^-23.
1895 inputFloats2[ndx] = 1.f - std::ldexp(1.f, -23); // 1 - 2^-23.
1896 // Result for (1 + 2^-23) * (1 - 2^-23) - 1. With NoContraction, the multiplication will be
1897 // conducted separately and the result is rounded to 1, or 0x1.fffffep-1 (1 - 2^-24),
1898 // depending on the rounding mode. So the final result will be 0.f or -0x1p-24.
1899 // If the operation is combined into a precise fused multiply-add, then the result would be
1900 // -2^-46 (0xa8800000).
1901 outputFloats[ndx] = 0.f;
1904 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1906 map<string, string> specializations;
1907 ComputeShaderSpec spec;
1909 specializations["DECORATION"] = cases[caseNdx].param;
1910 spec.assembly = shaderTemplate.specialize(specializations);
1911 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1912 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1913 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
1914 spec.numWorkGroups = IVec3(numElements, 1, 1);
1915 // Check against the two possible answers based on rounding mode.
1916 spec.verifyIO = &compareNoContractCase;
1918 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1920 return group.release();
1923 bool compareFRem(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1925 if (outputAllocs.size() != 1)
1928 vector<deUint8> expectedBytes;
1929 expectedOutputs[0].getBytes(expectedBytes);
1931 const float* expectedOutputAsFloat = reinterpret_cast<const float*>(&expectedBytes.front());
1932 const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
1934 for (size_t idx = 0; idx < expectedBytes.size() / sizeof(float); ++idx)
1936 const float f0 = expectedOutputAsFloat[idx];
1937 const float f1 = outputAsFloat[idx];
1938 // \todo relative error needs to be fairly high because FRem may be implemented as
1939 // (roughly) frac(a/b)*b, so LSB errors can be magnified. But this should be fine for now.
1940 if (deFloatAbs((f1 - f0) / f0) > 0.02)
// Builds the "opfrem" test group. It holds a single case ("all") that applies
// OpFRem to pairs of random floats and compares the result against std::fmod
// using compareFRem.
// Returns the newly allocated group; ownership passes to the caller.
1947 tcu::TestCaseGroup* createOpFRemGroup (tcu::TestContext& testCtx)
1949 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opfrem", "Test the OpFRem instruction"));
1950 ComputeShaderSpec spec;
// Seeded from the group name, so the generated inputs are reproducible.
1951 de::Random rnd (deStringHash(group->getName()));
1952 const int numElements = 200;
1953 vector<float> inputFloats1 (numElements, 0);
1954 vector<float> inputFloats2 (numElements, 0);
1955 vector<float> outputFloats (numElements, 0);
// Dividends and divisors are generated with different ranges.
1957 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
1958 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats2[0], numElements);
1960 for (size_t ndx = 0; ndx < numElements; ++ndx)
1962 // Guard against divisors near zero.
1963 if (std::fabs(inputFloats2[ndx]) < 1e-3)
1964 inputFloats2[ndx] = 8.f;
1966 // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
1967 outputFloats[ndx] = std::fmod(inputFloats1[ndx], inputFloats2[ndx]);
1971 string(getComputeAsmShaderPreamble()) +
1973 "OpName %main \"main\"\n"
1974 "OpName %id \"gl_GlobalInvocationID\"\n"
1976 "OpDecorate %id BuiltIn GlobalInvocationId\n"
// Two input buffers (bindings 0 and 1) and one output buffer (binding 2).
1978 "OpDecorate %buf BufferBlock\n"
1979 "OpDecorate %indata1 DescriptorSet 0\n"
1980 "OpDecorate %indata1 Binding 0\n"
1981 "OpDecorate %indata2 DescriptorSet 0\n"
1982 "OpDecorate %indata2 Binding 1\n"
1983 "OpDecorate %outdata DescriptorSet 0\n"
1984 "OpDecorate %outdata Binding 2\n"
1985 "OpDecorate %f32arr ArrayStride 4\n"
1986 "OpMemberDecorate %buf 0 Offset 0\n"
1988 + string(getComputeAsmCommonTypes()) +
1990 "%buf = OpTypeStruct %f32arr\n"
1991 "%bufptr = OpTypePointer Uniform %buf\n"
1992 "%indata1 = OpVariable %bufptr Uniform\n"
1993 "%indata2 = OpVariable %bufptr Uniform\n"
1994 "%outdata = OpVariable %bufptr Uniform\n"
1996 "%id = OpVariable %uvec3ptr Input\n"
1997 "%zero = OpConstant %i32 0\n"
1999 "%main = OpFunction %void None %voidf\n"
2000 "%label = OpLabel\n"
2001 "%idval = OpLoad %uvec3 %id\n"
2002 "%x = OpCompositeExtract %u32 %idval 0\n"
2003 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2004 "%inval1 = OpLoad %f32 %inloc1\n"
2005 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2006 "%inval2 = OpLoad %f32 %inloc2\n"
// Instruction under test.
2007 "%rem = OpFRem %f32 %inval1 %inval2\n"
2008 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2009 " OpStore %outloc %rem\n"
2013 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2014 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2015 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2016 spec.numWorkGroups = IVec3(numElements, 1, 1);
// Tolerance-based comparison instead of an exact match.
2017 spec.verifyIO = &compareFRem;
2019 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2021 return group.release();
2024 bool compareNMin (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2026 if (outputAllocs.size() != 1)
2029 const BufferSp& expectedOutput (expectedOutputs[0].getBuffer());
2030 std::vector<deUint8> data;
2031 expectedOutput->getBytes(data);
2033 const float* const expectedOutputAsFloat = reinterpret_cast<const float*>(&data.front());
2034 const float* const outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2036 for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2038 const float f0 = expectedOutputAsFloat[idx];
2039 const float f1 = outputAsFloat[idx];
2041 // For NMin, we accept NaN as output if both inputs were NaN.
2042 // Otherwise the NaN is the wrong choise, as on architectures that
2043 // do not handle NaN, those are huge values.
2044 if (!(tcu::Float32(f1).isNaN() && tcu::Float32(f0).isNaN()) && deFloatAbs(f1 - f0) > 0.00001f)
// Builds the "opnmin" test group. It holds a single case ("all") that applies
// the GLSL.std.450 NMin extended instruction to pairs of floats, some of which
// are NaN, and checks the result with compareNMin.
// Returns the newly allocated group; ownership passes to the caller.
2051 tcu::TestCaseGroup* createOpNMinGroup (tcu::TestContext& testCtx)
2053 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnmin", "Test the OpNMin instruction"));
2054 ComputeShaderSpec spec;
// Seeded from the group name, so the generated inputs are reproducible.
2055 de::Random rnd (deStringHash(group->getName()));
2056 const int numElements = 200;
2057 vector<float> inputFloats1 (numElements, 0);
2058 vector<float> inputFloats2 (numElements, 0);
2059 vector<float> outputFloats (numElements, 0);
2061 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2062 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2064 // Make the first case a full-NAN case.
2065 inputFloats1[0] = TCU_NAN;
2066 inputFloats2[0] = TCU_NAN;
2068 for (size_t ndx = 0; ndx < numElements; ++ndx)
2070 // By default, pick the smallest
2071 outputFloats[ndx] = std::min(inputFloats1[ndx], inputFloats2[ndx]);
2073 // Make half of the cases NaN cases
2076 // Alternate between the NaN operand
// When one operand is replaced by NaN, the expected result is the other operand.
2079 outputFloats[ndx] = inputFloats2[ndx];
2080 inputFloats1[ndx] = TCU_NAN;
2084 outputFloats[ndx] = inputFloats1[ndx];
2085 inputFloats2[ndx] = TCU_NAN;
// The preamble is spelled out here because the shader imports GLSL.std.450.
2091 "OpCapability Shader\n"
2092 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2093 "OpMemoryModel Logical GLSL450\n"
2094 "OpEntryPoint GLCompute %main \"main\" %id\n"
2095 "OpExecutionMode %main LocalSize 1 1 1\n"
2097 "OpName %main \"main\"\n"
2098 "OpName %id \"gl_GlobalInvocationID\"\n"
2100 "OpDecorate %id BuiltIn GlobalInvocationId\n"
// Two input buffers (bindings 0 and 1) and one output buffer (binding 2).
2102 "OpDecorate %buf BufferBlock\n"
2103 "OpDecorate %indata1 DescriptorSet 0\n"
2104 "OpDecorate %indata1 Binding 0\n"
2105 "OpDecorate %indata2 DescriptorSet 0\n"
2106 "OpDecorate %indata2 Binding 1\n"
2107 "OpDecorate %outdata DescriptorSet 0\n"
2108 "OpDecorate %outdata Binding 2\n"
2109 "OpDecorate %f32arr ArrayStride 4\n"
2110 "OpMemberDecorate %buf 0 Offset 0\n"
2112 + string(getComputeAsmCommonTypes()) +
2114 "%buf = OpTypeStruct %f32arr\n"
2115 "%bufptr = OpTypePointer Uniform %buf\n"
2116 "%indata1 = OpVariable %bufptr Uniform\n"
2117 "%indata2 = OpVariable %bufptr Uniform\n"
2118 "%outdata = OpVariable %bufptr Uniform\n"
2120 "%id = OpVariable %uvec3ptr Input\n"
2121 "%zero = OpConstant %i32 0\n"
2123 "%main = OpFunction %void None %voidf\n"
2124 "%label = OpLabel\n"
2125 "%idval = OpLoad %uvec3 %id\n"
2126 "%x = OpCompositeExtract %u32 %idval 0\n"
2127 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2128 "%inval1 = OpLoad %f32 %inloc1\n"
2129 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2130 "%inval2 = OpLoad %f32 %inloc2\n"
// Instruction under test (the result id is named %rem although it holds the NMin result).
2131 "%rem = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
2132 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2133 " OpStore %outloc %rem\n"
2137 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2138 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2139 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2140 spec.numWorkGroups = IVec3(numElements, 1, 1);
// NaN-aware comparison instead of an exact match.
2141 spec.verifyIO = &compareNMin;
2143 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2145 return group.release();
2148 bool compareNMax (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2150 if (outputAllocs.size() != 1)
2153 const BufferSp& expectedOutput = expectedOutputs[0].getBuffer();
2154 std::vector<deUint8> data;
2155 expectedOutput->getBytes(data);
2157 const float* const expectedOutputAsFloat = reinterpret_cast<const float*>(&data.front());
2158 const float* const outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2160 for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2162 const float f0 = expectedOutputAsFloat[idx];
2163 const float f1 = outputAsFloat[idx];
2165 // For NMax, NaN is considered acceptable result, since in
2166 // architectures that do not handle NaNs, those are huge values.
2167 if (!tcu::Float32(f1).isNaN() && deFloatAbs(f1 - f0) > 0.00001f)
// Builds the "opnmax" test group. It holds a single case ("all") that applies
// the GLSL.std.450 NMax extended instruction to pairs of floats, some of which
// are NaN, and checks the result with compareNMax.
// Returns the newly allocated group; ownership passes to the caller.
2174 tcu::TestCaseGroup* createOpNMaxGroup (tcu::TestContext& testCtx)
2176 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnmax", "Test the OpNMax instruction"));
2177 ComputeShaderSpec spec;
// Seeded from the group name, so the generated inputs are reproducible.
2178 de::Random rnd (deStringHash(group->getName()));
2179 const int numElements = 200;
2180 vector<float> inputFloats1 (numElements, 0);
2181 vector<float> inputFloats2 (numElements, 0);
2182 vector<float> outputFloats (numElements, 0);
2184 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2185 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2187 // Make the first case a full-NAN case.
2188 inputFloats1[0] = TCU_NAN;
2189 inputFloats2[0] = TCU_NAN;
2191 for (size_t ndx = 0; ndx < numElements; ++ndx)
2193 // By default, pick the biggest
2194 outputFloats[ndx] = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2196 // Make half of the cases NaN cases
2199 // Alternate between the NaN operand
// When one operand is replaced by NaN, the expected result is the other operand.
2202 outputFloats[ndx] = inputFloats2[ndx];
2203 inputFloats1[ndx] = TCU_NAN;
2207 outputFloats[ndx] = inputFloats1[ndx];
2208 inputFloats2[ndx] = TCU_NAN;
// The preamble is spelled out here because the shader imports GLSL.std.450.
2214 "OpCapability Shader\n"
2215 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2216 "OpMemoryModel Logical GLSL450\n"
2217 "OpEntryPoint GLCompute %main \"main\" %id\n"
2218 "OpExecutionMode %main LocalSize 1 1 1\n"
2220 "OpName %main \"main\"\n"
2221 "OpName %id \"gl_GlobalInvocationID\"\n"
2223 "OpDecorate %id BuiltIn GlobalInvocationId\n"
// Two input buffers (bindings 0 and 1) and one output buffer (binding 2).
2225 "OpDecorate %buf BufferBlock\n"
2226 "OpDecorate %indata1 DescriptorSet 0\n"
2227 "OpDecorate %indata1 Binding 0\n"
2228 "OpDecorate %indata2 DescriptorSet 0\n"
2229 "OpDecorate %indata2 Binding 1\n"
2230 "OpDecorate %outdata DescriptorSet 0\n"
2231 "OpDecorate %outdata Binding 2\n"
2232 "OpDecorate %f32arr ArrayStride 4\n"
2233 "OpMemberDecorate %buf 0 Offset 0\n"
2235 + string(getComputeAsmCommonTypes()) +
2237 "%buf = OpTypeStruct %f32arr\n"
2238 "%bufptr = OpTypePointer Uniform %buf\n"
2239 "%indata1 = OpVariable %bufptr Uniform\n"
2240 "%indata2 = OpVariable %bufptr Uniform\n"
2241 "%outdata = OpVariable %bufptr Uniform\n"
2243 "%id = OpVariable %uvec3ptr Input\n"
2244 "%zero = OpConstant %i32 0\n"
2246 "%main = OpFunction %void None %voidf\n"
2247 "%label = OpLabel\n"
2248 "%idval = OpLoad %uvec3 %id\n"
2249 "%x = OpCompositeExtract %u32 %idval 0\n"
2250 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2251 "%inval1 = OpLoad %f32 %inloc1\n"
2252 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2253 "%inval2 = OpLoad %f32 %inloc2\n"
// Instruction under test (the result id is named %rem although it holds the NMax result).
2254 "%rem = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
2255 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2256 " OpStore %outloc %rem\n"
2260 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2261 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2262 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2263 spec.numWorkGroups = IVec3(numElements, 1, 1);
// NaN-aware comparison instead of an exact match.
2264 spec.verifyIO = &compareNMax;
2266 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2268 return group.release();
// Custom output verifier for the NClamp compute test.
// The expected-output buffer stores two acceptable reference floats (e0, e1)
// for every element of the device output, so it holds twice as many floats as
// the result buffer. The input resources and the log parameter are unnamed and
// therefore unused here.
2271 bool compareNClamp (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
// This verifier only handles a single output allocation.
2273 if (outputAllocs.size() != 1)
// Copy the expected bytes out of the first expected-output resource.
2276 const BufferSp& expectedOutput = expectedOutputs[0].getBuffer();
2277 std::vector<deUint8> data;
2278 expectedOutput->getBytes(data);
// View both the expected bytes and the host-visible device output as floats.
2280 const float* const expectedOutputAsFloat = reinterpret_cast<const float*>(&data.front());
2281 const float* const outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
// One iteration per output element: expected floats are consumed in pairs,
// hence the additional division by 2.
2283 for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float) / 2; ++idx)
2285 const float e0 = expectedOutputAsFloat[idx * 2];
2286 const float e1 = expectedOutputAsFloat[idx * 2 + 1];
2287 const float res = outputAsFloat[idx];
2289 // For NClamp, we have two possible outcomes based on
2290 // whether NaNs are handled or not.
2291 // If either min or max value is NaN, the result is undefined,
2292 // so this test doesn't stress those. If the clamped value is
2293 // NaN, and NaNs are handled, the result is min; if NaNs are not
2294 // handled, they are big values that result in max.
2295 // If all three parameters are NaN, the result should be NaN.
// The element is acceptable when e0 and the result are both NaN, or when the
// result lies within 0.00001 of either reference value; the condition below is
// the negation of that, i.e. it detects a mismatch.
2296 if (!((tcu::Float32(e0).isNaN() && tcu::Float32(res).isNaN()) ||
2297 (deFloatAbs(e0 - res) < 0.00001f) ||
2298 (deFloatAbs(e1 - res) < 0.00001f)))
// Builds the "opnclamp" test group. It contains a single compute case ("all")
// that runs the GLSL.std.450 NClamp extended instruction on 200 elements.
// Three input buffers supply the value to clamp, the clamp minimum and the
// clamp maximum; the expected-output buffer holds two acceptable results per
// element and is checked with the compareNClamp verifier.
// The caller receives ownership of the released group pointer.
2305 tcu::TestCaseGroup* createOpNClampGroup (tcu::TestContext& testCtx)
2307 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnclamp", "Test the OpNClamp instruction"));
2308 ComputeShaderSpec spec;
// The random generator is seeded from a hash of the group name.
2309 de::Random rnd (deStringHash(group->getName()));
2310 const int numElements = 200;
2311 vector<float> inputFloats1 (numElements, 0);
2312 vector<float> inputFloats2 (numElements, 0);
2313 vector<float> inputFloats3 (numElements, 0);
// Two expected values per element (see compareNClamp).
2314 vector<float> outputFloats (numElements * 2, 0);
// Fill all three operand arrays using the bounds -10000 and 10000.
2316 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2317 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2318 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats3[0], numElements);
// NOTE(review): ndx is size_t while numElements is a const int, so this is a
// signed/unsigned comparison; harmless for the positive constant used here.
2320 for (size_t ndx = 0; ndx < numElements; ++ndx)
2322 // Results are only defined if max value is bigger than min value.
// Swap so that inputFloats2 (min) never exceeds inputFloats3 (max).
2323 if (inputFloats2[ndx] > inputFloats3[ndx])
2325 float t = inputFloats2[ndx];
2326 inputFloats2[ndx] = inputFloats3[ndx];
2327 inputFloats3[ndx] = t;
2330 // By default, do the clamp, setting both possible answers
2331 float defaultRes = std::min(std::max(inputFloats1[ndx], inputFloats2[ndx]), inputFloats3[ndx]);
2333 float maxResA = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2334 float maxResB = maxResA;
2336 // Alternate between the NaN cases
// NaN case: the clamped value is replaced by NaN and both the minimum and the
// maximum are recorded as acceptable answers.
2339 inputFloats1[ndx] = TCU_NAN;
2340 // If NaN is handled, the result should be same as the clamp minimum.
2341 // If NaN is not handled, the result should clamp to the clamp maximum.
2342 maxResA = inputFloats2[ndx];
2343 maxResB = inputFloats3[ndx];
2347 // Not a NaN case - only one legal result.
2348 maxResA = defaultRes;
2349 maxResB = defaultRes;
// Store the pair of acceptable results for this element.
2352 outputFloats[ndx * 2] = maxResA;
2353 outputFloats[ndx * 2 + 1] = maxResB;
2356 // Make the first case a full-NAN case.
2357 inputFloats1[0] = TCU_NAN;
2358 inputFloats2[0] = TCU_NAN;
2359 inputFloats3[0] = TCU_NAN;
2360 outputFloats[0] = TCU_NAN;
2361 outputFloats[1] = TCU_NAN;
// Compute shader with a 1x1x1 local size: each invocation uses
// gl_GlobalInvocationID.x as the element index, loads the three operands
// (bindings 0-2), applies NClamp and stores the result (binding 3).
2364 "OpCapability Shader\n"
2365 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2366 "OpMemoryModel Logical GLSL450\n"
2367 "OpEntryPoint GLCompute %main \"main\" %id\n"
2368 "OpExecutionMode %main LocalSize 1 1 1\n"
2370 "OpName %main \"main\"\n"
2371 "OpName %id \"gl_GlobalInvocationID\"\n"
2373 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2375 "OpDecorate %buf BufferBlock\n"
2376 "OpDecorate %indata1 DescriptorSet 0\n"
2377 "OpDecorate %indata1 Binding 0\n"
2378 "OpDecorate %indata2 DescriptorSet 0\n"
2379 "OpDecorate %indata2 Binding 1\n"
2380 "OpDecorate %indata3 DescriptorSet 0\n"
2381 "OpDecorate %indata3 Binding 2\n"
2382 "OpDecorate %outdata DescriptorSet 0\n"
2383 "OpDecorate %outdata Binding 3\n"
2384 "OpDecorate %f32arr ArrayStride 4\n"
2385 "OpMemberDecorate %buf 0 Offset 0\n"
2387 + string(getComputeAsmCommonTypes()) +
2389 "%buf = OpTypeStruct %f32arr\n"
2390 "%bufptr = OpTypePointer Uniform %buf\n"
2391 "%indata1 = OpVariable %bufptr Uniform\n"
2392 "%indata2 = OpVariable %bufptr Uniform\n"
2393 "%indata3 = OpVariable %bufptr Uniform\n"
2394 "%outdata = OpVariable %bufptr Uniform\n"
2396 "%id = OpVariable %uvec3ptr Input\n"
2397 "%zero = OpConstant %i32 0\n"
2399 "%main = OpFunction %void None %voidf\n"
2400 "%label = OpLabel\n"
2401 "%idval = OpLoad %uvec3 %id\n"
2402 "%x = OpCompositeExtract %u32 %idval 0\n"
2403 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2404 "%inval1 = OpLoad %f32 %inloc1\n"
2405 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2406 "%inval2 = OpLoad %f32 %inloc2\n"
2407 "%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
2408 "%inval3 = OpLoad %f32 %inloc3\n"
2409 "%rem = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
2410 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2411 " OpStore %outloc %rem\n"
// Buffers are pushed in the same order as the binding numbers declared above:
// value, minimum, maximum, then the expected output.
2415 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2416 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2417 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
2418 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
// One work group per element.
2419 spec.numWorkGroups = IVec3(numElements, 1, 1);
// Use the NClamp-specific verifier, which accepts either of the two
// reference values stored per element.
2420 spec.verifyIO = &compareNClamp;
2422 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2424 return group.release();
// Builds the "opsrem" test group: one compute case per entry of the local
// `cases` table, each running OpSRem on 200 pairs of 32-bit signed integers.
// negFailResult is the result code reported when the "all" case (whose operand
// ranges include negative values) mismatches; the "positive" case always
// reports QP_TEST_RESULT_FAIL on mismatch.
// The caller receives ownership of the released group pointer.
2427 tcu::TestCaseGroup* createOpSRemComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2429 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsrem", "Test the OpSRem instruction"));
// The random generator is seeded from a hash of the group name and is shared
// by all cases, so the cases draw consecutive values from one sequence.
2430 de::Random rnd (deStringHash(group->getName()));
2431 const int numElements = 200;
// Per-case parameters: name, failure message, failure result code and the
// operand range bounds.
2433 const struct CaseParams
2436 const char* failMessage; // customized status message
2437 qpTestResult failResult; // override status on failure
2438 int op1Min, op1Max; // operand ranges
2442 { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, 0, 65536, 0, 100 },
2443 { "all", "Inconsistent results, but within specification", negFailResult, -65536, 65536, -100, 100 }, // see below
2445 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2447 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2449 const CaseParams& params = cases[caseNdx];
2450 ComputeShaderSpec spec;
2451 vector<deInt32> inputInts1 (numElements, 0);
2452 vector<deInt32> inputInts2 (numElements, 0);
2453 vector<deInt32> outputInts (numElements, 0);
// Second operand is the divisor; it is generated with the filterNotZero
// filter (presumably rejecting zero to avoid division by zero - confirm).
2455 fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2456 fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2458 for (int ndx = 0; ndx < numElements; ++ndx)
2460 // The C++ integer % operator truncates toward zero, so a non-zero remainder takes the sign of the first operand, which is how OpSRem is spec'd.
2461 outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
// Compute shader: each invocation uses gl_GlobalInvocationID.x as the element
// index, loads one value from each input buffer (bindings 0 and 1), applies
// OpSRem and stores the result to the output buffer (binding 2).
2465 string(getComputeAsmShaderPreamble()) +
2467 "OpName %main \"main\"\n"
2468 "OpName %id \"gl_GlobalInvocationID\"\n"
2470 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2472 "OpDecorate %buf BufferBlock\n"
2473 "OpDecorate %indata1 DescriptorSet 0\n"
2474 "OpDecorate %indata1 Binding 0\n"
2475 "OpDecorate %indata2 DescriptorSet 0\n"
2476 "OpDecorate %indata2 Binding 1\n"
2477 "OpDecorate %outdata DescriptorSet 0\n"
2478 "OpDecorate %outdata Binding 2\n"
2479 "OpDecorate %i32arr ArrayStride 4\n"
2480 "OpMemberDecorate %buf 0 Offset 0\n"
2482 + string(getComputeAsmCommonTypes()) +
2484 "%buf = OpTypeStruct %i32arr\n"
2485 "%bufptr = OpTypePointer Uniform %buf\n"
2486 "%indata1 = OpVariable %bufptr Uniform\n"
2487 "%indata2 = OpVariable %bufptr Uniform\n"
2488 "%outdata = OpVariable %bufptr Uniform\n"
2490 "%id = OpVariable %uvec3ptr Input\n"
2491 "%zero = OpConstant %i32 0\n"
2493 "%main = OpFunction %void None %voidf\n"
2494 "%label = OpLabel\n"
2495 "%idval = OpLoad %uvec3 %id\n"
2496 "%x = OpCompositeExtract %u32 %idval 0\n"
2497 "%inloc1 = OpAccessChain %i32ptr %indata1 %zero %x\n"
2498 "%inval1 = OpLoad %i32 %inloc1\n"
2499 "%inloc2 = OpAccessChain %i32ptr %indata2 %zero %x\n"
2500 "%inval2 = OpLoad %i32 %inloc2\n"
2501 "%rem = OpSRem %i32 %inval1 %inval2\n"
2502 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
2503 " OpStore %outloc %rem\n"
// Buffers are pushed in the same order as the binding numbers declared above.
2507 spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts1)));
2508 spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts2)));
2509 spec.outputs.push_back (BufferSp(new Int32Buffer(outputInts)));
// One work group per element.
2510 spec.numWorkGroups = IVec3(numElements, 1, 1);
// Per-case override of the result code and message used on a mismatch.
2511 spec.failResult = params.failResult;
2512 spec.failMessage = params.failMessage;
2514 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2517 return group.release();
// Builds the "opsrem64" test group: the 64-bit counterpart of the OpSRem
// compute test. One case is created per entry of the local `cases` table, each
// running OpSRem on 200 pairs of 64-bit signed integers.
// negFailResult is the result code reported when the "all" case mismatches;
// the "positive" case always reports QP_TEST_RESULT_FAIL on mismatch.
// The caller receives ownership of the released group pointer.
2520 tcu::TestCaseGroup* createOpSRemComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2522 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsrem64", "Test the 64-bit OpSRem instruction"));
// The random generator is seeded from a hash of the group name and is shared
// by all cases.
2523 de::Random rnd (deStringHash(group->getName()));
2524 const int numElements = 200;
// Per-case parameters: name, failure message, failure result code and a flag
// selecting which input filters are used (see params.positive below).
2526 const struct CaseParams
2529 const char* failMessage; // customized status message
2530 qpTestResult failResult; // override status on failure
2534 { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, true },
2535 { "all", "Inconsistent results, but within specification", negFailResult, false }, // see below
2537 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2539 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2541 const CaseParams& params = cases[caseNdx];
2542 ComputeShaderSpec spec;
2543 vector<deInt64> inputInts1 (numElements, 0);
2544 vector<deInt64> inputInts2 (numElements, 0);
2545 vector<deInt64> outputInts (numElements, 0);
// Positive case: first operand uses filterNonNegative, divisor uses
// filterPositive.
2547 if (params.positive)
2549 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2550 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
// General case: first operand is unfiltered, divisor uses filterNotZero.
2554 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2555 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2558 for (int ndx = 0; ndx < numElements; ++ndx)
2560 // The C++ integer % operator truncates toward zero, so a non-zero remainder takes the sign of the first operand, which is how OpSRem is spec'd.
2561 outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
// Compute shader declaring the Int64 capability: each invocation uses
// gl_GlobalInvocationID.x as the element index, loads one 64-bit value from
// each input buffer (bindings 0 and 1), applies OpSRem and stores the result
// to the output buffer (binding 2). The array stride is 8 bytes.
2565 "OpCapability Int64\n"
2567 + string(getComputeAsmShaderPreamble()) +
2569 "OpName %main \"main\"\n"
2570 "OpName %id \"gl_GlobalInvocationID\"\n"
2572 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2574 "OpDecorate %buf BufferBlock\n"
2575 "OpDecorate %indata1 DescriptorSet 0\n"
2576 "OpDecorate %indata1 Binding 0\n"
2577 "OpDecorate %indata2 DescriptorSet 0\n"
2578 "OpDecorate %indata2 Binding 1\n"
2579 "OpDecorate %outdata DescriptorSet 0\n"
2580 "OpDecorate %outdata Binding 2\n"
2581 "OpDecorate %i64arr ArrayStride 8\n"
2582 "OpMemberDecorate %buf 0 Offset 0\n"
2584 + string(getComputeAsmCommonTypes())
2585 + string(getComputeAsmCommonInt64Types()) +
2587 "%buf = OpTypeStruct %i64arr\n"
2588 "%bufptr = OpTypePointer Uniform %buf\n"
2589 "%indata1 = OpVariable %bufptr Uniform\n"
2590 "%indata2 = OpVariable %bufptr Uniform\n"
2591 "%outdata = OpVariable %bufptr Uniform\n"
2593 "%id = OpVariable %uvec3ptr Input\n"
2594 "%zero = OpConstant %i64 0\n"
2596 "%main = OpFunction %void None %voidf\n"
2597 "%label = OpLabel\n"
2598 "%idval = OpLoad %uvec3 %id\n"
2599 "%x = OpCompositeExtract %u32 %idval 0\n"
2600 "%inloc1 = OpAccessChain %i64ptr %indata1 %zero %x\n"
2601 "%inval1 = OpLoad %i64 %inloc1\n"
2602 "%inloc2 = OpAccessChain %i64ptr %indata2 %zero %x\n"
2603 "%inval2 = OpLoad %i64 %inloc2\n"
2604 "%rem = OpSRem %i64 %inval1 %inval2\n"
2605 "%outloc = OpAccessChain %i64ptr %outdata %zero %x\n"
2606 " OpStore %outloc %rem\n"
// Buffers are pushed in the same order as the binding numbers declared above.
2610 spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts1)));
2611 spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts2)));
2612 spec.outputs.push_back (BufferSp(new Int64Buffer(outputInts)));
// One work group per element.
2613 spec.numWorkGroups = IVec3(numElements, 1, 1);
// Per-case override of the result code and message used on a mismatch.
2614 spec.failResult = params.failResult;
2615 spec.failMessage = params.failMessage;
// The shader uses 64-bit integers, so the shaderInt64 core feature is requested.
2617 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2619 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2622 return group.release();
// Builds the "opsmod" test group: one compute case per entry of the local
// `cases` table, each running OpSMod on 200 pairs of 32-bit signed integers.
// negFailResult is the result code reported when the "all" case (whose operand
// ranges include negative values) mismatches; the "positive" case always
// reports QP_TEST_RESULT_FAIL on mismatch.
// The caller receives ownership of the released group pointer.
2625 tcu::TestCaseGroup* createOpSModComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2627 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsmod", "Test the OpSMod instruction"));
// The random generator is seeded from a hash of the group name and is shared
// by all cases.
2628 de::Random rnd (deStringHash(group->getName()));
2629 const int numElements = 200;
// Per-case parameters: name, failure message, failure result code and the
// operand range bounds.
2631 const struct CaseParams
2634 const char* failMessage; // customized status message
2635 qpTestResult failResult; // override status on failure
2636 int op1Min, op1Max; // operand ranges
2640 { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, 0, 65536, 0, 100 },
2641 { "all", "Inconsistent results, but within specification", negFailResult, -65536, 65536, -100, 100 }, // see below
2643 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2645 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2647 const CaseParams& params = cases[caseNdx];
2649 ComputeShaderSpec spec;
2650 vector<deInt32> inputInts1 (numElements, 0);
2651 vector<deInt32> inputInts2 (numElements, 0);
2652 vector<deInt32> outputInts (numElements, 0);
// Second operand is the divisor; it is generated with the filterNotZero
// filter (presumably rejecting zero to avoid division by zero - confirm).
2654 fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2655 fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2657 for (int ndx = 0; ndx < numElements; ++ndx)
// Start from the C++ remainder and derive the expected OpSMod value from it.
2659 deInt32 rem = inputInts1[ndx] % inputInts2[ndx];
2662 outputInts[ndx] = 0;
2664 else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2666 // They have the same sign
2667 outputInts[ndx] = rem;
2671 // They have opposite sign. The remainder operation takes the
2672 // sign of inputInts1[ndx] but OpSMod is supposed to take the sign
2673 // of inputInts2[ndx]. Adding inputInts2[ndx] will ensure that
2674 // the result has the correct sign and that it is still
2675 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2677 // See also http://mathforum.org/library/drmath/view/52343.html
2678 outputInts[ndx] = rem + inputInts2[ndx];
// Compute shader: each invocation uses gl_GlobalInvocationID.x as the element
// index, loads one value from each input buffer (bindings 0 and 1), applies
// OpSMod and stores the result to the output buffer (binding 2).
2683 string(getComputeAsmShaderPreamble()) +
2685 "OpName %main \"main\"\n"
2686 "OpName %id \"gl_GlobalInvocationID\"\n"
2688 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2690 "OpDecorate %buf BufferBlock\n"
2691 "OpDecorate %indata1 DescriptorSet 0\n"
2692 "OpDecorate %indata1 Binding 0\n"
2693 "OpDecorate %indata2 DescriptorSet 0\n"
2694 "OpDecorate %indata2 Binding 1\n"
2695 "OpDecorate %outdata DescriptorSet 0\n"
2696 "OpDecorate %outdata Binding 2\n"
2697 "OpDecorate %i32arr ArrayStride 4\n"
2698 "OpMemberDecorate %buf 0 Offset 0\n"
2700 + string(getComputeAsmCommonTypes()) +
2702 "%buf = OpTypeStruct %i32arr\n"
2703 "%bufptr = OpTypePointer Uniform %buf\n"
2704 "%indata1 = OpVariable %bufptr Uniform\n"
2705 "%indata2 = OpVariable %bufptr Uniform\n"
2706 "%outdata = OpVariable %bufptr Uniform\n"
2708 "%id = OpVariable %uvec3ptr Input\n"
2709 "%zero = OpConstant %i32 0\n"
2711 "%main = OpFunction %void None %voidf\n"
2712 "%label = OpLabel\n"
2713 "%idval = OpLoad %uvec3 %id\n"
2714 "%x = OpCompositeExtract %u32 %idval 0\n"
2715 "%inloc1 = OpAccessChain %i32ptr %indata1 %zero %x\n"
2716 "%inval1 = OpLoad %i32 %inloc1\n"
2717 "%inloc2 = OpAccessChain %i32ptr %indata2 %zero %x\n"
2718 "%inval2 = OpLoad %i32 %inloc2\n"
2719 "%rem = OpSMod %i32 %inval1 %inval2\n"
2720 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
2721 " OpStore %outloc %rem\n"
// Buffers are pushed in the same order as the binding numbers declared above.
2725 spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts1)));
2726 spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts2)));
2727 spec.outputs.push_back (BufferSp(new Int32Buffer(outputInts)));
// One work group per element.
2728 spec.numWorkGroups = IVec3(numElements, 1, 1);
// Per-case override of the result code and message used on a mismatch.
2729 spec.failResult = params.failResult;
2730 spec.failMessage = params.failMessage;
2732 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2735 return group.release();
// Builds the "opsmod64" test group: the 64-bit counterpart of the OpSMod
// compute test. One case is created per entry of the local `cases` table, each
// running OpSMod on 200 pairs of 64-bit signed integers.
// negFailResult is the result code reported when the "all" case mismatches;
// the "positive" case always reports QP_TEST_RESULT_FAIL on mismatch.
// The caller receives ownership of the released group pointer.
// NOTE(review): the group description below does not mention "64-bit", unlike
// the "opsrem64" group description; consider aligning the wording.
2738 tcu::TestCaseGroup* createOpSModComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2740 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsmod64", "Test the OpSMod instruction"));
// The random generator is seeded from a hash of the group name and is shared
// by all cases.
2741 de::Random rnd (deStringHash(group->getName()));
2742 const int numElements = 200;
// Per-case parameters: name, failure message, failure result code and a flag
// selecting which input filters are used (see params.positive below).
2744 const struct CaseParams
2747 const char* failMessage; // customized status message
2748 qpTestResult failResult; // override status on failure
2752 { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, true },
2753 { "all", "Inconsistent results, but within specification", negFailResult, false }, // see below
2755 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2757 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2759 const CaseParams& params = cases[caseNdx];
2761 ComputeShaderSpec spec;
2762 vector<deInt64> inputInts1 (numElements, 0);
2763 vector<deInt64> inputInts2 (numElements, 0);
2764 vector<deInt64> outputInts (numElements, 0);
// Positive case: first operand uses filterNonNegative, divisor uses
// filterPositive.
2767 if (params.positive)
2769 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2770 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
// General case: first operand is unfiltered, divisor uses filterNotZero.
2774 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2775 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2778 for (int ndx = 0; ndx < numElements; ++ndx)
// Start from the C++ remainder and derive the expected OpSMod value from it.
2780 deInt64 rem = inputInts1[ndx] % inputInts2[ndx];
2783 outputInts[ndx] = 0;
2785 else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2787 // They have the same sign
2788 outputInts[ndx] = rem;
2792 // They have opposite sign. The remainder operation takes the
2793 // sign of inputInts1[ndx] but OpSMod is supposed to take the sign
2794 // of inputInts2[ndx]. Adding inputInts2[ndx] will ensure that
2795 // the result has the correct sign and that it is still
2796 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2798 // See also http://mathforum.org/library/drmath/view/52343.html
2799 outputInts[ndx] = rem + inputInts2[ndx];
// Compute shader declaring the Int64 capability: each invocation uses
// gl_GlobalInvocationID.x as the element index, loads one 64-bit value from
// each input buffer (bindings 0 and 1), applies OpSMod and stores the result
// to the output buffer (binding 2). The array stride is 8 bytes.
2804 "OpCapability Int64\n"
2806 + string(getComputeAsmShaderPreamble()) +
2808 "OpName %main \"main\"\n"
2809 "OpName %id \"gl_GlobalInvocationID\"\n"
2811 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2813 "OpDecorate %buf BufferBlock\n"
2814 "OpDecorate %indata1 DescriptorSet 0\n"
2815 "OpDecorate %indata1 Binding 0\n"
2816 "OpDecorate %indata2 DescriptorSet 0\n"
2817 "OpDecorate %indata2 Binding 1\n"
2818 "OpDecorate %outdata DescriptorSet 0\n"
2819 "OpDecorate %outdata Binding 2\n"
2820 "OpDecorate %i64arr ArrayStride 8\n"
2821 "OpMemberDecorate %buf 0 Offset 0\n"
2823 + string(getComputeAsmCommonTypes())
2824 + string(getComputeAsmCommonInt64Types()) +
2826 "%buf = OpTypeStruct %i64arr\n"
2827 "%bufptr = OpTypePointer Uniform %buf\n"
2828 "%indata1 = OpVariable %bufptr Uniform\n"
2829 "%indata2 = OpVariable %bufptr Uniform\n"
2830 "%outdata = OpVariable %bufptr Uniform\n"
2832 "%id = OpVariable %uvec3ptr Input\n"
2833 "%zero = OpConstant %i64 0\n"
2835 "%main = OpFunction %void None %voidf\n"
2836 "%label = OpLabel\n"
2837 "%idval = OpLoad %uvec3 %id\n"
2838 "%x = OpCompositeExtract %u32 %idval 0\n"
2839 "%inloc1 = OpAccessChain %i64ptr %indata1 %zero %x\n"
2840 "%inval1 = OpLoad %i64 %inloc1\n"
2841 "%inloc2 = OpAccessChain %i64ptr %indata2 %zero %x\n"
2842 "%inval2 = OpLoad %i64 %inloc2\n"
2843 "%rem = OpSMod %i64 %inval1 %inval2\n"
2844 "%outloc = OpAccessChain %i64ptr %outdata %zero %x\n"
2845 " OpStore %outloc %rem\n"
// Buffers are pushed in the same order as the binding numbers declared above.
2849 spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts1)));
2850 spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts2)));
2851 spec.outputs.push_back (BufferSp(new Int64Buffer(outputInts)));
// One work group per element.
2852 spec.numWorkGroups = IVec3(numElements, 1, 1);
// Per-case override of the result code and message used on a mismatch.
2853 spec.failResult = params.failResult;
2854 spec.failMessage = params.failMessage;
// The shader uses 64-bit integers, so the shaderInt64 core feature is requested.
2856 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2858 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2861 return group.release();
// Builds the "opcopymemory" test group with four compute cases that exercise
// OpCopyMemory on different pointee types: "vector" (vec4), "array"
// (float[100]), "struct" (four vec4 members) and "float" (scalar).
// The caller receives ownership of the released group pointer.
2864 // Copy contents in the input buffer to the output buffer.
2865 tcu::TestCaseGroup* createOpCopyMemoryGroup (tcu::TestContext& testCtx)
2867 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opcopymemory", "Test the OpCopyMemory instruction"));
// The random generator is seeded from a hash of the group name and is shared
// by all four cases.
2868 de::Random rnd (deStringHash(group->getName()));
2869 const int numElements = 100;
2871 // The following case adds vec4(0., 0.5, 1.5, 2.5) to each of the elements in the input buffer and writes output to the output buffer.
2872 ComputeShaderSpec spec1;
2873 vector<Vec4> inputFloats1 (numElements);
2874 vector<Vec4> outputFloats1 (numElements);
// The Vec4 array is filled as a flat run of numElements * 4 scalars.
2876 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats1[0], numElements * 4);
2878 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
2879 floorAll(inputFloats1);
2881 for (size_t ndx = 0; ndx < numElements; ++ndx)
2882 outputFloats1[ndx] = inputFloats1[ndx] + Vec4(0.f, 0.5f, 1.5f, 2.5f);
// Shader for the "vector" case: OpCopyMemory copies one vec4 from the input
// buffer into a Function-storage variable, which is then loaded, offset by
// the constant vec4 and stored to the output buffer.
2885 string(getComputeAsmShaderPreamble()) +
2887 "OpName %main \"main\"\n"
2888 "OpName %id \"gl_GlobalInvocationID\"\n"
2890 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2891 "OpDecorate %vec4arr ArrayStride 16\n"
2893 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2895 "%vec4 = OpTypeVector %f32 4\n"
2896 "%vec4ptr_u = OpTypePointer Uniform %vec4\n"
2897 "%vec4ptr_f = OpTypePointer Function %vec4\n"
2898 "%vec4arr = OpTypeRuntimeArray %vec4\n"
2899 "%buf = OpTypeStruct %vec4arr\n"
2900 "%bufptr = OpTypePointer Uniform %buf\n"
2901 "%indata = OpVariable %bufptr Uniform\n"
2902 "%outdata = OpVariable %bufptr Uniform\n"
2904 "%id = OpVariable %uvec3ptr Input\n"
2905 "%zero = OpConstant %i32 0\n"
2906 "%c_f_0 = OpConstant %f32 0.\n"
2907 "%c_f_0_5 = OpConstant %f32 0.5\n"
2908 "%c_f_1_5 = OpConstant %f32 1.5\n"
2909 "%c_f_2_5 = OpConstant %f32 2.5\n"
2910 "%c_vec4 = OpConstantComposite %vec4 %c_f_0 %c_f_0_5 %c_f_1_5 %c_f_2_5\n"
2912 "%main = OpFunction %void None %voidf\n"
2913 "%label = OpLabel\n"
2914 "%v_vec4 = OpVariable %vec4ptr_f Function\n"
2915 "%idval = OpLoad %uvec3 %id\n"
2916 "%x = OpCompositeExtract %u32 %idval 0\n"
2917 "%inloc = OpAccessChain %vec4ptr_u %indata %zero %x\n"
2918 "%outloc = OpAccessChain %vec4ptr_u %outdata %zero %x\n"
2919 " OpCopyMemory %v_vec4 %inloc\n"
2920 "%v_vec4_val = OpLoad %vec4 %v_vec4\n"
2921 "%add = OpFAdd %vec4 %v_vec4_val %c_vec4\n"
2922 " OpStore %outloc %add\n"
2926 spec1.inputs.push_back(BufferSp(new Vec4Buffer(inputFloats1)));
2927 spec1.outputs.push_back(BufferSp(new Vec4Buffer(outputFloats1)));
// One work group per vec4 element.
2928 spec1.numWorkGroups = IVec3(numElements, 1, 1);
2930 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector", "OpCopyMemory elements of vector type", spec1));
2932 // The following case copies a float[100] variable from the input buffer to the output buffer.
2933 ComputeShaderSpec spec2;
2934 vector<float> inputFloats2 (numElements);
2935 vector<float> outputFloats2 (numElements);
2937 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats2[0], numElements);
// Expected output is an exact copy of the input.
2939 for (size_t ndx = 0; ndx < numElements; ++ndx)
2940 outputFloats2[ndx] = inputFloats2[ndx];
// Shader for the "array" case: the whole float[100] array is copied from the
// input buffer into a Function-storage variable and from there into the
// output buffer, using two OpCopyMemory instructions.
2943 string(getComputeAsmShaderPreamble()) +
2945 "OpName %main \"main\"\n"
2946 "OpName %id \"gl_GlobalInvocationID\"\n"
2948 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2949 "OpDecorate %f32arr100 ArrayStride 4\n"
2951 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2953 "%hundred = OpConstant %u32 100\n"
2954 "%f32arr100 = OpTypeArray %f32 %hundred\n"
2955 "%f32arr100ptr_f = OpTypePointer Function %f32arr100\n"
2956 "%f32arr100ptr_u = OpTypePointer Uniform %f32arr100\n"
2957 "%buf = OpTypeStruct %f32arr100\n"
2958 "%bufptr = OpTypePointer Uniform %buf\n"
2959 "%indata = OpVariable %bufptr Uniform\n"
2960 "%outdata = OpVariable %bufptr Uniform\n"
2962 "%id = OpVariable %uvec3ptr Input\n"
2963 "%zero = OpConstant %i32 0\n"
2965 "%main = OpFunction %void None %voidf\n"
2966 "%label = OpLabel\n"
2967 "%var = OpVariable %f32arr100ptr_f Function\n"
2968 "%inarr = OpAccessChain %f32arr100ptr_u %indata %zero\n"
2969 "%outarr = OpAccessChain %f32arr100ptr_u %outdata %zero\n"
2970 " OpCopyMemory %var %inarr\n"
2971 " OpCopyMemory %outarr %var\n"
2975 spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2976 spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
// A single work group copies the whole array.
2977 spec2.numWorkGroups = IVec3(1, 1, 1);
2979 group->addChild(new SpvAsmComputeShaderCase(testCtx, "array", "OpCopyMemory elements of array type", spec2));
2981 // The following case copies a struct{vec4, vec4, vec4, vec4} variable from the input buffer to the output buffer.
2982 ComputeShaderSpec spec3;
// 16 floats = four vec4 members.
2983 vector<float> inputFloats3 (16);
2984 vector<float> outputFloats3 (16);
2986 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats3[0], 16);
// Expected output is an exact copy of the input.
2988 for (size_t ndx = 0; ndx < 16; ++ndx)
2989 outputFloats3[ndx] = inputFloats3[ndx];
// Shader for the "struct" case: the whole buffer struct is copied into a
// Function-storage variable and then into the output buffer.
2992 string(getComputeAsmShaderPreamble()) +
2994 "OpName %main \"main\"\n"
2995 "OpName %id \"gl_GlobalInvocationID\"\n"
2997 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2998 //"OpMemberDecorate %buf 0 Offset 0\n" - exists in getComputeAsmInputOutputBufferTraits
2999 "OpMemberDecorate %buf 1 Offset 16\n"
3000 "OpMemberDecorate %buf 2 Offset 32\n"
3001 "OpMemberDecorate %buf 3 Offset 48\n"
3003 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3005 "%vec4 = OpTypeVector %f32 4\n"
3006 "%buf = OpTypeStruct %vec4 %vec4 %vec4 %vec4\n"
3007 "%bufptr = OpTypePointer Uniform %buf\n"
3008 "%indata = OpVariable %bufptr Uniform\n"
3009 "%outdata = OpVariable %bufptr Uniform\n"
3010 "%vec4stptr = OpTypePointer Function %buf\n"
3012 "%id = OpVariable %uvec3ptr Input\n"
3013 "%zero = OpConstant %i32 0\n"
3015 "%main = OpFunction %void None %voidf\n"
3016 "%label = OpLabel\n"
3017 "%var = OpVariable %vec4stptr Function\n"
3018 " OpCopyMemory %var %indata\n"
3019 " OpCopyMemory %outdata %var\n"
3023 spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3024 spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
// A single work group copies the whole struct.
3025 spec3.numWorkGroups = IVec3(1, 1, 1);
3027 group->addChild(new SpvAsmComputeShaderCase(testCtx, "struct", "OpCopyMemory elements of struct type", spec3));
3029 // The following case negates multiple float variables from the input buffer and stores the results to the output buffer.
3030 ComputeShaderSpec spec4;
3031 vector<float> inputFloats4 (numElements);
3032 vector<float> outputFloats4 (numElements);
3034 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats4[0], numElements);
// Expected output is the negated input.
3036 for (size_t ndx = 0; ndx < numElements; ++ndx)
3037 outputFloats4[ndx] = -inputFloats4[ndx];
// Shader for the "float" case: OpCopyMemory copies one float into a
// Function-storage variable, which is loaded, negated and stored to the
// output buffer.
3040 string(getComputeAsmShaderPreamble()) +
3042 "OpName %main \"main\"\n"
3043 "OpName %id \"gl_GlobalInvocationID\"\n"
3045 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3047 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3049 "%f32ptr_f = OpTypePointer Function %f32\n"
3050 "%id = OpVariable %uvec3ptr Input\n"
3051 "%zero = OpConstant %i32 0\n"
3053 "%main = OpFunction %void None %voidf\n"
3054 "%label = OpLabel\n"
3055 "%var = OpVariable %f32ptr_f Function\n"
3056 "%idval = OpLoad %uvec3 %id\n"
3057 "%x = OpCompositeExtract %u32 %idval 0\n"
3058 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
3059 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3060 " OpCopyMemory %var %inloc\n"
3061 "%val = OpLoad %f32 %var\n"
3062 "%neg = OpFNegate %f32 %val\n"
3063 " OpStore %outloc %neg\n"
3067 spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3068 spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
// One work group per float element.
3069 spec4.numWorkGroups = IVec3(numElements, 1, 1);
3071 group->addChild(new SpvAsmComputeShaderCase(testCtx, "float", "OpCopyMemory elements of float type", spec4));
3073 return group.release();
// Builds the "opcopyobject" test group with a single compute case
// ("spotcheck"). The shader copies constants of five different types (scalar,
// vector, matrix, array, struct) with OpCopyObject, extracts one float from
// each copy, sums them (5 * 1.5 = 7.5) and adds the sum to every input element.
// The caller receives ownership of the released group pointer.
3076 tcu::TestCaseGroup* createOpCopyObjectGroup (tcu::TestContext& testCtx)
3078 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opcopyobject", "Test the OpCopyObject instruction"));
3079 ComputeShaderSpec spec;
// The random generator is seeded from a hash of the group name.
3080 de::Random rnd (deStringHash(group->getName()));
3081 const int numElements = 100;
3082 vector<float> inputFloats (numElements, 0);
3083 vector<float> outputFloats (numElements, 0);
3085 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
3087 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3088 floorAll(inputFloats);
// Expected output: input plus the 7.5 accumulated by the shader.
3090 for (size_t ndx = 0; ndx < numElements; ++ndx)
3091 outputFloats[ndx] = inputFloats[ndx] + 7.5f;
3094 string(getComputeAsmShaderPreamble()) +
3096 "OpName %main \"main\"\n"
3097 "OpName %id \"gl_GlobalInvocationID\"\n"
3099 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3101 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
// Extra composite types used only as OpCopyObject operands.
3103 "%fmat = OpTypeMatrix %fvec3 3\n"
3104 "%three = OpConstant %u32 3\n"
3105 "%farr = OpTypeArray %f32 %three\n"
3106 "%fst = OpTypeStruct %f32 %f32\n"
3108 + string(getComputeAsmInputOutputBuffer()) +
3110 "%id = OpVariable %uvec3ptr Input\n"
3111 "%zero = OpConstant %i32 0\n"
// Every constant below is built from the same scalar 1.5.
3112 "%c_f = OpConstant %f32 1.5\n"
3113 "%c_fvec3 = OpConstantComposite %fvec3 %c_f %c_f %c_f\n"
3114 "%c_fmat = OpConstantComposite %fmat %c_fvec3 %c_fvec3 %c_fvec3\n"
3115 "%c_farr = OpConstantComposite %farr %c_f %c_f %c_f\n"
3116 "%c_fst = OpConstantComposite %fst %c_f %c_f\n"
3118 "%main = OpFunction %void None %voidf\n"
3119 "%label = OpLabel\n"
3120 "%c_f_copy = OpCopyObject %f32 %c_f\n"
3121 "%c_fvec3_copy = OpCopyObject %fvec3 %c_fvec3\n"
3122 "%c_fmat_copy = OpCopyObject %fmat %c_fmat\n"
3123 "%c_farr_copy = OpCopyObject %farr %c_farr\n"
3124 "%c_fst_copy = OpCopyObject %fst %c_fst\n"
// Pull one float out of each composite copy.
3125 "%fvec3_elem = OpCompositeExtract %f32 %c_fvec3_copy 0\n"
3126 "%fmat_elem = OpCompositeExtract %f32 %c_fmat_copy 1 2\n"
3127 "%farr_elem = OpCompositeExtract %f32 %c_farr_copy 2\n"
3128 "%fst_elem = OpCompositeExtract %f32 %c_fst_copy 1\n"
3129 // Add up. 1.5 * 5 = 7.5.
3130 "%add1 = OpFAdd %f32 %c_f_copy %fvec3_elem\n"
3131 "%add2 = OpFAdd %f32 %add1 %fmat_elem\n"
3132 "%add3 = OpFAdd %f32 %add2 %farr_elem\n"
3133 "%add4 = OpFAdd %f32 %add3 %fst_elem\n"
// Add the accumulated constant to the input element selected by
// gl_GlobalInvocationID.x and store it to the output buffer.
3135 "%idval = OpLoad %uvec3 %id\n"
3136 "%x = OpCompositeExtract %u32 %idval 0\n"
3137 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
3138 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3139 "%inval = OpLoad %f32 %inloc\n"
3140 "%add = OpFAdd %f32 %add4 %inval\n"
3141 " OpStore %outloc %add\n"
3144 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3145 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
// One work group per element.
3146 spec.numWorkGroups = IVec3(numElements, 1, 1);
3148 group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "OpCopyObject on different types", spec));
3150 return group.release();
3152 // Assembly code used for testing OpUnreachable is based on GLSL source code:
3156 // layout(std140, set = 0, binding = 0) readonly buffer Input {
3157 // float elements[];
3159 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
3160 // float elements[];
3163 // void not_called_func() {
3164 // // place OpUnreachable here
3167 // uint modulo4(uint val) {
3168 // switch (val % uint(4)) {
3169 // case 0: return 3;
3170 // case 1: return 2;
3171 // case 2: return 1;
3172 // case 3: return 0;
3173 // default: return 100; // place OpUnreachable here
3179 // // place OpUnreachable here
3183 // uint x = gl_GlobalInvocationID.x;
3184 // if (const5() > modulo4(1000)) {
3185 // output_data.elements[x] = -input_data.elements[x];
3187 // // place OpUnreachable here
3188 // output_data.elements[x] = input_data.elements[x];
// Adds the Amber-script based OpUnreachable test cases to `group`.
// The registration code is guarded by #ifndef CTS_USES_VULKANSC, so it is
// compiled out when CTS_USES_VULKANSC is defined.
3192 void addOpUnreachableAmberTests(tcu::TestCaseGroup& group, tcu::TestContext& testCtx)
3194 #ifndef CTS_USES_VULKANSC
// Directory passed to createAmberTestCase as the location of the scripts.
3195 static const char dataDir[] = "spirv_assembly/instruction/compute/unreachable";
// Table of cases: each entry provides a name and a description.
3203 static const Case cases[] =
3205 { "unreachable-switch-merge-in-loop", "Test containing an unreachable switch merge block inside an infinite loop" },
3208 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
// The script file name is the case name with an ".amber" suffix.
3210 const string fileName = cases[i].name + ".amber";
3211 group.addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
// Adds the Amber-script based OpSwitch test cases to `group`.
// The registration code is guarded by #ifndef CTS_USES_VULKANSC, so it is
// compiled out when CTS_USES_VULKANSC is defined.
3219 void addOpSwitchAmberTests(tcu::TestCaseGroup& group, tcu::TestContext& testCtx)
3221 #ifndef CTS_USES_VULKANSC
// Directory passed to createAmberTestCase as the location of the scripts.
3222 static const char dataDir[] = "spirv_assembly/instruction/compute/switch";
// Table of cases: each entry provides a name and a description.
3230 static const Case cases[] =
3232 { "switch-case-to-merge-block", "Test switch containing a case that jumps directly to the merge block" },
3235 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
// The script file name is the case name with an ".amber" suffix.
3237 const string fileName = cases[i].name + ".amber";
3238 group.addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
// Creates the "oparraylength" test group, populated only with Amber-script cases.
// The function is declared behind #ifndef CTS_USES_VULKANSC.
// Each row of `cases` becomes one child whose script is "<name>.amber" under dataDir.
// The group is handed back through group.release(), i.e. as a raw pointer.
3246 #ifndef CTS_USES_VULKANSC
3247 tcu::TestCaseGroup* createOpArrayLengthComputeGroup (tcu::TestContext& testCtx)
3249 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "oparraylength", "Test the OpArrayLength instruction"));
// Script directory handed to createAmberTestCase() for every case below.
3250 static const char dataDir[] = "spirv_assembly/instruction/compute/arraylength";
// Table of { name, description } pairs; one Amber test case is created per row.
3258 static const Case cases[] =
3260 { "array-stride-larger-than-element-size", "Test using an unsized array with stride larger than the element size" }
3263 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
// The script file name is derived from the case name.
3265 const string fileName = cases[i].name + ".amber";
3266 group->addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3269 return group.release();
// Creates the "opunreachable" test group.
// It contains one assembly-based compute case named "all", whose shader places OpUnreachable in
// several positions that are never executed, followed by the Amber cases added through
// addOpUnreachableAmberTests().
// The "all" case feeds numElements random floats (fillRandomScalars called with bounds 1.f and 100.f)
// and expects each output element to be the negation of the matching input element.
// The group is handed back through group.release(), i.e. as a raw pointer.
3273 tcu::TestCaseGroup* createOpUnreachableGroup (tcu::TestContext& testCtx)
3275 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opunreachable", "Test the OpUnreachable instruction"));
3276 ComputeShaderSpec spec;
// The random generator is seeded from a hash of the group name.
3277 de::Random rnd (deStringHash(group->getName()));
3278 const int numElements = 100;
3279 vector<float> positiveFloats (numElements, 0);
3280 vector<float> negativeFloats (numElements, 0);
3282 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
// Reference output: the negated input, matching the shader's if_true branch (OpFNegate).
3284 for (size_t ndx = 0; ndx < numElements; ++ndx)
3285 negativeFloats[ndx] = -positiveFloats[ndx];
3288 string(getComputeAsmShaderPreamble()) +
3290 "OpSource GLSL 430\n"
3291 "OpName %main \"main\"\n"
3292 "OpName %func_not_called_func \"not_called_func(\"\n"
3293 "OpName %func_modulo4 \"modulo4(u1;\"\n"
3294 "OpName %func_const5 \"const5(\"\n"
3295 "OpName %id \"gl_GlobalInvocationID\"\n"
3297 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3299 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3301 "%u32ptr = OpTypePointer Function %u32\n"
3302 "%uintfuint = OpTypeFunction %u32 %u32ptr\n"
3303 "%unitf = OpTypeFunction %u32\n"
3305 "%id = OpVariable %uvec3ptr Input\n"
3306 "%zero = OpConstant %u32 0\n"
3307 "%one = OpConstant %u32 1\n"
3308 "%two = OpConstant %u32 2\n"
3309 "%three = OpConstant %u32 3\n"
3310 "%four = OpConstant %u32 4\n"
3311 "%five = OpConstant %u32 5\n"
3312 "%hundred = OpConstant %u32 100\n"
3313 "%thousand = OpConstant %u32 1000\n"
3315 + string(getComputeAsmInputOutputBuffer()) +
// main(): const5() yields 5 and modulo4(1000) yields 3 (1000 % 4 == 0 selects %case0), so the
// comparison 5 > 3 holds and only the negating %if_true branch runs.
3318 "%main = OpFunction %void None %voidf\n"
3319 "%main_entry = OpLabel\n"
3320 "%v_thousand = OpVariable %u32ptr Function %thousand\n"
3321 "%idval = OpLoad %uvec3 %id\n"
3322 "%x = OpCompositeExtract %u32 %idval 0\n"
3323 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
3324 "%inval = OpLoad %f32 %inloc\n"
3325 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3326 "%ret_const5 = OpFunctionCall %u32 %func_const5\n"
3327 "%ret_modulo4 = OpFunctionCall %u32 %func_modulo4 %v_thousand\n"
3328 "%cmp_gt = OpUGreaterThan %bool %ret_const5 %ret_modulo4\n"
3329 " OpSelectionMerge %if_end None\n"
3330 " OpBranchConditional %cmp_gt %if_true %if_false\n"
3331 "%if_true = OpLabel\n"
3332 "%negate = OpFNegate %f32 %inval\n"
3333 " OpStore %outloc %negate\n"
3334 " OpBranch %if_end\n"
3335 "%if_false = OpLabel\n"
3336 " OpUnreachable\n" // Unreachable else branch for if statement
3337 "%if_end = OpLabel\n"
3341 // not_called_function()
3342 "%func_not_called_func = OpFunction %void None %voidf\n"
3343 "%not_called_func_entry = OpLabel\n"
3344 " OpUnreachable\n" // Unreachable entry block in not called static function
// modulo4(): switches on (val % 4); every listed case returns, leaving the default and merge blocks unreachable.
3348 "%func_modulo4 = OpFunction %u32 None %uintfuint\n"
3349 "%valptr = OpFunctionParameter %u32ptr\n"
3350 "%modulo4_entry = OpLabel\n"
3351 "%val = OpLoad %u32 %valptr\n"
3352 "%modulo = OpUMod %u32 %val %four\n"
3353 " OpSelectionMerge %switch_merge None\n"
3354 " OpSwitch %modulo %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
3355 "%case0 = OpLabel\n"
3356 " OpReturnValue %three\n"
3357 "%case1 = OpLabel\n"
3358 " OpReturnValue %two\n"
3359 "%case2 = OpLabel\n"
3360 " OpReturnValue %one\n"
3361 "%case3 = OpLabel\n"
3362 " OpReturnValue %zero\n"
3363 "%default = OpLabel\n"
3364 " OpUnreachable\n" // Unreachable default case for switch statement
3365 "%switch_merge = OpLabel\n"
3366 " OpUnreachable\n" // Unreachable merge block for switch statement
// const5(): returns the constant 5; the block after the return is never entered.
3370 "%func_const5 = OpFunction %u32 None %unitf\n"
3371 "%const5_entry = OpLabel\n"
3372 " OpReturnValue %five\n"
3373 "%unreachable = OpLabel\n"
3374 " OpUnreachable\n" // Unreachable block in function
// One input buffer (positive values) and one expected-output buffer (their negations); one workgroup per element.
3376 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
3377 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
3378 spec.numWorkGroups = IVec3(numElements, 1, 1);
3380 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpUnreachable appearing at different places", spec));
// Append the Amber-script based cases to the same group.
3382 addOpUnreachableAmberTests(*group, testCtx);
3384 return group.release();
3387 // Assembly code used for testing decoration group is based on GLSL source code:
3391 // layout(std140, set = 0, binding = 0) readonly buffer Input0 {
3392 // float elements[];
3394 // layout(std140, set = 0, binding = 1) readonly buffer Input1 {
3395 // float elements[];
3397 // layout(std140, set = 0, binding = 2) readonly buffer Input2 {
3398 // float elements[];
3400 // layout(std140, set = 0, binding = 3) readonly buffer Input3 {
3401 // float elements[];
3403 // layout(std140, set = 0, binding = 4) readonly buffer Input4 {
3404 // float elements[];
3406 // layout(std140, set = 0, binding = 5) writeonly buffer Output {
3407 // float elements[];
3411 // uint x = gl_GlobalInvocationID.x;
3412 // output_data.elements[x] = input_data0.elements[x] + input_data1.elements[x] + input_data2.elements[x] + input_data3.elements[x] + input_data4.elements[x];
// Creates the "decoration_group" test group with a single compute case named "all".
// The shader reads five input buffers and writes their element-wise sum to one output buffer;
// most buffer decorations (BufferBlock, Offset, DescriptorSet, NonWritable, Restrict) are applied
// through OpDecorationGroup / OpGroupDecorate / OpGroupMemberDecorate instead of direct OpDecorate.
// The group is handed back through group.release(), i.e. as a raw pointer.
3414 tcu::TestCaseGroup* createDecorationGroupGroup (tcu::TestContext& testCtx)
3416 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "decoration_group", "Test the OpDecorationGroup & OpGroupDecorate instruction"));
3417 ComputeShaderSpec spec;
// The random generator is seeded from a hash of the group name.
3418 de::Random rnd (deStringHash(group->getName()));
3419 const int numElements = 100;
3420 vector<float> inputFloats0 (numElements, 0);
3421 vector<float> inputFloats1 (numElements, 0);
3422 vector<float> inputFloats2 (numElements, 0);
3423 vector<float> inputFloats3 (numElements, 0);
3424 vector<float> inputFloats4 (numElements, 0);
3425 vector<float> outputFloats (numElements, 0);
3427 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats0[0], numElements);
3428 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats1[0], numElements);
3429 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats2[0], numElements);
3430 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats3[0], numElements);
3431 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats4[0], numElements);
3433 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3434 floorAll(inputFloats0);
3435 floorAll(inputFloats1);
3436 floorAll(inputFloats2);
3437 floorAll(inputFloats3);
3438 floorAll(inputFloats4);
// Reference output: sum of the five inputs, added in the same order as the shader's OpFAdd chain.
3440 for (size_t ndx = 0; ndx < numElements; ++ndx)
3441 outputFloats[ndx] = inputFloats0[ndx] + inputFloats1[ndx] + inputFloats2[ndx] + inputFloats3[ndx] + inputFloats4[ndx];
3444 string(getComputeAsmShaderPreamble()) +
3446 "OpSource GLSL 430\n"
3447 "OpName %main \"main\"\n"
3448 "OpName %id \"gl_GlobalInvocationID\"\n"
3450 // Not using group decoration on variable.
3451 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3452 // Not using group decoration on type.
3453 "OpDecorate %f32arr ArrayStride 4\n"
// %groups carries BufferBlock for the struct types; %groupm carries Offset 0 for their first member.
3455 "OpDecorate %groups BufferBlock\n"
3456 "OpDecorate %groupm Offset 0\n"
3457 "%groups = OpDecorationGroup\n"
3458 "%groupm = OpDecorationGroup\n"
3460 // Group decoration on multiple structs.
3461 "OpGroupDecorate %groups %outbuf %inbuf0 %inbuf1 %inbuf2 %inbuf3 %inbuf4\n"
3462 // Group decoration on multiple struct members.
3463 "OpGroupMemberDecorate %groupm %outbuf 0 %inbuf0 0 %inbuf1 0 %inbuf2 0 %inbuf3 0 %inbuf4 0\n"
// %group0 receives no decorations, %group1 one (DescriptorSet 0), %group3 three (DescriptorSet 0, NonWritable, Restrict).
3465 "OpDecorate %group1 DescriptorSet 0\n"
3466 "OpDecorate %group3 DescriptorSet 0\n"
3467 "OpDecorate %group3 NonWritable\n"
3468 "OpDecorate %group3 Restrict\n"
3469 "%group0 = OpDecorationGroup\n"
3470 "%group1 = OpDecorationGroup\n"
3471 "%group3 = OpDecorationGroup\n"
3473 // Applying the same decoration group multiple times.
3474 "OpGroupDecorate %group1 %outdata\n"
3475 "OpGroupDecorate %group1 %outdata\n"
3476 "OpGroupDecorate %group1 %outdata\n"
3477 "OpDecorate %outdata DescriptorSet 0\n"
3478 "OpDecorate %outdata Binding 5\n"
3479 // Applying decoration group containing nothing.
3480 "OpGroupDecorate %group0 %indata0\n"
3481 "OpDecorate %indata0 DescriptorSet 0\n"
3482 "OpDecorate %indata0 Binding 0\n"
3483 // Applying decoration group containing one decoration.
3484 "OpGroupDecorate %group1 %indata1\n"
3485 "OpDecorate %indata1 Binding 1\n"
3486 // Applying decoration group containing multiple decorations.
3487 "OpGroupDecorate %group3 %indata2 %indata3\n"
3488 "OpDecorate %indata2 Binding 2\n"
3489 "OpDecorate %indata3 Binding 3\n"
3490 // Applying multiple decoration groups (with overlapping).
3491 "OpGroupDecorate %group0 %indata4\n"
3492 "OpGroupDecorate %group1 %indata4\n"
3493 "OpGroupDecorate %group3 %indata4\n"
3494 "OpDecorate %indata4 Binding 4\n"
3496 + string(getComputeAsmCommonTypes()) +
3498 "%id = OpVariable %uvec3ptr Input\n"
3499 "%zero = OpConstant %i32 0\n"
// One distinct struct type, pointer type and Uniform variable per buffer, so each can be decorated separately.
3501 "%outbuf = OpTypeStruct %f32arr\n"
3502 "%outbufptr = OpTypePointer Uniform %outbuf\n"
3503 "%outdata = OpVariable %outbufptr Uniform\n"
3504 "%inbuf0 = OpTypeStruct %f32arr\n"
3505 "%inbuf0ptr = OpTypePointer Uniform %inbuf0\n"
3506 "%indata0 = OpVariable %inbuf0ptr Uniform\n"
3507 "%inbuf1 = OpTypeStruct %f32arr\n"
3508 "%inbuf1ptr = OpTypePointer Uniform %inbuf1\n"
3509 "%indata1 = OpVariable %inbuf1ptr Uniform\n"
3510 "%inbuf2 = OpTypeStruct %f32arr\n"
3511 "%inbuf2ptr = OpTypePointer Uniform %inbuf2\n"
3512 "%indata2 = OpVariable %inbuf2ptr Uniform\n"
3513 "%inbuf3 = OpTypeStruct %f32arr\n"
3514 "%inbuf3ptr = OpTypePointer Uniform %inbuf3\n"
3515 "%indata3 = OpVariable %inbuf3ptr Uniform\n"
3516 "%inbuf4 = OpTypeStruct %f32arr\n"
3517 "%inbufptr = OpTypePointer Uniform %inbuf4\n"
3518 "%indata4 = OpVariable %inbufptr Uniform\n"
// main(): load element x from each input, add them left to right, store the sum to the output.
3520 "%main = OpFunction %void None %voidf\n"
3521 "%label = OpLabel\n"
3522 "%idval = OpLoad %uvec3 %id\n"
3523 "%x = OpCompositeExtract %u32 %idval 0\n"
3524 "%inloc0 = OpAccessChain %f32ptr %indata0 %zero %x\n"
3525 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
3526 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
3527 "%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
3528 "%inloc4 = OpAccessChain %f32ptr %indata4 %zero %x\n"
3529 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3530 "%inval0 = OpLoad %f32 %inloc0\n"
3531 "%inval1 = OpLoad %f32 %inloc1\n"
3532 "%inval2 = OpLoad %f32 %inloc2\n"
3533 "%inval3 = OpLoad %f32 %inloc3\n"
3534 "%inval4 = OpLoad %f32 %inloc4\n"
3535 "%add0 = OpFAdd %f32 %inval0 %inval1\n"
3536 "%add1 = OpFAdd %f32 %add0 %inval2\n"
3537 "%add2 = OpFAdd %f32 %add1 %inval3\n"
3538 "%add = OpFAdd %f32 %add2 %inval4\n"
3539 " OpStore %outloc %add\n"
// Inputs are pushed in the order 0..4, matching the Binding 0..4 decorations above.
3542 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats0)));
3543 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
3544 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
3545 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3546 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3547 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3548 spec.numWorkGroups = IVec3(numElements, 1, 1);
3550 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "decoration group cases", spec));
3552 return group.release();
// Tag naming the scalar type held by a SpecConstantValue.
// The code below uses the enumerators SC_INT8, SC_UINT8, SC_INT16, SC_UINT16, SC_INT32, SC_UINT32,
// SC_INT64, SC_UINT64, SC_FLOAT16, SC_FLOAT32 and SC_FLOAT64.
3555 enum SpecConstantType
// A specialization-constant value together with a tag telling which scalar type it holds.
// `type` selects which member of the nested ValueUnion is meaningful.
3570 struct SpecConstantValue
3572 SpecConstantType type;
// ValueUnion constructors: one per supported scalar type, each initializing the matching member.
3586 ValueUnion (deInt8 v) : i8(v) {}
3587 ValueUnion (deUint8 v) : u8(v) {}
3588 ValueUnion (deInt16 v) : i16(v) {}
3589 ValueUnion (deUint16 v) : u16(v) {}
3590 ValueUnion (deInt32 v) : i32(v) {}
3591 ValueUnion (deUint32 v) : u32(v) {}
3592 ValueUnion (deInt64 v) : i64(v) {}
3593 ValueUnion (deUint64 v) : u64(v) {}
3594 ValueUnion (tcu::Float16 v) : f16(v) {}
3595 ValueUnion (tcu::Float32 v) : f32(v) {}
3596 ValueUnion (tcu::Float64 v) : f64(v) {}
// SpecConstantValue constructors: set the tag that corresponds to the argument type and store the value.
// They are not marked explicit, which is what allows case tables to pass bare values such as
// 62 or deInt64{2} where a SpecConstantValue parameter is expected.
3599 SpecConstantValue (deInt8 v) : type(SC_INT8) , value(v) {}
3600 SpecConstantValue (deUint8 v) : type(SC_UINT8) , value(v) {}
3601 SpecConstantValue (deInt16 v) : type(SC_INT16) , value(v) {}
3602 SpecConstantValue (deUint16 v) : type(SC_UINT16) , value(v) {}
3603 SpecConstantValue (deInt32 v) : type(SC_INT32) , value(v) {}
3604 SpecConstantValue (deUint32 v) : type(SC_UINT32) , value(v) {}
3605 SpecConstantValue (deInt64 v) : type(SC_INT64) , value(v) {}
3606 SpecConstantValue (deUint64 v) : type(SC_UINT64) , value(v) {}
3607 SpecConstantValue (tcu::Float16 v) : type(SC_FLOAT16) , value(v) {}
3608 SpecConstantValue (tcu::Float32 v) : type(SC_FLOAT32) , value(v) {}
3609 SpecConstantValue (tcu::Float64 v) : type(SC_FLOAT64) , value(v) {}
// Appends the held value to specConstants, using the union member that matches `type`.
3611 void appendTo(vkt::SpirVAssembly::SpecConstants& specConstants)
3615 case SC_INT8: specConstants.append(value.i8); break;
3616 case SC_UINT8: specConstants.append(value.u8); break;
3617 case SC_INT16: specConstants.append(value.i16); break;
3618 case SC_UINT16: specConstants.append(value.u16); break;
3619 case SC_INT32: specConstants.append(value.i32); break;
3620 case SC_UINT32: specConstants.append(value.u32); break;
3621 case SC_INT64: specConstants.append(value.i64); break;
3622 case SC_UINT64: specConstants.append(value.u64); break;
3623 case SC_FLOAT16: specConstants.append(value.f16); break;
3624 case SC_FLOAT32: specConstants.append(value.f32); break;
3625 case SC_FLOAT64: specConstants.append(value.f64); break;
// Bit mask type for per-case flags (combined with | from FLAG_* values such as FLAG_I16 or FLAG_CONVERT).
3642 using CaseFlags = deUint32;
// Description of one OpSpecConstantOp test case driven by two specialization constants.
// createSpecConstantGroup() substitutes the string members into its shader template and uses the
// remaining members to configure the compute shader spec.
3644 struct SpecConstantTwoValCase
3646 const std::string caseName; // Test case name (also used as its description).
3647 const std::string scDefinition0; // Text placed after "%sc_0 = OpSpecConstant" (template key SC_DEF0).
3648 const std::string scDefinition1; // Text placed after "%sc_1 = OpSpecConstant" (template key SC_DEF1).
3649 const std::string scResultType; // Result type of the %sc_final OpSpecConstantOp (template key SC_RESULT_TYPE).
3650 const std::string scOperation; // Opcode and operands of the %sc_final OpSpecConstantOp (template key SC_OP).
3651 SpecConstantValue scActualValue0; // Value appended first to spec.specConstants.
3652 SpecConstantValue scActualValue1; // Value appended second to spec.specConstants.
3653 const std::string resultOperation; // Instruction producing %final in main() (template key GEN_RESULT).
3654 vector<deInt32> expectedOutput; // Expected contents of the output buffer.
3655 CaseFlags caseFlags; // FLAG_* bits selecting extra capabilities, types and conversions.
// Stores every argument in the member of the corresponding name; `flags` defaults to FLAG_NONE.
3657 SpecConstantTwoValCase (const std::string& name,
3658 const std::string& definition0,
3659 const std::string& definition1,
3660 const std::string& resultType,
3661 const std::string& operation,
3662 SpecConstantValue value0,
3663 SpecConstantValue value1,
3664 const std::string& resultOp,
3665 const vector<deInt32>& output,
3666 CaseFlags flags = FLAG_NONE)
3668 , scDefinition0 (definition0)
3669 , scDefinition1 (definition1)
3670 , scResultType (resultType)
3671 , scOperation (operation)
3672 , scActualValue0 (value0)
3673 , scActualValue1 (value1)
3674 , resultOperation (resultOp)
3675 , expectedOutput (output)
// Returns the SPIR-V assembly declaring the integer constants and aggregate types used by the
// OpSpecConstantOp struct test: %zero..%three (of type %i32), a 3-element array %iarr3, a
// 3x3 array-of-arrays %imat3, and %struct wrapping %imat3.
// The text relies on %i32 being declared by the surrounding shader.
std::string getSpecConstantOpStructConstantsAndTypes ()
{
	return
		"%zero = OpConstant %i32 0\n"
		"%one = OpConstant %i32 1\n"
		"%two = OpConstant %i32 2\n"
		"%three = OpConstant %i32 3\n"
		"%iarr3 = OpTypeArray %i32 %three\n"
		"%imat3 = OpTypeArray %iarr3 %three\n"
		"%struct = OpTypeStruct %imat3\n"
		;
}
// Returns the SPIR-V assembly defining all-zero constant composites for the array, matrix and
// struct types: %iarr3_0, %imat3_0 and %struct_0.
// The text relies on %zero, %iarr3, %imat3 and %struct being declared by the surrounding shader.
std::string getSpecConstantOpStructComposites ()
{
	return
		"%iarr3_0 = OpConstantComposite %iarr3 %zero %zero %zero\n"
		"%imat3_0 = OpConstantComposite %imat3 %iarr3_0 %iarr3_0 %iarr3_0\n"
		"%struct_0 = OpConstantComposite %struct %imat3_0\n"
		;
}
// Returns the SPIR-V assembly that builds a struct out of three specialization constants using
// OpSpecConstantOp CompositeInsert, reads components back with CompositeExtract, compares them
// with IEqual and turns the comparison results into %mustbe_0, %mustbe_1 and %mustbe_2 via Select.
//
// Matrix layout stored in %struct_a:
//   row 0 = (sc_0, sc_1, sc_2), row 1 = (sc_1, sc_2, sc_0), row 2 = (sc_2, sc_0, sc_1)
// Hence %cmpres_1 and %cmpres_2 compare a constant with itself (sc_1 == sc_1, sc_2 == sc_2),
// while %cmpres_0 compares sc_0 with sc_1 and is false only when those two differ.
//
// The text relies on %sc_0, %sc_1, %sc_2, %i32, %bool and the ids from the constants/types and
// composites helpers being declared by the surrounding shader.
std::string getSpecConstantOpStructConstBlock ()
{
	return
		"%iarr3_a = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_0 0\n" // Compose (sc_0, sc_1, sc_2)
		"%iarr3_b = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_a 1\n"
		"%iarr3_c = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_b 2\n"

		"%iarr3_d = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_0 0\n" // Compose (sc_1, sc_2, sc_0)
		"%iarr3_e = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_d 1\n"
		"%iarr3_f = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_e 2\n"

		"%iarr3_g = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_0 0\n" // Compose (sc_2, sc_0, sc_1)
		"%iarr3_h = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_g 1\n"
		"%iarr3_i = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_h 2\n"

		"%imat3_a = OpSpecConstantOp %imat3 CompositeInsert %iarr3_c %imat3_0 0\n" // Matrix with the 3 previous arrays.
		"%imat3_b = OpSpecConstantOp %imat3 CompositeInsert %iarr3_f %imat3_a 1\n"
		"%imat3_c = OpSpecConstantOp %imat3 CompositeInsert %iarr3_i %imat3_b 2\n"

		"%struct_a = OpSpecConstantOp %struct CompositeInsert %imat3_c %struct_0 0\n" // Save it in the struct.

		"%comp_0_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 0 0\n" // Extract some component pairs to compare them.
		"%comp_1_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 1 0\n"

		"%comp_0_1 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 0 1\n"
		"%comp_2_2 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 2 2\n"

		"%comp_2_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 2 0\n"
		"%comp_1_1 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 1 1\n"

		"%cmpres_0 = OpSpecConstantOp %bool IEqual %comp_0_0 %comp_1_0\n" // Must be false.
		"%cmpres_1 = OpSpecConstantOp %bool IEqual %comp_0_1 %comp_2_2\n" // Must be true.
		"%cmpres_2 = OpSpecConstantOp %bool IEqual %comp_2_0 %comp_1_1\n" // Must be true.

		"%mustbe_0 = OpSpecConstantOp %i32 Select %cmpres_0 %one %zero\n" // Must select 0
		"%mustbe_1 = OpSpecConstantOp %i32 Select %cmpres_1 %one %zero\n" // Must select 1
		"%mustbe_2 = OpSpecConstantOp %i32 Select %cmpres_2 %two %one\n" // Must select 2
		;
}
// Returns the SPIR-V instructions that fold the %mustbe_* values into %sc_final.
// The text relies on %one, %i32, %mustbe_0, %mustbe_1, %mustbe_2 and %sc_factor being declared
// by the surrounding shader.
std::string getSpecConstantOpStructInstructions ()
{
	return
		// Multiply final result with (1-mustbezero)*(mustbeone)*(mustbetwo-1). If everything goes right, the factor should be 1 and
		// the final result should not be altered.
		"%subf_a = OpISub %i32 %one %mustbe_0\n"
		"%subf_b = OpIMul %i32 %subf_a %mustbe_1\n"
		"%subf_c = OpISub %i32 %mustbe_2 %one\n"
		"%factor = OpIMul %i32 %subf_b %subf_c\n"
		"%sc_final = OpIMul %i32 %factor %sc_factor\n"
		;
}
3755 tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
3757 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
3758 vector<SpecConstantTwoValCase> cases;
3759 de::Random rnd (deStringHash(group->getName()));
3760 const int numElements = 100;
3761 vector<deInt32> inputInts (numElements, 0);
3762 vector<deInt32> outputInts1 (numElements, 0);
3763 vector<deInt32> outputInts2 (numElements, 0);
3764 vector<deInt32> outputInts3 (numElements, 0);
3765 vector<deInt32> outputInts4 (numElements, 0);
3766 vector<deInt32> outputInts5 (numElements, 0);
3767 const StringTemplate shaderTemplate (
3768 "${CAPABILITIES:opt}"
3769 + string(getComputeAsmShaderPreamble()) +
3771 "OpName %main \"main\"\n"
3772 "OpName %id \"gl_GlobalInvocationID\"\n"
3774 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3775 "OpDecorate %sc_0 SpecId 0\n"
3776 "OpDecorate %sc_1 SpecId 1\n"
3777 "OpDecorate %i32arr ArrayStride 4\n"
3779 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3781 "${OPTYPE_DEFINITIONS:opt}"
3782 "%buf = OpTypeStruct %i32arr\n"
3783 "%bufptr = OpTypePointer Uniform %buf\n"
3784 "%indata = OpVariable %bufptr Uniform\n"
3785 "%outdata = OpVariable %bufptr Uniform\n"
3787 "%id = OpVariable %uvec3ptr Input\n"
3788 "%zero = OpConstant %i32 0\n"
3790 "%sc_0 = OpSpecConstant${SC_DEF0}\n"
3791 "%sc_1 = OpSpecConstant${SC_DEF1}\n"
3792 "%sc_final = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n"
3794 "%main = OpFunction %void None %voidf\n"
3795 "%label = OpLabel\n"
3796 "${TYPE_CONVERT:opt}"
3797 "%idval = OpLoad %uvec3 %id\n"
3798 "%x = OpCompositeExtract %u32 %idval 0\n"
3799 "%inloc = OpAccessChain %i32ptr %indata %zero %x\n"
3800 "%inval = OpLoad %i32 %inloc\n"
3801 "%final = ${GEN_RESULT}\n"
3802 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
3803 " OpStore %outloc %final\n"
3805 " OpFunctionEnd\n");
3807 fillRandomScalars(rnd, -65536, 65536, &inputInts[0], numElements);
3809 for (size_t ndx = 0; ndx < numElements; ++ndx)
3811 outputInts1[ndx] = inputInts[ndx] + 42;
3812 outputInts2[ndx] = inputInts[ndx];
3813 outputInts3[ndx] = inputInts[ndx] - 11200;
3814 outputInts4[ndx] = inputInts[ndx] + 1;
3815 outputInts5[ndx] = inputInts[ndx] - 42;
3818 const char addScToInput[] = "OpIAdd %i32 %inval %sc_final";
3819 const char addSc32ToInput[] = "OpIAdd %i32 %inval %sc_final32";
3820 const char selectTrueUsingSc[] = "OpSelect %i32 %sc_final %inval %zero";
3821 const char selectFalseUsingSc[] = "OpSelect %i32 %sc_final %zero %inval";
3823 cases.push_back(SpecConstantTwoValCase("iadd", " %i32 0", " %i32 0", "%i32", "IAdd %sc_0 %sc_1", 62, -20, addScToInput, outputInts1));
3824 cases.push_back(SpecConstantTwoValCase("isub", " %i32 0", " %i32 0", "%i32", "ISub %sc_0 %sc_1", 100, 58, addScToInput, outputInts1));
3825 cases.push_back(SpecConstantTwoValCase("imul", " %i32 0", " %i32 0", "%i32", "IMul %sc_0 %sc_1", -2, -21, addScToInput, outputInts1));
3826 cases.push_back(SpecConstantTwoValCase("sdiv", " %i32 0", " %i32 0", "%i32", "SDiv %sc_0 %sc_1", -126, -3, addScToInput, outputInts1));
3827 cases.push_back(SpecConstantTwoValCase("udiv", " %i32 0", " %i32 0", "%i32", "UDiv %sc_0 %sc_1", 126, 3, addScToInput, outputInts1));
3828 cases.push_back(SpecConstantTwoValCase("srem", " %i32 0", " %i32 0", "%i32", "SRem %sc_0 %sc_1", 7, 3, addScToInput, outputInts4));
3829 cases.push_back(SpecConstantTwoValCase("smod", " %i32 0", " %i32 0", "%i32", "SMod %sc_0 %sc_1", 7, 3, addScToInput, outputInts4));
3830 cases.push_back(SpecConstantTwoValCase("umod", " %i32 0", " %i32 0", "%i32", "UMod %sc_0 %sc_1", 342, 50, addScToInput, outputInts1));
3831 cases.push_back(SpecConstantTwoValCase("bitwiseand", " %i32 0", " %i32 0", "%i32", "BitwiseAnd %sc_0 %sc_1", 42, 63, addScToInput, outputInts1));
3832 cases.push_back(SpecConstantTwoValCase("bitwiseor", " %i32 0", " %i32 0", "%i32", "BitwiseOr %sc_0 %sc_1", 34, 8, addScToInput, outputInts1));
3833 cases.push_back(SpecConstantTwoValCase("bitwisexor", " %i32 0", " %i32 0", "%i32", "BitwiseXor %sc_0 %sc_1", 18, 56, addScToInput, outputInts1));
3834 cases.push_back(SpecConstantTwoValCase("shiftrightlogical", " %i32 0", " %i32 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 168, 2, addScToInput, outputInts1));
3835 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic", " %i32 0", " %i32 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -168, 2, addScToInput, outputInts5));
3836 cases.push_back(SpecConstantTwoValCase("shiftleftlogical", " %i32 0", " %i32 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 21, 1, addScToInput, outputInts1));
3838 // Shifts for other integer sizes.
3839 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i64", " %i64 0", " %i64 0", "%i64", "ShiftRightLogical %sc_0 %sc_1", deInt64{168}, deInt64{2}, addSc32ToInput, outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3840 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i64", " %i64 0", " %i64 0", "%i64", "ShiftRightArithmetic %sc_0 %sc_1", deInt64{-168}, deInt64{2}, addSc32ToInput, outputInts5, (FLAG_I64 | FLAG_CONVERT)));
3841 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i64", " %i64 0", " %i64 0", "%i64", "ShiftLeftLogical %sc_0 %sc_1", deInt64{21}, deInt64{1}, addSc32ToInput, outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3842 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i16", " %i16 0", " %i16 0", "%i16", "ShiftRightLogical %sc_0 %sc_1", deInt16{168}, deInt16{2}, addSc32ToInput, outputInts1, (FLAG_I16 | FLAG_CONVERT)));
3843 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i16", " %i16 0", " %i16 0", "%i16", "ShiftRightArithmetic %sc_0 %sc_1", deInt16{-168}, deInt16{2}, addSc32ToInput, outputInts5, (FLAG_I16 | FLAG_CONVERT)));
3844 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i16", " %i16 0", " %i16 0", "%i16", "ShiftLeftLogical %sc_0 %sc_1", deInt16{21}, deInt16{1}, addSc32ToInput, outputInts1, (FLAG_I16 | FLAG_CONVERT)));
3845 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i8", " %i8 0", " %i8 0", "%i8", "ShiftRightLogical %sc_0 %sc_1", deInt8{84}, deInt8{1}, addSc32ToInput, outputInts1, (FLAG_I8 | FLAG_CONVERT)));
3846 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i8", " %i8 0", " %i8 0", "%i8", "ShiftRightArithmetic %sc_0 %sc_1", deInt8{-84}, deInt8{1}, addSc32ToInput, outputInts5, (FLAG_I8 | FLAG_CONVERT)));
3847 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i8", " %i8 0", " %i8 0", "%i8", "ShiftLeftLogical %sc_0 %sc_1", deInt8{21}, deInt8{1}, addSc32ToInput, outputInts1, (FLAG_I8 | FLAG_CONVERT)));
3849 // Shifts for other integer sizes but only in the shift amount.
3850 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 168, deInt64{2}, addScToInput, outputInts1, (FLAG_I64)));
3851 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i64"," %i32 0", " %i64 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -168, deInt64{2}, addScToInput, outputInts5, (FLAG_I64)));
3852 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 21, deInt64{1}, addScToInput, outputInts1, (FLAG_I64)));
3853 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 168, deInt16{2}, addScToInput, outputInts1, (FLAG_I16)));
3854 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i16"," %i32 0", " %i16 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -168, deInt16{2}, addScToInput, outputInts5, (FLAG_I16)));
3855 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 21, deInt16{1}, addScToInput, outputInts1, (FLAG_I16)));
3856 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 84, deInt8{1}, addScToInput, outputInts1, (FLAG_I8)));
3857 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -84, deInt8{1}, addScToInput, outputInts5, (FLAG_I8)));
3858 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 21, deInt8{1}, addScToInput, outputInts1, (FLAG_I8)));
3860 cases.push_back(SpecConstantTwoValCase("slessthan", " %i32 0", " %i32 0", "%bool", "SLessThan %sc_0 %sc_1", -20, -10, selectTrueUsingSc, outputInts2));
3861 cases.push_back(SpecConstantTwoValCase("ulessthan", " %i32 0", " %i32 0", "%bool", "ULessThan %sc_0 %sc_1", 10, 20, selectTrueUsingSc, outputInts2));
3862 cases.push_back(SpecConstantTwoValCase("sgreaterthan", " %i32 0", " %i32 0", "%bool", "SGreaterThan %sc_0 %sc_1", -1000, 50, selectFalseUsingSc, outputInts2));
3863 cases.push_back(SpecConstantTwoValCase("ugreaterthan", " %i32 0", " %i32 0", "%bool", "UGreaterThan %sc_0 %sc_1", 10, 5, selectTrueUsingSc, outputInts2));
3864 cases.push_back(SpecConstantTwoValCase("slessthanequal", " %i32 0", " %i32 0", "%bool", "SLessThanEqual %sc_0 %sc_1", -10, -10, selectTrueUsingSc, outputInts2));
3865 cases.push_back(SpecConstantTwoValCase("ulessthanequal", " %i32 0", " %i32 0", "%bool", "ULessThanEqual %sc_0 %sc_1", 50, 100, selectTrueUsingSc, outputInts2));
3866 cases.push_back(SpecConstantTwoValCase("sgreaterthanequal", " %i32 0", " %i32 0", "%bool", "SGreaterThanEqual %sc_0 %sc_1", -1000, 50, selectFalseUsingSc, outputInts2));
3867 cases.push_back(SpecConstantTwoValCase("ugreaterthanequal", " %i32 0", " %i32 0", "%bool", "UGreaterThanEqual %sc_0 %sc_1", 10, 10, selectTrueUsingSc, outputInts2));
3868 cases.push_back(SpecConstantTwoValCase("iequal", " %i32 0", " %i32 0", "%bool", "IEqual %sc_0 %sc_1", 42, 24, selectFalseUsingSc, outputInts2));
3869 cases.push_back(SpecConstantTwoValCase("inotequal", " %i32 0", " %i32 0", "%bool", "INotEqual %sc_0 %sc_1", 42, 24, selectTrueUsingSc, outputInts2));
3870 cases.push_back(SpecConstantTwoValCase("logicaland", "True %bool", "True %bool", "%bool", "LogicalAnd %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputInts2));
3871 cases.push_back(SpecConstantTwoValCase("logicalor", "False %bool", "False %bool", "%bool", "LogicalOr %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputInts2));
3872 cases.push_back(SpecConstantTwoValCase("logicalequal", "True %bool", "True %bool", "%bool", "LogicalEqual %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputInts2));
3873 cases.push_back(SpecConstantTwoValCase("logicalnotequal", "False %bool", "False %bool", "%bool", "LogicalNotEqual %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputInts2));
3874 cases.push_back(SpecConstantTwoValCase("snegate", " %i32 0", " %i32 0", "%i32", "SNegate %sc_0", -42, 0, addScToInput, outputInts1));
3875 cases.push_back(SpecConstantTwoValCase("not", " %i32 0", " %i32 0", "%i32", "Not %sc_0", -43, 0, addScToInput, outputInts1));
3876 cases.push_back(SpecConstantTwoValCase("logicalnot", "False %bool", "False %bool", "%bool", "LogicalNot %sc_0", 1, 0, selectFalseUsingSc, outputInts2));
3877 cases.push_back(SpecConstantTwoValCase("select", "False %bool", " %i32 0", "%i32", "Select %sc_0 %sc_1 %zero", 1, 42, addScToInput, outputInts1));
3878 cases.push_back(SpecConstantTwoValCase("sconvert", " %i32 0", " %i32 0", "%i16", "SConvert %sc_0", -11200, 0, addSc32ToInput, outputInts3, (FLAG_I16 | FLAG_CONVERT)));
3879 cases.push_back(SpecConstantTwoValCase("fconvert", " %f32 0", " %f32 0", "%f64", "FConvert %sc_0", tcu::Float32{-11200.0}, tcu::Float32{0.0}, addSc32ToInput, outputInts3, (FLAG_F64 | FLAG_CONVERT)));
3880 cases.push_back(SpecConstantTwoValCase("fconvert16", " %f16 0", " %f16 0", "%f32", "FConvert %sc_0", tcu::Float16{1.0}, tcu::Float16{0.0}, addSc32ToInput, outputInts4, (FLAG_F16 | FLAG_CONVERT)));
3882 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
3884 map<string, string> specializations;
3885 ComputeShaderSpec spec;
3887 specializations["SC_DEF0"] = cases[caseNdx].scDefinition0;
3888 specializations["SC_DEF1"] = cases[caseNdx].scDefinition1;
3889 specializations["SC_RESULT_TYPE"] = cases[caseNdx].scResultType;
3890 specializations["SC_OP"] = cases[caseNdx].scOperation;
3891 specializations["GEN_RESULT"] = cases[caseNdx].resultOperation;
3893 // Special SPIR-V code when using 16-bit integers.
3894 if (cases[caseNdx].caseFlags & FLAG_I16)
3896 spec.requestedVulkanFeatures.coreFeatures.shaderInt16 = VK_TRUE;
3897 specializations["CAPABILITIES"] += "OpCapability Int16\n"; // Adds 16-bit integer capability
3898 specializations["OPTYPE_DEFINITIONS"] += "%i16 = OpTypeInt 16 1\n"; // Adds 16-bit integer type
3899 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3900 specializations["TYPE_CONVERT"] += "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 16-bit integer to 32-bit integer
3903 // Special SPIR-V code when using 64-bit integers.
3904 if (cases[caseNdx].caseFlags & FLAG_I64)
3906 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
3907 specializations["CAPABILITIES"] += "OpCapability Int64\n"; // Adds 64-bit integer capability
3908 specializations["OPTYPE_DEFINITIONS"] += "%i64 = OpTypeInt 64 1\n"; // Adds 64-bit integer type
3909 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3910 specializations["TYPE_CONVERT"] += "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 64-bit integer to 32-bit integer
3913 // Special SPIR-V code when using 64-bit floats.
3914 if (cases[caseNdx].caseFlags & FLAG_F64)
3916 spec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3917 specializations["CAPABILITIES"] += "OpCapability Float64\n"; // Adds 64-bit float capability
3918 specializations["OPTYPE_DEFINITIONS"] += "%f64 = OpTypeFloat 64\n"; // Adds 64-bit float type
3919 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3920 specializations["TYPE_CONVERT"] += "%sc_final32 = OpConvertFToS %i32 %sc_final\n"; // Converts 64-bit float to 32-bit integer
3923 // Extension needed for float16 and int8.
3924 if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
3925 spec.extensions.push_back("VK_KHR_shader_float16_int8");
3927 // Special SPIR-V code when using 16-bit floats.
3928 if (cases[caseNdx].caseFlags & FLAG_F16)
3930 spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
3931 specializations["CAPABILITIES"] += "OpCapability Float16\n"; // Adds 16-bit float capability
3932 specializations["OPTYPE_DEFINITIONS"] += "%f16 = OpTypeFloat 16\n"; // Adds 16-bit float type
3933 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3934 specializations["TYPE_CONVERT"] += "%sc_final32 = OpConvertFToS %i32 %sc_final\n"; // Converts 16-bit float to 32-bit integer
3937 // Special SPIR-V code when using 8-bit integers.
3938 if (cases[caseNdx].caseFlags & FLAG_I8)
3940 spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
3941 specializations["CAPABILITIES"] += "OpCapability Int8\n"; // Adds 8-bit integer capability
3942 specializations["OPTYPE_DEFINITIONS"] += "%i8 = OpTypeInt 8 1\n"; // Adds 8-bit integer type
3943 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3944 specializations["TYPE_CONVERT"] += "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 8-bit integer to 32-bit integer
3947 spec.assembly = shaderTemplate.specialize(specializations);
3948 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
3949 spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
3950 spec.numWorkGroups = IVec3(numElements, 1, 1);
3951 cases[caseNdx].scActualValue0.appendTo(spec.specConstants);
3952 cases[caseNdx].scActualValue1.appendTo(spec.specConstants);
3954 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName.c_str(), cases[caseNdx].caseName.c_str(), spec));
3957 ComputeShaderSpec spec;
3960 string(getComputeAsmShaderPreamble()) +
3962 "OpName %main \"main\"\n"
3963 "OpName %id \"gl_GlobalInvocationID\"\n"
3965 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3966 "OpDecorate %sc_0 SpecId 0\n"
3967 "OpDecorate %sc_1 SpecId 1\n"
3968 "OpDecorate %sc_2 SpecId 2\n"
3969 "OpDecorate %i32arr ArrayStride 4\n"
3971 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3973 "%ivec3 = OpTypeVector %i32 3\n"
3975 + getSpecConstantOpStructConstantsAndTypes() +
3977 "%buf = OpTypeStruct %i32arr\n"
3978 "%bufptr = OpTypePointer Uniform %buf\n"
3979 "%indata = OpVariable %bufptr Uniform\n"
3980 "%outdata = OpVariable %bufptr Uniform\n"
3982 "%id = OpVariable %uvec3ptr Input\n"
3983 "%ivec3_0 = OpConstantComposite %ivec3 %zero %zero %zero\n"
3984 "%vec3_undef = OpUndef %ivec3\n"
3986 + getSpecConstantOpStructComposites () +
3988 "%sc_0 = OpSpecConstant %i32 0\n"
3989 "%sc_1 = OpSpecConstant %i32 0\n"
3990 "%sc_2 = OpSpecConstant %i32 0\n"
3992 + getSpecConstantOpStructConstBlock () +
3994 "%sc_vec3_0 = OpSpecConstantOp %ivec3 CompositeInsert %sc_0 %ivec3_0 0\n" // (sc_0, 0, 0)
3995 "%sc_vec3_1 = OpSpecConstantOp %ivec3 CompositeInsert %sc_1 %ivec3_0 1\n" // (0, sc_1, 0)
3996 "%sc_vec3_2 = OpSpecConstantOp %ivec3 CompositeInsert %sc_2 %ivec3_0 2\n" // (0, 0, sc_2)
3997 "%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0 %vec3_undef 0 0xFFFFFFFF 2\n" // (sc_0, ???, 0)
3998 "%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_1 %vec3_undef 0xFFFFFFFF 1 0\n" // (???, sc_1, 0)
3999 "%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle %vec3_undef %sc_vec3_2 5 0xFFFFFFFF 5\n" // (sc_2, ???, sc_2)
4000 "%sc_vec3_01 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n" // (0, sc_0, sc_1)
4001 "%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_01 %sc_vec3_2_s 5 1 2\n" // (sc_2, sc_0, sc_1)
4002 "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
4003 "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
4004 "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
4005 "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
4006 "%sc_factor = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n" // (sc_2 - sc_0) * sc_1
4008 "%main = OpFunction %void None %voidf\n"
4009 "%label = OpLabel\n"
4011 + getSpecConstantOpStructInstructions() +
4013 "%idval = OpLoad %uvec3 %id\n"
4014 "%x = OpCompositeExtract %u32 %idval 0\n"
4015 "%inloc = OpAccessChain %i32ptr %indata %zero %x\n"
4016 "%inval = OpLoad %i32 %inloc\n"
4017 "%final = OpIAdd %i32 %inval %sc_final\n"
4018 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
4019 " OpStore %outloc %final\n"
4022 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
4023 spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts3)));
4024 spec.numWorkGroups = IVec3(numElements, 1, 1);
4025 spec.specConstants.append<deInt32>(123);
4026 spec.specConstants.append<deInt32>(56);
4027 spec.specConstants.append<deInt32>(-77);
4029 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", "VectorShuffle, CompositeExtract, & CompositeInsert", spec));
4031 return group.release();
// Adds seven compute-shader cases to `group`. Each case branches on whether
// the input float is greater than zero and then uses OpPhi to pick between two
// constants of a particular type (int, float, float16, vec3, mat4, array,
// struct). The expected output is 1.0 for positive inputs and -1.0 otherwise
// (stored as half-float bits for the float16 case).
// NOTE(review): brace-only lines and several short statements / assembly
// lines (for instance the %tb, %fb and %cm labels that the OpPhi operands
// refer to) do not appear in this listing — confirm against the complete file
// before changing any code here.
4034 void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestContext& testCtx)
// One shader spec per OpPhi result type under test.
4036 ComputeShaderSpec specInt;
4037 ComputeShaderSpec specFloat;
4038 ComputeShaderSpec specFloat16;
4039 ComputeShaderSpec specVec3;
4040 ComputeShaderSpec specMat4;
4041 ComputeShaderSpec specArray;
4042 ComputeShaderSpec specStruct;
// The random generator is seeded from a hash of the group's name.
4043 de::Random rnd (deStringHash(group->getName()));
4044 const int numElements = 100;
4045 vector<float> inputFloats (numElements, 0);
4046 vector<float> outputFloats (numElements, 0);
// Half-float bit patterns of the inputs/outputs, one per 32-bit word; used only by the float16 case.
4047 vector<deUint32> inputUints (numElements, 0);
4048 vector<deUint32> outputUints (numElements, 0);
// Inputs are drawn with bounds -300 and 300 and then floored to whole numbers.
4050 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4052 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4053 floorAll(inputFloats);
// Reference result shared by all cases: +1 for strictly positive input, -1 otherwise.
4055 for (size_t ndx = 0; ndx < numElements; ++ndx)
4057 // Just check if the value is positive or not
4058 outputFloats[ndx] = (inputFloats[ndx] > 0) ? 1.0f : -1.0f;
// Convert the float inputs and expected outputs to 16-bit float bit patterns.
4061 for (size_t ndx = 0; ndx < numElements; ++ndx)
4063 inputUints[ndx] = tcu::Float16(inputFloats[ndx]).bits();
4064 outputUints[ndx] = tcu::Float16(outputFloats[ndx]).bits();
4067 // All of the tests are of the form:
4071 // if (inputdata > 0)
// Float case: OpPhi directly on %f32, choosing %float_1 or %float_n1.
4078 specFloat.assembly =
4079 string(getComputeAsmShaderPreamble()) +
4081 "OpSource GLSL 430\n"
4082 "OpName %main \"main\"\n"
4083 "OpName %id \"gl_GlobalInvocationID\"\n"
4085 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4087 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4089 "%id = OpVariable %uvec3ptr Input\n"
4090 "%zero = OpConstant %i32 0\n"
4091 "%float_0 = OpConstant %f32 0.0\n"
4092 "%float_1 = OpConstant %f32 1.0\n"
4093 "%float_n1 = OpConstant %f32 -1.0\n"
4095 "%main = OpFunction %void None %voidf\n"
4096 "%entry = OpLabel\n"
4097 "%idval = OpLoad %uvec3 %id\n"
4098 "%x = OpCompositeExtract %u32 %idval 0\n"
4099 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4100 "%inval = OpLoad %f32 %inloc\n"
4102 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4103 " OpSelectionMerge %cm None\n"
4104 " OpBranchConditional %comp %tb %fb\n"
4110 "%res = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4112 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4113 " OpStore %outloc %res\n"
4117 specFloat.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4118 specFloat.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4119 specFloat.numWorkGroups = IVec3(numElements, 1, 1);
// Float16 case: the module header and buffer decorations are written out
// inline (with OpCapability Float16) instead of using the shared preamble.
// Buffers are runtime arrays of %u32; each word is bitcast to a two-component
// f16 vector and component 0 is used.
4121 specFloat16.assembly =
4122 "OpCapability Shader\n"
4123 "OpCapability Float16\n"
4124 "OpMemoryModel Logical GLSL450\n"
4125 "OpEntryPoint GLCompute %main \"main\" %id\n"
4126 "OpExecutionMode %main LocalSize 1 1 1\n"
4128 "OpSource GLSL 430\n"
4129 "OpName %main \"main\"\n"
4130 "OpName %id \"gl_GlobalInvocationID\"\n"
4132 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4134 "OpDecorate %buf BufferBlock\n"
4135 "OpDecorate %indata DescriptorSet 0\n"
4136 "OpDecorate %indata Binding 0\n"
4137 "OpDecorate %outdata DescriptorSet 0\n"
4138 "OpDecorate %outdata Binding 1\n"
4139 "OpDecorate %u32arr ArrayStride 4\n"
4140 "OpMemberDecorate %buf 0 Offset 0\n"
4142 + string(getComputeAsmCommonTypes()) +
4144 "%f16 = OpTypeFloat 16\n"
4145 "%f16vec2 = OpTypeVector %f16 2\n"
4146 "%fvec2 = OpTypeVector %f32 2\n"
4147 "%u32ptr = OpTypePointer Uniform %u32\n"
4148 "%u32arr = OpTypeRuntimeArray %u32\n"
4149 "%f16_0 = OpConstant %f16 0.0\n"
4152 "%buf = OpTypeStruct %u32arr\n"
4153 "%bufptr = OpTypePointer Uniform %buf\n"
4154 "%indata = OpVariable %bufptr Uniform\n"
4155 "%outdata = OpVariable %bufptr Uniform\n"
4157 "%id = OpVariable %uvec3ptr Input\n"
4158 "%zero = OpConstant %i32 0\n"
4159 "%float_0 = OpConstant %f32 0.0\n"
4160 "%float_1 = OpConstant %f32 1.0\n"
4161 "%float_n1 = OpConstant %f32 -1.0\n"
4163 "%main = OpFunction %void None %voidf\n"
4164 "%entry = OpLabel\n"
4165 "%idval = OpLoad %uvec3 %id\n"
4166 "%x = OpCompositeExtract %u32 %idval 0\n"
4167 "%inloc = OpAccessChain %u32ptr %indata %zero %x\n"
4168 "%inval = OpLoad %u32 %inloc\n"
4169 "%f16_vec2_inval = OpBitcast %f16vec2 %inval\n"
4170 "%f16_inval = OpCompositeExtract %f16 %f16_vec2_inval 0\n"
4171 "%f32_inval = OpFConvert %f32 %f16_inval\n"
4173 "%comp = OpFOrdGreaterThan %bool %f32_inval %float_0\n"
4174 " OpSelectionMerge %cm None\n"
4175 " OpBranchConditional %comp %tb %fb\n"
// The OpPhi itself still yields %f32; the result is converted to f16 afterwards,
// paired with %f16_0 and bitcast back to a single %u32 for the store.
4181 "%res = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4182 "%f16_res = OpFConvert %f16 %res\n"
4184 "%f16vec2_res = OpCompositeConstruct %f16vec2 %f16_res %f16_0\n"
4185 "%u32_res = OpBitcast %u32 %f16vec2_res\n"
4187 "%outloc = OpAccessChain %u32ptr %outdata %zero %x\n"
4188 " OpStore %outloc %u32_res\n"
4193 specFloat16.inputs.push_back(BufferSp(new Uint32Buffer(inputUints)));
4194 specFloat16.outputs.push_back(BufferSp(new Uint32Buffer(outputUints)));
4195 specFloat16.numWorkGroups = IVec3(numElements, 1, 1);
// Sets the shaderFloat16 flag in this case's requested feature set.
4196 specFloat16.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
// Mat4 case: OpPhi on a 4x4 float matrix. %m1 has %float_1 on the diagonal and
// %m2 has %float_n1 on the diagonal; element (2, 2) is extracted as the result.
// NOTE(review): the statement head for this assembly string is not visible
// here; presumably `specMat4.assembly =` since specMat4 is configured right
// after the string — confirm.
4199 string(getComputeAsmShaderPreamble()) +
4201 "OpSource GLSL 430\n"
4202 "OpName %main \"main\"\n"
4203 "OpName %id \"gl_GlobalInvocationID\"\n"
4205 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4207 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4209 "%id = OpVariable %uvec3ptr Input\n"
4210 "%v4f32 = OpTypeVector %f32 4\n"
4211 "%mat4v4f32 = OpTypeMatrix %v4f32 4\n"
4212 "%zero = OpConstant %i32 0\n"
4213 "%float_0 = OpConstant %f32 0.0\n"
4214 "%float_1 = OpConstant %f32 1.0\n"
4215 "%float_n1 = OpConstant %f32 -1.0\n"
4216 "%m11 = OpConstantComposite %v4f32 %float_1 %float_0 %float_0 %float_0\n"
4217 "%m12 = OpConstantComposite %v4f32 %float_0 %float_1 %float_0 %float_0\n"
4218 "%m13 = OpConstantComposite %v4f32 %float_0 %float_0 %float_1 %float_0\n"
4219 "%m14 = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_1\n"
4220 "%m1 = OpConstantComposite %mat4v4f32 %m11 %m12 %m13 %m14\n"
4221 "%m21 = OpConstantComposite %v4f32 %float_n1 %float_0 %float_0 %float_0\n"
4222 "%m22 = OpConstantComposite %v4f32 %float_0 %float_n1 %float_0 %float_0\n"
4223 "%m23 = OpConstantComposite %v4f32 %float_0 %float_0 %float_n1 %float_0\n"
4224 "%m24 = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_n1\n"
4225 "%m2 = OpConstantComposite %mat4v4f32 %m21 %m22 %m23 %m24\n"
4227 "%main = OpFunction %void None %voidf\n"
4228 "%entry = OpLabel\n"
4229 "%idval = OpLoad %uvec3 %id\n"
4230 "%x = OpCompositeExtract %u32 %idval 0\n"
4231 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4232 "%inval = OpLoad %f32 %inloc\n"
4234 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4235 " OpSelectionMerge %cm None\n"
4236 " OpBranchConditional %comp %tb %fb\n"
4242 "%mres = OpPhi %mat4v4f32 %m1 %tb %m2 %fb\n"
4243 "%res = OpCompositeExtract %f32 %mres 2 2\n"
4245 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4246 " OpStore %outloc %res\n"
4250 specMat4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4251 specMat4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4252 specMat4.numWorkGroups = IVec3(numElements, 1, 1);
// Vec3 case: OpPhi on %fvec3 between an all-%float_1 and an all-%float_n1
// vector; component 2 is extracted as the result.
// NOTE(review): the statement head is not visible here; presumably
// `specVec3.assembly =` — confirm.
4255 string(getComputeAsmShaderPreamble()) +
4257 "OpSource GLSL 430\n"
4258 "OpName %main \"main\"\n"
4259 "OpName %id \"gl_GlobalInvocationID\"\n"
4261 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4263 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4265 "%id = OpVariable %uvec3ptr Input\n"
4266 "%zero = OpConstant %i32 0\n"
4267 "%float_0 = OpConstant %f32 0.0\n"
4268 "%float_1 = OpConstant %f32 1.0\n"
4269 "%float_n1 = OpConstant %f32 -1.0\n"
4270 "%v1 = OpConstantComposite %fvec3 %float_1 %float_1 %float_1\n"
4271 "%v2 = OpConstantComposite %fvec3 %float_n1 %float_n1 %float_n1\n"
4273 "%main = OpFunction %void None %voidf\n"
4274 "%entry = OpLabel\n"
4275 "%idval = OpLoad %uvec3 %id\n"
4276 "%x = OpCompositeExtract %u32 %idval 0\n"
4277 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4278 "%inval = OpLoad %f32 %inloc\n"
4280 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4281 " OpSelectionMerge %cm None\n"
4282 " OpBranchConditional %comp %tb %fb\n"
4288 "%vres = OpPhi %fvec3 %v1 %tb %v2 %fb\n"
4289 "%res = OpCompositeExtract %f32 %vres 2\n"
4291 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4292 " OpStore %outloc %res\n"
4296 specVec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4297 specVec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4298 specVec3.numWorkGroups = IVec3(numElements, 1, 1);
// Int case: OpPhi on %i32 between the constants 1 and -1; the selected integer
// is converted to float with OpConvertSToF before being stored.
// NOTE(review): the statement head is not visible here; presumably
// `specInt.assembly =` — confirm.
4301 string(getComputeAsmShaderPreamble()) +
4303 "OpSource GLSL 430\n"
4304 "OpName %main \"main\"\n"
4305 "OpName %id \"gl_GlobalInvocationID\"\n"
4307 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4309 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4311 "%id = OpVariable %uvec3ptr Input\n"
4312 "%zero = OpConstant %i32 0\n"
4313 "%float_0 = OpConstant %f32 0.0\n"
4314 "%i1 = OpConstant %i32 1\n"
4315 "%i2 = OpConstant %i32 -1\n"
4317 "%main = OpFunction %void None %voidf\n"
4318 "%entry = OpLabel\n"
4319 "%idval = OpLoad %uvec3 %id\n"
4320 "%x = OpCompositeExtract %u32 %idval 0\n"
4321 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4322 "%inval = OpLoad %f32 %inloc\n"
4324 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4325 " OpSelectionMerge %cm None\n"
4326 " OpBranchConditional %comp %tb %fb\n"
4332 "%ires = OpPhi %i32 %i1 %tb %i2 %fb\n"
4333 "%res = OpConvertSToF %f32 %ires\n"
4335 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4336 " OpStore %outloc %res\n"
4340 specInt.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4341 specInt.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4342 specInt.numWorkGroups = IVec3(numElements, 1, 1);
// Array case: OpPhi on a 7-element float array (all %float_1 vs all %float_n1);
// element 5 is extracted as the result.
4344 specArray.assembly =
4345 string(getComputeAsmShaderPreamble()) +
4347 "OpSource GLSL 430\n"
4348 "OpName %main \"main\"\n"
4349 "OpName %id \"gl_GlobalInvocationID\"\n"
4351 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4353 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4355 "%id = OpVariable %uvec3ptr Input\n"
4356 "%zero = OpConstant %i32 0\n"
4357 "%u7 = OpConstant %u32 7\n"
4358 "%float_0 = OpConstant %f32 0.0\n"
4359 "%float_1 = OpConstant %f32 1.0\n"
4360 "%float_n1 = OpConstant %f32 -1.0\n"
4361 "%f32a7 = OpTypeArray %f32 %u7\n"
4362 "%a1 = OpConstantComposite %f32a7 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1\n"
4363 "%a2 = OpConstantComposite %f32a7 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1\n"
4364 "%main = OpFunction %void None %voidf\n"
4365 "%entry = OpLabel\n"
4366 "%idval = OpLoad %uvec3 %id\n"
4367 "%x = OpCompositeExtract %u32 %idval 0\n"
4368 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4369 "%inval = OpLoad %f32 %inloc\n"
4371 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4372 " OpSelectionMerge %cm None\n"
4373 " OpBranchConditional %comp %tb %fb\n"
4379 "%ares = OpPhi %f32a7 %a1 %tb %a2 %fb\n"
4380 "%res = OpCompositeExtract %f32 %ares 5\n"
4382 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4383 " OpStore %outloc %res\n"
4387 specArray.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4388 specArray.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4389 specArray.numWorkGroups = IVec3(numElements, 1, 1);
// Struct case: OpPhi on %Data, a struct holding a nested struct %Data2
// (a float and a 2-component float vector) plus a float. %s1 is built entirely
// from %float_1 and %s2 entirely from %float_n1; member 0 of member 0 (the
// float inside the nested struct) is extracted as the result.
4391 specStruct.assembly =
4392 string(getComputeAsmShaderPreamble()) +
4394 "OpSource GLSL 430\n"
4395 "OpName %main \"main\"\n"
4396 "OpName %id \"gl_GlobalInvocationID\"\n"
4398 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4400 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4402 "%id = OpVariable %uvec3ptr Input\n"
4403 "%zero = OpConstant %i32 0\n"
4404 "%float_0 = OpConstant %f32 0.0\n"
4405 "%float_1 = OpConstant %f32 1.0\n"
4406 "%float_n1 = OpConstant %f32 -1.0\n"
4408 "%v2f32 = OpTypeVector %f32 2\n"
4409 "%Data2 = OpTypeStruct %f32 %v2f32\n"
4410 "%Data = OpTypeStruct %Data2 %f32\n"
4412 "%in1a = OpConstantComposite %v2f32 %float_1 %float_1\n"
4413 "%in1b = OpConstantComposite %Data2 %float_1 %in1a\n"
4414 "%s1 = OpConstantComposite %Data %in1b %float_1\n"
4415 "%in2a = OpConstantComposite %v2f32 %float_n1 %float_n1\n"
4416 "%in2b = OpConstantComposite %Data2 %float_n1 %in2a\n"
4417 "%s2 = OpConstantComposite %Data %in2b %float_n1\n"
4419 "%main = OpFunction %void None %voidf\n"
4420 "%entry = OpLabel\n"
4421 "%idval = OpLoad %uvec3 %id\n"
4422 "%x = OpCompositeExtract %u32 %idval 0\n"
4423 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4424 "%inval = OpLoad %f32 %inloc\n"
4426 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4427 " OpSelectionMerge %cm None\n"
4428 " OpBranchConditional %comp %tb %fb\n"
4434 "%sres = OpPhi %Data %s1 %tb %s2 %fb\n"
4435 "%res = OpCompositeExtract %f32 %sres 0 0\n"
4437 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4438 " OpStore %outloc %res\n"
4442 specStruct.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4443 specStruct.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4444 specStruct.numWorkGroups = IVec3(numElements, 1, 1);
// Register one child test case per spec; the case name encodes the OpPhi result type.
4446 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_int", "OpPhi with int variables", specInt));
4447 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float", "OpPhi with float variables", specFloat));
4448 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float16", "OpPhi with 16bit float variables", specFloat16));
4449 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_vec3", "OpPhi with vec3 variables", specVec3));
4450 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_mat4", "OpPhi with mat4 variables", specMat4));
4451 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_array", "OpPhi with array variables", specArray));
4452 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_struct", "OpPhi with struct variables", specStruct));
// Streams `count` SPIR-V float constant definitions, one per line, of the form
// "%cf<k> = OpConstant %f32 <k>.0" where k = i * 10 + 5 for i in [0, count).
// NOTE(review): no return statement or braces are visible in this listing;
// presumably the accumulated text of `r` is returned — confirm.
4455 string generateConstantDefinitions (int count)
4457 std::ostringstream r;
4458 for (int i = 0; i < count; i++)
4459 r << "%cf" << (i * 10 + 5) << " = OpConstant %f32 " <<(i * 10 + 5) << ".0\n";
// Streams `count` literal/label pairs of the form " <i> %case<i>" for i in
// [0, count), all on one line.
// NOTE(review): no return statement or braces are visible in this listing;
// presumably the accumulated text of `r` is returned — confirm.
4464 string generateSwitchCases (int count)
4466 std::ostringstream r;
4467 for (int i = 0; i < count; i++)
4468 r << " " << i << " %case" << i;
// Streams `count` SPIR-V blocks; block i is labelled %case<i> and consists only
// of an unconditional branch to %phi.
// NOTE(review): no return statement or braces are visible in this listing;
// presumably the accumulated text of `r` is returned — confirm.
4473 string generateSwitchTargets (int count)
4475 std::ostringstream r;
4476 for (int i = 0; i < count; i++)
4477 r << "%case" << i << " = OpLabel\n OpBranch %phi\n";
// Streams `count` value/label pairs of the form " %cf<k> %case<i>" with
// k = i * 10 + 5 for i in [0, count), all on one line. The %cf<k> names follow
// the same numbering as generateConstantDefinitions and the %case<i> names the
// same numbering as generateSwitchTargets.
// NOTE(review): no return statement or braces are visible in this listing;
// presumably the accumulated text of `r` is returned — confirm.
4482 string generateOpPhiParams (int count)
4484 std::ostringstream r;
4485 for (int i = 0; i < count; i++)
4486 r << " %cf" << (i * 10 + 5) << " %case" << i;
// Takes an integer and returns a string built through an output string stream.
// NOTE(review): the statements that write to `r` and return are not visible in
// this listing; presumably `value` is formatted as decimal text — confirm.
4491 string generateIntWidth (int value)
4493 std::ostringstream r;
4498 // Expand input string by injecting "ABC" between the input
4499 // string characters. The acc/add/treshold parameters are used
4500 // to skip some of the injections to make the result less
4501 // uniform (and a lot shorter).
// Returns an expanded copy of the code string `s`; `acc` is taken by non-const
// reference, `add` and `treshold` by value.
// NOTE(review): only the declarations below are visible in this listing; the
// loop that walks `p` and applies acc/add/treshold is missing, so how `acc` is
// updated cannot be confirmed from here.
4502 string expandOpPhiCase5 (const string& s, int &acc, int add, int treshold)
4504 std::ostringstream res;
4505 const char* p = s.c_str();
4521 // Calculate expected result based on the code string
// CPU-side reference for the value expected from the shader generated for code
// string `s`, given input `val`.
// NOTE(review): the declarations of x, b, skip and depth, the loop over `p`
// and the return statement are not visible in this listing — confirm details
// against the complete file.
4522 float calcOpPhiCase5 (float val, const string& s)
4524 const char* p = s.c_str();
// Thresholds: tv[i] equals 2^i - 0.5.
4527 const float tv[8] = { 0.5f, 1.5f, 3.5f, 7.5f, 15.5f, 31.5f, 63.5f, 127.5f };
// Only the magnitude of the input is used.
4528 const float v = deFloatAbs(val);
// x[i] is v modulo 2^(i+1) (2 << i); b[i] is true when that remainder is above
// tv[i].
4533 for (int i = 7; i >= 0; --i)
4534 x[i] = std::fmod((float)v, (float)(2 << i));
4535 for (int i = 7; i >= 0; --i)
4536 b[i] = x[i] > tv[i];
// Taken only when nothing is being skipped and the flag for the current depth is set.
4543 if (skip == 0 && b[depth])
// Taken when the flag for the current depth is set or a skip is in progress.
4554 if (b[depth] || skip)
4568 // In the code string, the letters represent the following:
4571 // if (certain bit is set)
4582 // AABCBC leads to if(){r++;if(){r++;}else{}}else{}
4583 // ABABCC leads to if(){r++;}else{if(){r++;}else{}}
4584 // ABCABC leads to if(){r++;}else{}if(){r++;}else{}
4586 // Code generation gets a bit complicated due to the else-branches,
4587 // which do not generate new values. Thus, the generator needs to
4588 // keep track of the previous variable change seen by the else
// Builds SPIR-V assembly text for the nested if/else structure encoded by the
// code string `s`, emitting selection headers, true/false/merge blocks and an
// OpPhi at each merge point.
// NOTE(review): the loop over `p`, the per-letter dispatch, the declarations of
// currId and depth, and the return statement are not visible in this listing;
// the grouping of the statements below is inferred from the text they emit —
// confirm against the complete file.
4590 string generateOpPhiCase5 (const string& s)
// Stack of ids of the currently open selections.
4592 std::stack<int> idStack;
// `value` holds result ids; `valueLabel` holds "<value id> <label id>" pairs in
// the form used as OpPhi operands.
4593 std::stack<std::string> value;
4594 std::stack<std::string> valueLabel;
// Operand pairs for the two sides of an OpPhi at a merge block.
4595 std::stack<std::string> mergeLeft;
4596 std::stack<std::string> mergeRight;
4597 std::ostringstream res;
4598 const char* p = s.c_str();
// Starting value is the constant %f32_0, paired with the %entry block.
4604 value.push("%f32_0");
4605 valueLabel.push("%f32_0 %entry");
// Opening a selection: record its id, emit the merge declaration and the
// conditional branch on %b<depth>, then start the true block %t<id>, which
// adds %f32_1 to the current value and names the sum %rt<id>.
4613 idStack.push(currId);
4614 res << "\tOpSelectionMerge %m" << currId << " None\n";
4615 res << "\tOpBranchConditional %b" << depth << " %t" << currId << " %f" << currId << "\n";
4616 res << "%t" << currId << " = OpLabel\n";
4617 res << "%rt" << currId << " = OpFAdd %f32 " << value.top() << " %f32_1\n";
// The new value id becomes current; its value/label pair is "%rt<id> %t<id>".
4618 std::ostringstream tag;
4619 tag << "%rt" << currId;
4620 value.push(tag.str());
4621 tag << " %t" << currId;
4622 valueLabel.push(tag.str());
// Remember the current value/label pair as the left-hand OpPhi operand.
4627 mergeLeft.push(valueLabel.top());
// Close the preceding block with a branch to the merge block and start the
// false block %f<id>; it produces no new value, so the current value is paired
// with the %f<id> label.
4630 res << "\tOpBranch %m" << currId << "\n";
4631 res << "%f" << currId << " = OpLabel\n";
4632 std::ostringstream tag;
4633 tag << value.top() << " %f" << currId;
4635 valueLabel.push(tag.str());
// Remember the current value/label pair as the right-hand OpPhi operand, then
// branch to and open the merge block %m<id>.
4640 mergeRight.push(valueLabel.top());
4641 res << "\tOpBranch %m" << currId << "\n";
4642 res << "%m" << currId << " = OpLabel\n";
// The OpPhi result is named either %res or %rm<id>.
// NOTE(review): the condition choosing between the two names is not visible here.
4644 res << "%res"; // last result goes to %res
4646 res << "%rm" << currId;
4647 res << " = OpPhi %f32 " << mergeLeft.top() << " " << mergeRight.top() << "\n";
// The merged value %rm<id> becomes current; its value/label pair is "%rm<id> %m<id>".
4648 std::ostringstream tag;
4649 tag << "%rm" << currId;
4651 value.push(tag.str());
4652 tag << " %m" << currId;
4654 valueLabel.push(tag.str());
// Restore the id of the selection on top of the id stack.
4659 currId = idStack.top();
4667 tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
4669 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
4670 ComputeShaderSpec spec1;
4671 ComputeShaderSpec spec2;
4672 ComputeShaderSpec spec3;
4673 ComputeShaderSpec spec4;
4674 ComputeShaderSpec spec5;
4675 de::Random rnd (deStringHash(group->getName()));
4676 const int numElements = 100;
4677 vector<float> inputFloats (numElements, 0);
4678 vector<float> outputFloats1 (numElements, 0);
4679 vector<float> outputFloats2 (numElements, 0);
4680 vector<float> outputFloats3 (numElements, 0);
4681 vector<float> outputFloats4 (numElements, 0);
4682 vector<float> outputFloats5 (numElements, 0);
4683 std::string codestring = "ABC";
4684 const int test4Width = 512;
4686 // Build case 5 code string. Each iteration makes the hierarchy more complicated.
4687 // 9 iterations with (7, 24) parameters makes the hierarchy 8 deep with about 1500 lines of
4689 for (int i = 0, acc = 0; i < 9; i++)
4690 codestring = expandOpPhiCase5(codestring, acc, 7, 24);
4692 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4694 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4695 floorAll(inputFloats);
4697 for (size_t ndx = 0; ndx < numElements; ++ndx)
4701 case 0: outputFloats1[ndx] = inputFloats[ndx] + 5.5f; break;
4702 case 1: outputFloats1[ndx] = inputFloats[ndx] + 20.5f; break;
4703 case 2: outputFloats1[ndx] = inputFloats[ndx] + 1.75f; break;
4706 outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
4707 outputFloats3[ndx] = 8.5f - inputFloats[ndx];
4709 int index4 = (int)deFloor(deAbs((float)ndx * inputFloats[ndx]));
4710 outputFloats4[ndx] = (float)(index4 % test4Width) * 10.0f + 5.0f;
4712 outputFloats5[ndx] = calcOpPhiCase5(inputFloats[ndx], codestring);
4716 string(getComputeAsmShaderPreamble()) +
4718 "OpSource GLSL 430\n"
4719 "OpName %main \"main\"\n"
4720 "OpName %id \"gl_GlobalInvocationID\"\n"
4722 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4724 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4726 "%id = OpVariable %uvec3ptr Input\n"
4727 "%zero = OpConstant %i32 0\n"
4728 "%three = OpConstant %u32 3\n"
4729 "%constf5p5 = OpConstant %f32 5.5\n"
4730 "%constf20p5 = OpConstant %f32 20.5\n"
4731 "%constf1p75 = OpConstant %f32 1.75\n"
4732 "%constf8p5 = OpConstant %f32 8.5\n"
4733 "%constf6p5 = OpConstant %f32 6.5\n"
4735 "%main = OpFunction %void None %voidf\n"
4736 "%entry = OpLabel\n"
4737 "%idval = OpLoad %uvec3 %id\n"
4738 "%x = OpCompositeExtract %u32 %idval 0\n"
4739 "%selector = OpUMod %u32 %x %three\n"
4740 " OpSelectionMerge %phi None\n"
4741 " OpSwitch %selector %default 0 %case0 1 %case1 2 %case2\n"
4743 // Case 1 before OpPhi.
4744 "%case1 = OpLabel\n"
4747 "%default = OpLabel\n"
4751 "%operand = OpPhi %f32 %constf1p75 %case2 %constf20p5 %case1 %constf5p5 %case0\n" // not in the order of blocks
4752 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4753 "%inval = OpLoad %f32 %inloc\n"
4754 "%add = OpFAdd %f32 %inval %operand\n"
4755 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4756 " OpStore %outloc %add\n"
4759 // Case 0 after OpPhi.
4760 "%case0 = OpLabel\n"
4764 // Case 2 after OpPhi.
4765 "%case2 = OpLabel\n"
4769 spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4770 spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
4771 spec1.numWorkGroups = IVec3(numElements, 1, 1);
4773 group->addChild(new SpvAsmComputeShaderCase(testCtx, "block", "out-of-order and unreachable blocks for OpPhi", spec1));
4776 string(getComputeAsmShaderPreamble()) +
4778 "OpName %main \"main\"\n"
4779 "OpName %id \"gl_GlobalInvocationID\"\n"
4781 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4783 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4785 "%id = OpVariable %uvec3ptr Input\n"
4786 "%zero = OpConstant %i32 0\n"
4787 "%one = OpConstant %i32 1\n"
4788 "%three = OpConstant %i32 3\n"
4789 "%constf6p5 = OpConstant %f32 6.5\n"
4791 "%main = OpFunction %void None %voidf\n"
4792 "%entry = OpLabel\n"
4793 "%idval = OpLoad %uvec3 %id\n"
4794 "%x = OpCompositeExtract %u32 %idval 0\n"
4795 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4796 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4797 "%inval = OpLoad %f32 %inloc\n"
4801 "%step = OpPhi %i32 %zero %entry %step_next %phi\n"
4802 "%accum = OpPhi %f32 %inval %entry %accum_next %phi\n"
4803 "%step_next = OpIAdd %i32 %step %one\n"
4804 "%accum_next = OpFAdd %f32 %accum %constf6p5\n"
4805 "%still_loop = OpSLessThan %bool %step %three\n"
4806 " OpLoopMerge %exit %phi None\n"
4807 " OpBranchConditional %still_loop %phi %exit\n"
4810 " OpStore %outloc %accum\n"
4813 spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4814 spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
4815 spec2.numWorkGroups = IVec3(numElements, 1, 1);
4817 group->addChild(new SpvAsmComputeShaderCase(testCtx, "induction", "The usual way induction variables are handled in LLVM IR", spec2));
4820 string(getComputeAsmShaderPreamble()) +
4822 "OpName %main \"main\"\n"
4823 "OpName %id \"gl_GlobalInvocationID\"\n"
4825 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4827 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4829 "%f32ptr_f = OpTypePointer Function %f32\n"
4830 "%id = OpVariable %uvec3ptr Input\n"
4831 "%true = OpConstantTrue %bool\n"
4832 "%false = OpConstantFalse %bool\n"
4833 "%zero = OpConstant %i32 0\n"
4834 "%constf8p5 = OpConstant %f32 8.5\n"
4836 "%main = OpFunction %void None %voidf\n"
4837 "%entry = OpLabel\n"
4838 "%b = OpVariable %f32ptr_f Function %constf8p5\n"
4839 "%idval = OpLoad %uvec3 %id\n"
4840 "%x = OpCompositeExtract %u32 %idval 0\n"
4841 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4842 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4843 "%a_init = OpLoad %f32 %inloc\n"
4844 "%b_init = OpLoad %f32 %b\n"
4848 "%still_loop = OpPhi %bool %true %entry %false %phi\n"
4849 "%a_next = OpPhi %f32 %a_init %entry %b_next %phi\n"
4850 "%b_next = OpPhi %f32 %b_init %entry %a_next %phi\n"
4851 " OpLoopMerge %exit %phi None\n"
4852 " OpBranchConditional %still_loop %phi %exit\n"
4855 "%sub = OpFSub %f32 %a_next %b_next\n"
4856 " OpStore %outloc %sub\n"
4859 spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4860 spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
4861 spec3.numWorkGroups = IVec3(numElements, 1, 1);
4863 group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", "Swap the values of two variables using OpPhi", spec3));
4866 "OpCapability Shader\n"
4867 "%ext = OpExtInstImport \"GLSL.std.450\"\n"
4868 "OpMemoryModel Logical GLSL450\n"
4869 "OpEntryPoint GLCompute %main \"main\" %id\n"
4870 "OpExecutionMode %main LocalSize 1 1 1\n"
4872 "OpSource GLSL 430\n"
4873 "OpName %main \"main\"\n"
4874 "OpName %id \"gl_GlobalInvocationID\"\n"
4876 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4878 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4880 "%id = OpVariable %uvec3ptr Input\n"
4881 "%zero = OpConstant %i32 0\n"
4882 "%cimod = OpConstant %u32 " + generateIntWidth(test4Width) + "\n"
4884 + generateConstantDefinitions(test4Width) +
4886 "%main = OpFunction %void None %voidf\n"
4887 "%entry = OpLabel\n"
4888 "%idval = OpLoad %uvec3 %id\n"
4889 "%x = OpCompositeExtract %u32 %idval 0\n"
4890 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4891 "%inval = OpLoad %f32 %inloc\n"
4892 "%xf = OpConvertUToF %f32 %x\n"
4893 "%xm = OpFMul %f32 %xf %inval\n"
4894 "%xa = OpExtInst %f32 %ext FAbs %xm\n"
4895 "%xi = OpConvertFToU %u32 %xa\n"
4896 "%selector = OpUMod %u32 %xi %cimod\n"
4897 " OpSelectionMerge %phi None\n"
4898 " OpSwitch %selector %default "
4900 + generateSwitchCases(test4Width) +
4902 "%default = OpLabel\n"
4905 + generateSwitchTargets(test4Width) +
4908 "%result = OpPhi %f32"
4910 + generateOpPhiParams(test4Width) +
4912 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4913 " OpStore %outloc %result\n"
4917 spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4918 spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
4919 spec4.numWorkGroups = IVec3(numElements, 1, 1);
4921 group->addChild(new SpvAsmComputeShaderCase(testCtx, "wide", "OpPhi with a lot of parameters", spec4));
4924 "OpCapability Shader\n"
4925 "%ext = OpExtInstImport \"GLSL.std.450\"\n"
4926 "OpMemoryModel Logical GLSL450\n"
4927 "OpEntryPoint GLCompute %main \"main\" %id\n"
4928 "OpExecutionMode %main LocalSize 1 1 1\n"
4929 "%code = OpString \"" + codestring + "\"\n"
4931 "OpSource GLSL 430\n"
4932 "OpName %main \"main\"\n"
4933 "OpName %id \"gl_GlobalInvocationID\"\n"
4935 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4937 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4939 "%id = OpVariable %uvec3ptr Input\n"
4940 "%zero = OpConstant %i32 0\n"
4941 "%f32_0 = OpConstant %f32 0.0\n"
4942 "%f32_0_5 = OpConstant %f32 0.5\n"
4943 "%f32_1 = OpConstant %f32 1.0\n"
4944 "%f32_1_5 = OpConstant %f32 1.5\n"
4945 "%f32_2 = OpConstant %f32 2.0\n"
4946 "%f32_3_5 = OpConstant %f32 3.5\n"
4947 "%f32_4 = OpConstant %f32 4.0\n"
4948 "%f32_7_5 = OpConstant %f32 7.5\n"
4949 "%f32_8 = OpConstant %f32 8.0\n"
4950 "%f32_15_5 = OpConstant %f32 15.5\n"
4951 "%f32_16 = OpConstant %f32 16.0\n"
4952 "%f32_31_5 = OpConstant %f32 31.5\n"
4953 "%f32_32 = OpConstant %f32 32.0\n"
4954 "%f32_63_5 = OpConstant %f32 63.5\n"
4955 "%f32_64 = OpConstant %f32 64.0\n"
4956 "%f32_127_5 = OpConstant %f32 127.5\n"
4957 "%f32_128 = OpConstant %f32 128.0\n"
4958 "%f32_256 = OpConstant %f32 256.0\n"
4960 "%main = OpFunction %void None %voidf\n"
4961 "%entry = OpLabel\n"
4962 "%idval = OpLoad %uvec3 %id\n"
4963 "%x = OpCompositeExtract %u32 %idval 0\n"
4964 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4965 "%inval = OpLoad %f32 %inloc\n"
4967 "%xabs = OpExtInst %f32 %ext FAbs %inval\n"
4968 "%x8 = OpFMod %f32 %xabs %f32_256\n"
4969 "%x7 = OpFMod %f32 %xabs %f32_128\n"
4970 "%x6 = OpFMod %f32 %xabs %f32_64\n"
4971 "%x5 = OpFMod %f32 %xabs %f32_32\n"
4972 "%x4 = OpFMod %f32 %xabs %f32_16\n"
4973 "%x3 = OpFMod %f32 %xabs %f32_8\n"
4974 "%x2 = OpFMod %f32 %xabs %f32_4\n"
4975 "%x1 = OpFMod %f32 %xabs %f32_2\n"
4977 "%b7 = OpFOrdGreaterThanEqual %bool %x8 %f32_127_5\n"
4978 "%b6 = OpFOrdGreaterThanEqual %bool %x7 %f32_63_5\n"
4979 "%b5 = OpFOrdGreaterThanEqual %bool %x6 %f32_31_5\n"
4980 "%b4 = OpFOrdGreaterThanEqual %bool %x5 %f32_15_5\n"
4981 "%b3 = OpFOrdGreaterThanEqual %bool %x4 %f32_7_5\n"
4982 "%b2 = OpFOrdGreaterThanEqual %bool %x3 %f32_3_5\n"
4983 "%b1 = OpFOrdGreaterThanEqual %bool %x2 %f32_1_5\n"
4984 "%b0 = OpFOrdGreaterThanEqual %bool %x1 %f32_0_5\n"
4986 + generateOpPhiCase5(codestring) +
4988 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4989 " OpStore %outloc %res\n"
4993 spec5.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4994 spec5.outputs.push_back(BufferSp(new Float32Buffer(outputFloats5)));
4995 spec5.numWorkGroups = IVec3(numElements, 1, 1);
4997 group->addChild(new SpvAsmComputeShaderCase(testCtx, "nested", "Stress OpPhi with a lot of nesting", spec5));
4999 createOpPhiVartypeTests(group, testCtx);
5001 return group.release();
5004 // Assembly code used for testing block order is based on GLSL source code:
5008 // layout(std140, set = 0, binding = 0) readonly buffer Input {
5009 // float elements[];
5011 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
5012 // float elements[];
5016 // uint x = gl_GlobalInvocationID.x;
5017 // output_data.elements[x] = input_data.elements[x];
5018 // if (x > uint(50)) {
5019 // switch (x % uint(3)) {
5020 // case 0: output_data.elements[x] += 1.5f; break;
5021 // case 1: output_data.elements[x] += 42.f; break;
5022 // case 2: output_data.elements[x] -= 27.f; break;
5026 // output_data.elements[x] = -input_data.elements[x];
// Builds the block_order test group, which holds a single compute case (all)
// whose SPIR-V lists its basic blocks out of source order.
// The expected output is computed on the CPU: elements 0..50 are the negated
// input, and the remaining elements are the input offset by a per-case constant.
// Returns group.release().
5029 tcu::TestCaseGroup* createBlockOrderGroup (tcu::TestContext& testCtx)
5031 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "block_order", "Test block orders"));
5032 ComputeShaderSpec spec;
// The random generator is seeded from the hash of the group name.
5033 de::Random rnd (deStringHash(group->getName()));
5034 const int numElements = 100;
5035 vector<float> inputFloats (numElements, 0);
5036 vector<float> outputFloats (numElements, 0);
5038 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
5040 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
5041 floorAll(inputFloats);
// Mirrors the false branch of the shader (x > 50 does not hold): output is the negated input.
5043 for (size_t ndx = 0; ndx <= 50; ++ndx)
5044 outputFloats[ndx] = -inputFloats[ndx];
// Mirrors the true branch of the shader: one of three constants is applied per case label.
// NOTE(review): the selecting expression is expected to match the shader (x mod 3) - confirm.
5046 for (size_t ndx = 51; ndx < numElements; ++ndx)
5050 case 0: outputFloats[ndx] = inputFloats[ndx] + 1.5f; break;
5051 case 1: outputFloats[ndx] = inputFloats[ndx] + 42.f; break;
5052 case 2: outputFloats[ndx] = inputFloats[ndx] - 27.f; break;
// Shader assembly: common preamble, debug names, decorations, shared types and buffers, then main.
5058 string(getComputeAsmShaderPreamble()) +
5060 "OpSource GLSL 430\n"
5061 "OpName %main \"main\"\n"
5062 "OpName %id \"gl_GlobalInvocationID\"\n"
5064 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5066 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5068 "%u32ptr = OpTypePointer Function %u32\n"
5069 "%u32ptr_input = OpTypePointer Input %u32\n"
5071 + string(getComputeAsmInputOutputBuffer()) +
5073 "%id = OpVariable %uvec3ptr Input\n"
5074 "%zero = OpConstant %i32 0\n"
5075 "%const3 = OpConstant %u32 3\n"
5076 "%const50 = OpConstant %u32 50\n"
5077 "%constf1p5 = OpConstant %f32 1.5\n"
5078 "%constf27 = OpConstant %f32 27.0\n"
5079 "%constf42 = OpConstant %f32 42.0\n"
5081 "%main = OpFunction %void None %voidf\n"
5084 "%entry = OpLabel\n"
5086 // Create a temporary variable to hold the value of gl_GlobalInvocationID.x.
5087 "%xvar = OpVariable %u32ptr Function\n"
5088 "%xptr = OpAccessChain %u32ptr_input %id %zero\n"
5089 "%x = OpLoad %u32 %xptr\n"
5090 " OpStore %xvar %x\n"
// Top-level branch on x > 50; every block below reloads x from %xvar.
5092 "%cmp = OpUGreaterThan %bool %x %const50\n"
5093 " OpSelectionMerge %if_merge None\n"
5094 " OpBranchConditional %cmp %if_true %if_false\n"
5096 // False branch for if-statement: placed in the middle of switch cases and before true branch.
5097 "%if_false = OpLabel\n"
5098 "%x_f = OpLoad %u32 %xvar\n"
5099 "%inloc_f = OpAccessChain %f32ptr %indata %zero %x_f\n"
5100 "%inval_f = OpLoad %f32 %inloc_f\n"
5101 "%negate = OpFNegate %f32 %inval_f\n"
5102 "%outloc_f = OpAccessChain %f32ptr %outdata %zero %x_f\n"
5103 " OpStore %outloc_f %negate\n"
5104 " OpBranch %if_merge\n"
5106 // Merge block for if-statement: placed in the middle of true and false branch.
5107 "%if_merge = OpLabel\n"
5110 // True branch for if-statement: placed in the middle of switch cases and after the false branch.
5111 "%if_true = OpLabel\n"
5112 "%xval_t = OpLoad %u32 %xvar\n"
5113 "%mod = OpUMod %u32 %xval_t %const3\n"
5114 " OpSelectionMerge %switch_merge None\n"
5115 " OpSwitch %mod %default 0 %case0 1 %case1 2 %case2\n"
5117 // Merge block for switch-statement: placed before the case
5118 // bodies. But it must follow OpSwitch which dominates it.
5119 "%switch_merge = OpLabel\n"
5120 " OpBranch %if_merge\n"
5122 // Case 1 for switch-statement: placed before case 0.
5123 // It must follow the OpSwitch that dominates it.
5124 "%case1 = OpLabel\n"
5125 "%x_1 = OpLoad %u32 %xvar\n"
5126 "%inloc_1 = OpAccessChain %f32ptr %indata %zero %x_1\n"
5127 "%inval_1 = OpLoad %f32 %inloc_1\n"
5128 "%addf42 = OpFAdd %f32 %inval_1 %constf42\n"
5129 "%outloc_1 = OpAccessChain %f32ptr %outdata %zero %x_1\n"
5130 " OpStore %outloc_1 %addf42\n"
5131 " OpBranch %switch_merge\n"
5133 // Case 2 for switch-statement.
5134 "%case2 = OpLabel\n"
5135 "%x_2 = OpLoad %u32 %xvar\n"
5136 "%inloc_2 = OpAccessChain %f32ptr %indata %zero %x_2\n"
5137 "%inval_2 = OpLoad %f32 %inloc_2\n"
5138 "%subf27 = OpFSub %f32 %inval_2 %constf27\n"
5139 "%outloc_2 = OpAccessChain %f32ptr %outdata %zero %x_2\n"
5140 " OpStore %outloc_2 %subf27\n"
5141 " OpBranch %switch_merge\n"
5143 // Default case for switch-statement: placed in the middle of normal cases.
5144 "%default = OpLabel\n"
5145 " OpBranch %switch_merge\n"
5147 // Case 0 for switch-statement: out of order.
5148 "%case0 = OpLabel\n"
5149 "%x_0 = OpLoad %u32 %xvar\n"
5150 "%inloc_0 = OpAccessChain %f32ptr %indata %zero %x_0\n"
5151 "%inval_0 = OpLoad %f32 %inloc_0\n"
5152 "%addf1p5 = OpFAdd %f32 %inval_0 %constf1p5\n"
5153 "%outloc_0 = OpAccessChain %f32ptr %outdata %zero %x_0\n"
5154 " OpStore %outloc_0 %addf1p5\n"
5155 " OpBranch %switch_merge\n"
// One input buffer, one expected-output buffer, and numElements workgroups along X
// (the shader indexes both buffers with gl_GlobalInvocationID.x).
5158 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5159 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5160 spec.numWorkGroups = IVec3(numElements, 1, 1);
5162 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "various out-of-order blocks", spec));
5164 return group.release();
// Builds the multiple_shaders test group. A single SPIR-V module declares two
// compute entry points (entrypoint1, entrypoint2) plus a vertex entry point that
// reuses the name entrypoint2. Two cases share that module:
//   shader1 selects entrypoint1 and expects input + input,
//   shader2 selects entrypoint2 and expects the negated input.
// Returns group.release().
5167 tcu::TestCaseGroup* createMultipleShaderGroup (tcu::TestContext& testCtx)
5169 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "multiple_shaders", "Test multiple shaders in the same module"));
5170 ComputeShaderSpec spec1;
5171 ComputeShaderSpec spec2;
// The random generator is seeded from the hash of the group name.
5172 de::Random rnd (deStringHash(group->getName()));
5173 const int numElements = 100;
5174 vector<float> inputFloats (numElements, 0);
5175 vector<float> outputFloats1 (numElements, 0);
5176 vector<float> outputFloats2 (numElements, 0);
5177 fillRandomScalars(rnd, -500.f, 500.f, &inputFloats[0], numElements);
// CPU reference results for the two compute entry points.
5179 for (size_t ndx = 0; ndx < numElements; ++ndx)
5181 outputFloats1[ndx] = inputFloats[ndx] + inputFloats[ndx];
5182 outputFloats2[ndx] = -inputFloats[ndx];
5185 const string assembly(
5186 "OpCapability Shader\n"
5187 "OpMemoryModel Logical GLSL450\n"
5188 "OpEntryPoint GLCompute %comp_main1 \"entrypoint1\" %id\n"
5189 "OpEntryPoint GLCompute %comp_main2 \"entrypoint2\" %id\n"
5190 // A module cannot have two OpEntryPoint instructions with the same Execution Model and the same Name string.
// The vertex entry point below shares its name with the second compute entry
// point but uses a different execution model.
5191 "OpEntryPoint Vertex %vert_main \"entrypoint2\" %vert_builtins %vertexIndex %instanceIndex\n"
5192 "OpExecutionMode %comp_main1 LocalSize 1 1 1\n"
5193 "OpExecutionMode %comp_main2 LocalSize 1 1 1\n"
5195 "OpName %comp_main1 \"entrypoint1\"\n"
5196 "OpName %comp_main2 \"entrypoint2\"\n"
5197 "OpName %vert_main \"entrypoint2\"\n"
5198 "OpName %id \"gl_GlobalInvocationID\"\n"
5199 "OpName %vert_builtin_st \"gl_PerVertex\"\n"
5200 "OpName %vertexIndex \"gl_VertexIndex\"\n"
5201 "OpName %instanceIndex \"gl_InstanceIndex\"\n"
5202 "OpMemberName %vert_builtin_st 0 \"gl_Position\"\n"
5203 "OpMemberName %vert_builtin_st 1 \"gl_PointSize\"\n"
5204 "OpMemberName %vert_builtin_st 2 \"gl_ClipDistance\"\n"
5206 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5207 "OpDecorate %vertexIndex BuiltIn VertexIndex\n"
5208 "OpDecorate %instanceIndex BuiltIn InstanceIndex\n"
5209 "OpDecorate %vert_builtin_st Block\n"
5210 "OpMemberDecorate %vert_builtin_st 0 BuiltIn Position\n"
5211 "OpMemberDecorate %vert_builtin_st 1 BuiltIn PointSize\n"
5212 "OpMemberDecorate %vert_builtin_st 2 BuiltIn ClipDistance\n"
5214 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5216 "%zero = OpConstant %i32 0\n"
5217 "%one = OpConstant %u32 1\n"
5218 "%c_f32_1 = OpConstant %f32 1\n"
// Types and variables used only by the vertex entry point.
5220 "%i32inputptr = OpTypePointer Input %i32\n"
5221 "%vec4 = OpTypeVector %f32 4\n"
5222 "%vec4ptr = OpTypePointer Output %vec4\n"
5223 "%f32arr1 = OpTypeArray %f32 %one\n"
5224 "%vert_builtin_st = OpTypeStruct %vec4 %f32 %f32arr1\n"
5225 "%vert_builtin_st_ptr = OpTypePointer Output %vert_builtin_st\n"
5226 "%vert_builtins = OpVariable %vert_builtin_st_ptr Output\n"
5228 "%id = OpVariable %uvec3ptr Input\n"
5229 "%vertexIndex = OpVariable %i32inputptr Input\n"
5230 "%instanceIndex = OpVariable %i32inputptr Input\n"
5231 "%c_vec4_1 = OpConstantComposite %vec4 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
5233 // gl_Position = vec4(1.);
5234 "%vert_main = OpFunction %void None %voidf\n"
5235 "%vert_entry = OpLabel\n"
5236 "%position = OpAccessChain %vec4ptr %vert_builtins %zero\n"
5237 " OpStore %position %c_vec4_1\n"
// First compute entry point: output[x] = input[x] + input[x].
5242 "%comp_main1 = OpFunction %void None %voidf\n"
5243 "%comp1_entry = OpLabel\n"
5244 "%idval1 = OpLoad %uvec3 %id\n"
5245 "%x1 = OpCompositeExtract %u32 %idval1 0\n"
5246 "%inloc1 = OpAccessChain %f32ptr %indata %zero %x1\n"
5247 "%inval1 = OpLoad %f32 %inloc1\n"
5248 "%add = OpFAdd %f32 %inval1 %inval1\n"
5249 "%outloc1 = OpAccessChain %f32ptr %outdata %zero %x1\n"
5250 " OpStore %outloc1 %add\n"
// Second compute entry point: output[x] = -input[x].
5255 "%comp_main2 = OpFunction %void None %voidf\n"
5256 "%comp2_entry = OpLabel\n"
5257 "%idval2 = OpLoad %uvec3 %id\n"
5258 "%x2 = OpCompositeExtract %u32 %idval2 0\n"
5259 "%inloc2 = OpAccessChain %f32ptr %indata %zero %x2\n"
5260 "%inval2 = OpLoad %f32 %inloc2\n"
5261 "%neg = OpFNegate %f32 %inval2\n"
5262 "%outloc2 = OpAccessChain %f32ptr %outdata %zero %x2\n"
5263 " OpStore %outloc2 %neg\n"
5265 " OpFunctionEnd\n");
// Both specs use the same module and input; they differ in entry point and expected output.
5267 spec1.assembly = assembly;
5268 spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5269 spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
5270 spec1.numWorkGroups = IVec3(numElements, 1, 1);
5271 spec1.entryPoint = "entrypoint1";
5273 spec2.assembly = assembly;
5274 spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5275 spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
5276 spec2.numWorkGroups = IVec3(numElements, 1, 1);
5277 spec2.entryPoint = "entrypoint2";
5279 group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader1", "multiple shaders in the same module", spec1));
5280 group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader2", "multiple shaders in the same module", spec2));
5282 return group.release();
// Builds a string made of num4ByteChars repetitions of one 4-byte UTF-8
// sequence (bytes F0 9F 8C 8D), i.e. num4ByteChars * 4 bytes of payload.
// NOTE(review): the return statement is not visible here; presumably longString is returned - confirm.
5285 inline std::string makeLongUTF8String (size_t num4ByteChars)
5287 // An example of a longest valid UTF-8 character. Be explicit about the
5288 // character type because Microsoft compilers can otherwise interpret the
5289 // character string as being over wide (16-bit) characters. Ideally, we
5290 // would just use a C++11 UTF-8 string literal, but we want to support older
5291 // Microsoft compilers.
5292 const std::basic_string<char> earthAfrica("\xF0\x9F\x8C\x8D");
5293 std::string longString;
// Reserve the final size up front so the appends below do not reallocate.
5294 longString.reserve(num4ByteChars * 4);
5295 for (size_t count = 0; count < num4ByteChars; count++)
5297 longString += earthAfrica;
// Builds the opsource test group. Every case specializes the same compute
// shader with a different block of debug-source instructions (OpSource,
// optionally preceded by OpString and followed by OpSourceContinued), passed
// through the SOURCE specialization key. The shader itself negates each input,
// so every case expects the negated input as output.
// NOTE(review): the template line that consumes the SOURCE key is not visible here - confirm it exists.
// Returns group.release().
5302 tcu::TestCaseGroup* createOpSourceGroup (tcu::TestContext& testCtx)
5304 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsource", "Tests the OpSource & OpSourceContinued instruction"));
5305 vector<CaseParameter> cases;
// The random generator is seeded from the hash of the group name.
5306 de::Random rnd (deStringHash(group->getName()));
5307 const int numElements = 100;
5308 vector<float> positiveFloats (numElements, 0);
5309 vector<float> negativeFloats (numElements, 0);
5310 const StringTemplate shaderTemplate (
5311 "OpCapability Shader\n"
5312 "OpMemoryModel Logical GLSL450\n"
5314 "OpEntryPoint GLCompute %main \"main\" %id\n"
5315 "OpExecutionMode %main LocalSize 1 1 1\n"
5319 "OpName %main \"main\"\n"
5320 "OpName %id \"gl_GlobalInvocationID\"\n"
5322 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5324 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5326 "%id = OpVariable %uvec3ptr Input\n"
5327 "%zero = OpConstant %i32 0\n"
// main: output[x] = -input[x], where x is gl_GlobalInvocationID.x.
5329 "%main = OpFunction %void None %voidf\n"
5330 "%label = OpLabel\n"
5331 "%idval = OpLoad %uvec3 %id\n"
5332 "%x = OpCompositeExtract %u32 %idval 0\n"
5333 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5334 "%inval = OpLoad %f32 %inloc\n"
5335 "%neg = OpFNegate %f32 %inval\n"
5336 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5337 " OpStore %outloc %neg\n"
5339 " OpFunctionEnd\n");
// Case table: name plus the instruction text substituted for SOURCE.
5341 cases.push_back(CaseParameter("unknown_source", "OpSource Unknown 0"));
5342 cases.push_back(CaseParameter("wrong_source", "OpSource OpenCL_C 210"));
5343 cases.push_back(CaseParameter("normal_filename", "%fname = OpString \"filename\"\n"
5344 "OpSource GLSL 430 %fname"));
5345 cases.push_back(CaseParameter("empty_filename", "%fname = OpString \"\"\n"
5346 "OpSource GLSL 430 %fname"));
5347 cases.push_back(CaseParameter("normal_source_code", "%fname = OpString \"filename\"\n"
5348 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\""));
5349 cases.push_back(CaseParameter("empty_source_code", "%fname = OpString \"filename\"\n"
5350 "OpSource GLSL 430 %fname \"\""));
5351 cases.push_back(CaseParameter("long_source_code", "%fname = OpString \"filename\"\n"
5352 "OpSource GLSL 430 %fname \"" + makeLongUTF8String(65530) + "ccc\"")); // word count: 65535
5353 cases.push_back(CaseParameter("utf8_source_code", "%fname = OpString \"filename\"\n"
5354 "OpSource GLSL 430 %fname \"\xE2\x98\x82\xE2\x98\x85\"")); // umbrella & black star symbol
5355 cases.push_back(CaseParameter("normal_sourcecontinued", "%fname = OpString \"filename\"\n"
5356 "OpSource GLSL 430 %fname \"#version 430\nvo\"\n"
5357 "OpSourceContinued \"id main() {}\""));
5358 cases.push_back(CaseParameter("empty_sourcecontinued", "%fname = OpString \"filename\"\n"
5359 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5360 "OpSourceContinued \"\""));
5361 cases.push_back(CaseParameter("long_sourcecontinued", "%fname = OpString \"filename\"\n"
5362 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5363 "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\"")); // word count: 65535
5364 cases.push_back(CaseParameter("utf8_sourcecontinued", "%fname = OpString \"filename\"\n"
5365 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5366 "OpSourceContinued \"\xE2\x98\x8E\xE2\x9A\x91\"")); // white telephone & black flag symbol
5367 cases.push_back(CaseParameter("multi_sourcecontinued", "%fname = OpString \"filename\"\n"
5368 "OpSource GLSL 430 %fname \"#version 430\n\"\n"
5369 "OpSourceContinued \"void\"\n"
5370 "OpSourceContinued \"main()\"\n"
5371 "OpSourceContinued \"{}\""));
5372 cases.push_back(CaseParameter("empty_source_before_sourcecontinued", "%fname = OpString \"filename\"\n"
5373 "OpSource GLSL 430 %fname \"\"\n"
5374 "OpSourceContinued \"#version 430\nvoid main() {}\""));
// Inputs are drawn from [1, 100]; the expected outputs are their negations.
5376 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5378 for (size_t ndx = 0; ndx < numElements; ++ndx)
5379 negativeFloats[ndx] = -positiveFloats[ndx];
// One compute case per table entry; the case name is also used as its description.
5381 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5383 map<string, string> specializations;
5384 ComputeShaderSpec spec;
5386 specializations["SOURCE"] = cases[caseNdx].param;
5387 spec.assembly = shaderTemplate.specialize(specializations);
5388 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5389 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5390 spec.numWorkGroups = IVec3(numElements, 1, 1);
5392 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5395 return group.release();
// Builds the opsourceextension test group. Every case substitutes a different
// extension name into the OpSourceExtension instruction of the same compute
// shader. The shader negates each input, so every case expects the negated input.
// NOTE(review): the group description string names OpSource although the
// instruction under test is OpSourceExtension - confirm whether that is intended.
// Returns group.release().
5398 tcu::TestCaseGroup* createOpSourceExtensionGroup (tcu::TestContext& testCtx)
5400 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsourceextension", "Tests the OpSource instruction"));
5401 vector<CaseParameter> cases;
// The random generator is seeded from the hash of the group name.
5402 de::Random rnd (deStringHash(group->getName()));
5403 const int numElements = 100;
5404 vector<float> inputFloats (numElements, 0);
5405 vector<float> outputFloats (numElements, 0);
5406 const StringTemplate shaderTemplate (
5407 string(getComputeAsmShaderPreamble()) +
5409 "OpSourceExtension \"${EXTENSION}\"\n"
5411 "OpName %main \"main\"\n"
5412 "OpName %id \"gl_GlobalInvocationID\"\n"
5414 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5416 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5418 "%id = OpVariable %uvec3ptr Input\n"
5419 "%zero = OpConstant %i32 0\n"
// main: output[x] = -input[x], where x is gl_GlobalInvocationID.x.
5421 "%main = OpFunction %void None %voidf\n"
5422 "%label = OpLabel\n"
5423 "%idval = OpLoad %uvec3 %id\n"
5424 "%x = OpCompositeExtract %u32 %idval 0\n"
5425 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5426 "%inval = OpLoad %f32 %inloc\n"
5427 "%neg = OpFNegate %f32 %inval\n"
5428 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5429 " OpStore %outloc %neg\n"
5431 " OpFunctionEnd\n");
// Case table: name plus the extension string substituted for EXTENSION.
5433 cases.push_back(CaseParameter("empty_extension", ""));
5434 cases.push_back(CaseParameter("real_extension", "GL_ARB_texture_rectangle"));
5435 cases.push_back(CaseParameter("fake_extension", "GL_ARB_im_the_ultimate_extension"));
5436 cases.push_back(CaseParameter("utf8_extension", "GL_ARB_\xE2\x98\x82\xE2\x98\x85"));
5437 cases.push_back(CaseParameter("long_extension", makeLongUTF8String(65533) + "ccc")); // word count: 65535
// Inputs are drawn from [-200, 200]; the expected outputs are their negations.
5439 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
5441 for (size_t ndx = 0; ndx < numElements; ++ndx)
5442 outputFloats[ndx] = -inputFloats[ndx];
// One compute case per table entry; the case name is also used as its description.
5444 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5446 map<string, string> specializations;
5447 ComputeShaderSpec spec;
5449 specializations["EXTENSION"] = cases[caseNdx].param;
5450 spec.assembly = shaderTemplate.specialize(specializations);
5451 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5452 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5453 spec.numWorkGroups = IVec3(numElements, 1, 1);
5455 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5458 return group.release();
5461 // Checks that a compute shader can generate a constant null value of various types, without exercising a computation on it.
// Builds the opconstantnull test group. Every case substitutes a different type
// id for TYPE in the OpConstantNull declaration of the same shader. The visible
// body of main never references %null; it negates each input, so every case
// expects the negated input as output.
// Returns group.release().
5462 tcu::TestCaseGroup* createOpConstantNullGroup (tcu::TestContext& testCtx)
5464 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opconstantnull", "Tests the OpConstantNull instruction"));
5465 vector<CaseParameter> cases;
// The random generator is seeded from the hash of the group name.
5466 de::Random rnd (deStringHash(group->getName()));
5467 const int numElements = 100;
5468 vector<float> positiveFloats (numElements, 0);
5469 vector<float> negativeFloats (numElements, 0);
5470 const StringTemplate shaderTemplate (
5471 string(getComputeAsmShaderPreamble()) +
5473 "OpSource GLSL 430\n"
5474 "OpName %main \"main\"\n"
5475 "OpName %id \"gl_GlobalInvocationID\"\n"
5477 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5479 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
// Extra types declared for the case table below.
5480 "%uvec2 = OpTypeVector %u32 2\n"
5481 "%bvec3 = OpTypeVector %bool 3\n"
5482 "%fvec4 = OpTypeVector %f32 4\n"
5483 "%fmat33 = OpTypeMatrix %fvec3 3\n"
5484 "%const100 = OpConstant %u32 100\n"
5485 "%uarr100 = OpTypeArray %i32 %const100\n"
5486 "%struct = OpTypeStruct %f32 %i32 %u32\n"
5487 "%pointer = OpTypePointer Function %i32\n"
5488 + string(getComputeAsmInputOutputBuffer()) +
// The null constant under test; TYPE is replaced per case.
5490 "%null = OpConstantNull ${TYPE}\n"
5492 "%id = OpVariable %uvec3ptr Input\n"
5493 "%zero = OpConstant %i32 0\n"
// main: output[x] = -input[x], where x is gl_GlobalInvocationID.x.
5495 "%main = OpFunction %void None %voidf\n"
5496 "%label = OpLabel\n"
5497 "%idval = OpLoad %uvec3 %id\n"
5498 "%x = OpCompositeExtract %u32 %idval 0\n"
5499 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5500 "%inval = OpLoad %f32 %inloc\n"
5501 "%neg = OpFNegate %f32 %inval\n"
5502 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5503 " OpStore %outloc %neg\n"
5505 " OpFunctionEnd\n");
// Case table: name plus the type id substituted for TYPE.
5507 cases.push_back(CaseParameter("bool", "%bool"));
5508 cases.push_back(CaseParameter("sint32", "%i32"));
5509 cases.push_back(CaseParameter("uint32", "%u32"));
5510 cases.push_back(CaseParameter("float32", "%f32"));
5511 cases.push_back(CaseParameter("vec4float32", "%fvec4"));
5512 cases.push_back(CaseParameter("vec3bool", "%bvec3"));
5513 cases.push_back(CaseParameter("vec2uint32", "%uvec2"));
5514 cases.push_back(CaseParameter("matrix", "%fmat33"));
5515 cases.push_back(CaseParameter("array", "%uarr100"));
5516 cases.push_back(CaseParameter("struct", "%struct"));
5517 cases.push_back(CaseParameter("pointer", "%pointer"));
// Inputs are drawn from [1, 100]; the expected outputs are their negations.
5519 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5521 for (size_t ndx = 0; ndx < numElements; ++ndx)
5522 negativeFloats[ndx] = -positiveFloats[ndx];
// One compute case per table entry; the case name is also used as its description.
5524 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5526 map<string, string> specializations;
5527 ComputeShaderSpec spec;
5529 specializations["TYPE"] = cases[caseNdx].param;
5530 spec.assembly = shaderTemplate.specialize(specializations);
5531 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5532 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5533 spec.numWorkGroups = IVec3(numElements, 1, 1);
5535 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5538 return group.release();
5541 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
// Builds the opconstantcomposite test group. Every case supplies a block of
// type and constant declarations ending in an OpConstantComposite, passed
// through the CONSTANT specialization key. The visible body of main negates
// each input, so every case expects the negated input as output.
// NOTE(review): the template line that consumes the CONSTANT key is not visible here - confirm it exists.
// Returns group.release().
5542 tcu::TestCaseGroup* createOpConstantCompositeGroup (tcu::TestContext& testCtx)
5544 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
5545 vector<CaseParameter> cases;
// The random generator is seeded from the hash of the group name.
5546 de::Random rnd (deStringHash(group->getName()));
5547 const int numElements = 100;
5548 vector<float> positiveFloats (numElements, 0);
5549 vector<float> negativeFloats (numElements, 0);
5550 const StringTemplate shaderTemplate (
5551 string(getComputeAsmShaderPreamble()) +
5553 "OpSource GLSL 430\n"
5554 "OpName %main \"main\"\n"
5555 "OpName %id \"gl_GlobalInvocationID\"\n"
5557 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5559 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5561 "%id = OpVariable %uvec3ptr Input\n"
5562 "%zero = OpConstant %i32 0\n"
// main: output[x] = -input[x], where x is gl_GlobalInvocationID.x.
5566 "%main = OpFunction %void None %voidf\n"
5567 "%label = OpLabel\n"
5568 "%idval = OpLoad %uvec3 %id\n"
5569 "%x = OpCompositeExtract %u32 %idval 0\n"
5570 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5571 "%inval = OpLoad %f32 %inloc\n"
5572 "%neg = OpFNegate %f32 %inval\n"
5573 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5574 " OpStore %outloc %neg\n"
5576 " OpFunctionEnd\n");
// Case table: name plus the declarations substituted for CONSTANT. The
// declarations reference ids such as %zero, %uvec3 and %fvec3 that the cases
// do not define themselves.
5578 cases.push_back(CaseParameter("vector", "%five = OpConstant %u32 5\n"
5579 "%const = OpConstantComposite %uvec3 %five %zero %five"));
5580 cases.push_back(CaseParameter("matrix", "%m3fvec3 = OpTypeMatrix %fvec3 3\n"
5581 "%ten = OpConstant %f32 10.\n"
5582 "%fzero = OpConstant %f32 0.\n"
5583 "%vec = OpConstantComposite %fvec3 %ten %fzero %ten\n"
5584 "%mat = OpConstantComposite %m3fvec3 %vec %vec %vec"));
5585 cases.push_back(CaseParameter("struct", "%m2vec3 = OpTypeMatrix %fvec3 2\n"
5586 "%struct = OpTypeStruct %i32 %f32 %fvec3 %m2vec3\n"
5587 "%fzero = OpConstant %f32 0.\n"
5588 "%one = OpConstant %f32 1.\n"
5589 "%point5 = OpConstant %f32 0.5\n"
5590 "%vec = OpConstantComposite %fvec3 %one %one %fzero\n"
5591 "%mat = OpConstantComposite %m2vec3 %vec %vec\n"
5592 "%const = OpConstantComposite %struct %zero %point5 %vec %mat"));
5593 cases.push_back(CaseParameter("nested_struct", "%st1 = OpTypeStruct %u32 %f32\n"
5594 "%st2 = OpTypeStruct %i32 %i32\n"
5595 "%struct = OpTypeStruct %st1 %st2\n"
5596 "%point5 = OpConstant %f32 0.5\n"
5597 "%one = OpConstant %u32 1\n"
5598 "%ten = OpConstant %i32 10\n"
5599 "%st1val = OpConstantComposite %st1 %one %point5\n"
5600 "%st2val = OpConstantComposite %st2 %ten %ten\n"
5601 "%const = OpConstantComposite %struct %st1val %st2val"));
// Inputs are drawn from [1, 100]; the expected outputs are their negations.
5603 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5605 for (size_t ndx = 0; ndx < numElements; ++ndx)
5606 negativeFloats[ndx] = -positiveFloats[ndx];
// One compute case per table entry; the case name is also used as its description.
5608 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5610 map<string, string> specializations;
5611 ComputeShaderSpec spec;
5613 specializations["CONSTANT"] = cases[caseNdx].param;
5614 spec.assembly = shaderTemplate.specialize(specializations);
5615 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5616 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5617 spec.numWorkGroups = IVec3(numElements, 1, 1);
5619 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5622 return group.release();
5625 // Creates a floating point number with the given exponent, and significand
5626 // bits set. It can only create normalized numbers. Only the least significant
5627 // 24 bits of the significand will be examined. The final bit of the
5628 // significand will also be ignored. This allows alignment to be written
5629 // similarly to C99 hex-floats.
5630 // For example if you wanted to write 0x1.7f34p-12 you would call
5631 // constructNormalizedFloat(-12, 0x7f3400)
//
// exponent:    binary exponent applied to the accumulated value via std::ldexp.
// significand: fraction bits; bit 23 (mask 0x800000) is the one tested per iteration.
// Returns std::ldexp(f, exponent).
5632 float constructNormalizedFloat (deInt32 exponent, deUint32 significand)
// 23 iterations; when the tested bit is set, iteration idx adds 2^-(idx + 1) to f.
// NOTE(review): the initial value of f and the per-iteration update of
// significand are not visible here; presumably f starts at 1.0 and significand
// is shifted left by one each iteration - confirm.
5636 for (deInt32 idx = 0; idx < 23; ++idx)
5638 f += ((significand & 0x800000) == 0) ? 0.f : std::ldexp(1.0f, -(idx + 1));
5642 return std::ldexp(f, exponent);
5645 // Compare instruction for the OpQuantizeF16 compute exact case.
5646 // Returns true if the output is what is expected from the test case.
// The first parameter and the TestLog are unnamed and unused. Exactly one
// output allocation is required, and the expected output must be four floats
// in size. The expected values are hard-coded below instead of being read from
// expectedOutputs; each check accepts either of two candidate values.
// NOTE(review): the statements that return and that advance outputAsFloat
// between the four checks are not visible here - confirm.
5647 bool compareOpQuantizeF16ComputeExactCase (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5649 if (outputAllocs.size() != 1)
5652 // Only the byte size of the expected output is used; the values are checked against the constants below.
5653 size_t byteSize = expectedOutputs[0].getByteSize();
5655 const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
5657 if (byteSize != 4*sizeof(float)) {
// Accepted values: 0x1.608p8 or 0x1.6p8 in hex-float notation.
5661 if (*outputAsFloat != constructNormalizedFloat(8, 0x304000) &&
5662 *outputAsFloat != constructNormalizedFloat(8, 0x300000)) {
// Accepted values: the negation of 0x1.Cp-7 or of 0x1.C08p-7.
5667 if (*outputAsFloat != -constructNormalizedFloat(-7, 0x600000) &&
5668 *outputAsFloat != -constructNormalizedFloat(-7, 0x604000)) {
// Accepted values: 0x1.038p2 or 0x1.04p2.
5673 if (*outputAsFloat != constructNormalizedFloat(2, 0x01C000) &&
5674 *outputAsFloat != constructNormalizedFloat(2, 0x020000)) {
// Accepted values: 0x1.FF8p1 or 0x1.0p2.
5679 if (*outputAsFloat != constructNormalizedFloat(1, 0xFFC000) &&
5680 *outputAsFloat != constructNormalizedFloat(2, 0x000000)) {
5687 // Checks that every output from a test-case is a float NaN.
// The first parameter and the TestLog are unnamed and unused. Exactly one
// output allocation is required. expectedOutputs is consulted only for its
// byte size, which determines how many floats of the output are inspected.
5688 bool compareNan (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5690 if (outputAllocs.size() != 1)
5693 // Only size is needed because we cannot compare Nans.
5694 size_t byteSize = expectedOutputs[0].getByteSize();
5696 const float* const output_as_float = static_cast<const float*>(outputAllocs[0]->getHostPtr());
// Inspect byteSize / sizeof(float) elements of the single output allocation.
5698 for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
5700 if (!deFloatIsNaN(output_as_float[idx]))
5709 // Checks the results of OpQuantizeToF16 in a compute shader for special inputs: infinities, NaNs and small-magnitude values that are expected to quantize to zero.
// Builds the "opquantize" test group. Every case runs the same compute shader, which applies
// OpQuantizeToF16 to one element of the input buffer and stores the result in the output buffer.
// Returns the raw group pointer released from the MovePtr.
5710 tcu::TestCaseGroup* createOpQuantizeToF16Group (tcu::TestContext& testCtx)
5712 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opquantize", "Tests the OpQuantizeToF16 instruction"));
// Shader shared by all cases: x = gl_GlobalInvocationID.x, outdata[x] = OpQuantizeToF16(indata[x]).
5714 const std::string shader (
5715 string(getComputeAsmShaderPreamble()) +
5717 "OpSource GLSL 430\n"
5718 "OpName %main \"main\"\n"
5719 "OpName %id \"gl_GlobalInvocationID\"\n"
5721 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5723 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5725 "%id = OpVariable %uvec3ptr Input\n"
5726 "%zero = OpConstant %i32 0\n"
5728 "%main = OpFunction %void None %voidf\n"
5729 "%label = OpLabel\n"
5730 "%idval = OpLoad %uvec3 %id\n"
5731 "%x = OpCompositeExtract %u32 %idval 0\n"
5732 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5733 "%inval = OpLoad %f32 %inloc\n"
5734 "%quant = OpQuantizeToF16 %f32 %inval\n"
5735 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5736 " OpStore %outloc %quant\n"
5738 " OpFunctionEnd\n");
// Case "infinities": input/expected pairs are (+inf, +inf), (-inf, -inf), (2^16, +inf) and (-(2^32), -inf).
5741 ComputeShaderSpec spec;
5742 const deUint32 numElements = 100;
5743 vector<float> infinities;
5744 vector<float> results;
5746 infinities.reserve(numElements);
5747 results.reserve(numElements);
5749 for (size_t idx = 0; idx < numElements; ++idx)
5754 infinities.push_back(std::numeric_limits<float>::infinity());
5755 results.push_back(std::numeric_limits<float>::infinity());
5758 infinities.push_back(-std::numeric_limits<float>::infinity());
5759 results.push_back(-std::numeric_limits<float>::infinity());
5762 infinities.push_back(std::ldexp(1.0f, 16));
5763 results.push_back(std::numeric_limits<float>::infinity());
5766 infinities.push_back(std::ldexp(-1.0f, 32));
5767 results.push_back(-std::numeric_limits<float>::infinity());
5772 spec.assembly = shader;
5773 spec.inputs.push_back(BufferSp(new Float32Buffer(infinities)));
5774 spec.outputs.push_back(BufferSp(new Float32Buffer(results)));
// Dispatch numElements workgroups along X.
5775 spec.numWorkGroups = IVec3(numElements, 1, 1);
5777 group->addChild(new SpvAsmComputeShaderCase(
5778 testCtx, "infinities", "Check that infinities propagated and created", spec));
// Case "propagated_nans": quiet NaNs and negated quiet NaNs are fed in. The same buffer is registered
// as the expected output and verification is delegated to compareNan.
5782 ComputeShaderSpec spec;
5784 const deUint32 numElements = 100;
5786 nans.reserve(numElements);
5788 for (size_t idx = 0; idx < numElements; ++idx)
5792 nans.push_back(std::numeric_limits<float>::quiet_NaN());
5796 nans.push_back(-std::numeric_limits<float>::quiet_NaN());
5800 spec.assembly = shader;
5801 spec.inputs.push_back(BufferSp(new Float32Buffer(nans)));
5802 spec.outputs.push_back(BufferSp(new Float32Buffer(nans)));
5803 spec.numWorkGroups = IVec3(numElements, 1, 1);
5804 spec.verifyIO = &compareNan;
5806 group->addChild(new SpvAsmComputeShaderCase(
5807 testCtx, "propagated_nans", "Check that nans are propagated", spec));
// Case "flush_to_zero": inputs 0, -0, 2^-16, -(2^-32), 2^-127 and -(2^-128) are each paired with
// an expected zero of the same sign as the input.
5811 ComputeShaderSpec spec;
5812 vector<float> small;
5813 vector<float> zeros;
5814 const deUint32 numElements = 100;
5816 small.reserve(numElements);
5817 zeros.reserve(numElements);
5819 for (size_t idx = 0; idx < numElements; ++idx)
5824 small.push_back(0.f);
5825 zeros.push_back(0.f);
5828 small.push_back(-0.f);
5829 zeros.push_back(-0.f);
5832 small.push_back(std::ldexp(1.0f, -16));
5833 zeros.push_back(0.f);
5836 small.push_back(std::ldexp(-1.0f, -32));
5837 zeros.push_back(-0.f);
5840 small.push_back(std::ldexp(1.0f, -127));
5841 zeros.push_back(0.f);
5844 small.push_back(-std::ldexp(1.0f, -128));
5845 zeros.push_back(-0.f);
5850 spec.assembly = shader;
5851 spec.inputs.push_back(BufferSp(new Float32Buffer(small)));
5852 spec.outputs.push_back(BufferSp(new Float32Buffer(zeros)));
5853 spec.numWorkGroups = IVec3(numElements, 1, 1);
5855 group->addChild(new SpvAsmComputeShaderCase(
5856 testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
// Case "exact": the whole numbers -100..99 are used as both input and expected output.
5860 ComputeShaderSpec spec;
5861 vector<float> exact;
5862 const deUint32 numElements = 200;
5864 exact.reserve(numElements);
5866 for (size_t idx = 0; idx < numElements; ++idx)
5867 exact.push_back(static_cast<float>(static_cast<int>(idx) - 100));
5869 spec.assembly = shader;
5870 spec.inputs.push_back(BufferSp(new Float32Buffer(exact)));
5871 spec.outputs.push_back(BufferSp(new Float32Buffer(exact)));
5872 spec.numWorkGroups = IVec3(numElements, 1, 1);
5874 group->addChild(new SpvAsmComputeShaderCase(
5875 testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
// Case "rounded": four values built with constructNormalizedFloat. The registered output buffer is
// the input data itself; the actual check is delegated to compareOpQuantizeF16ComputeExactCase.
5879 ComputeShaderSpec spec;
5880 vector<float> inputs;
5881 const deUint32 numElements = 4;
5883 inputs.push_back(constructNormalizedFloat(8, 0x300300));
5884 inputs.push_back(-constructNormalizedFloat(-7, 0x600800));
5885 inputs.push_back(constructNormalizedFloat(2, 0x01E000));
5886 inputs.push_back(constructNormalizedFloat(1, 0xFFE000));
5888 spec.assembly = shader;
5889 spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
5890 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5891 spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
5892 spec.numWorkGroups = IVec3(numElements, 1, 1);
5894 group->addChild(new SpvAsmComputeShaderCase(
5895 testCtx, "rounded", "Check that are rounded when needed", spec));
5898 return group.release();
// Builds the "opspecconstantop_opquantize" test group. The values under test are supplied as
// specialization constants and quantized with OpSpecConstantOp QuantizeToF16 rather than being
// read from the input buffer. Returns the raw group pointer released from the MovePtr.
5901 tcu::TestCaseGroup* createSpecConstantOpQuantizeToF16Group (tcu::TestContext& testCtx)
5903 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opspecconstantop_opquantize", "Tests the OpQuantizeToF16 opcode for the OpSpecConstantOp instruction"));
// Shader shared by all cases: six f32 spec constants (SpecId 0..5, default 0.) are each quantized.
// Every invocation switches on gl_GlobalInvocationID.x % 6 and stores the matching quantized
// constant to outdata[x].
5905 const std::string shader (
5906 string(getComputeAsmShaderPreamble()) +
5908 "OpName %main \"main\"\n"
5909 "OpName %id \"gl_GlobalInvocationID\"\n"
5911 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5913 "OpDecorate %sc_0 SpecId 0\n"
5914 "OpDecorate %sc_1 SpecId 1\n"
5915 "OpDecorate %sc_2 SpecId 2\n"
5916 "OpDecorate %sc_3 SpecId 3\n"
5917 "OpDecorate %sc_4 SpecId 4\n"
5918 "OpDecorate %sc_5 SpecId 5\n"
5920 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5922 "%id = OpVariable %uvec3ptr Input\n"
5923 "%zero = OpConstant %i32 0\n"
5924 "%c_u32_6 = OpConstant %u32 6\n"
5926 "%sc_0 = OpSpecConstant %f32 0.\n"
5927 "%sc_1 = OpSpecConstant %f32 0.\n"
5928 "%sc_2 = OpSpecConstant %f32 0.\n"
5929 "%sc_3 = OpSpecConstant %f32 0.\n"
5930 "%sc_4 = OpSpecConstant %f32 0.\n"
5931 "%sc_5 = OpSpecConstant %f32 0.\n"
5933 "%sc_0_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_0\n"
5934 "%sc_1_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_1\n"
5935 "%sc_2_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_2\n"
5936 "%sc_3_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_3\n"
5937 "%sc_4_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_4\n"
5938 "%sc_5_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_5\n"
5940 "%main = OpFunction %void None %voidf\n"
5941 "%label = OpLabel\n"
5942 "%idval = OpLoad %uvec3 %id\n"
5943 "%x = OpCompositeExtract %u32 %idval 0\n"
5944 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5945 "%selector = OpUMod %u32 %x %c_u32_6\n"
5946 " OpSelectionMerge %exit None\n"
5947 " OpSwitch %selector %exit 0 %case0 1 %case1 2 %case2 3 %case3 4 %case4 5 %case5\n"
5949 "%case0 = OpLabel\n"
5950 " OpStore %outloc %sc_0_quant\n"
5953 "%case1 = OpLabel\n"
5954 " OpStore %outloc %sc_1_quant\n"
5957 "%case2 = OpLabel\n"
5958 " OpStore %outloc %sc_2_quant\n"
5961 "%case3 = OpLabel\n"
5962 " OpStore %outloc %sc_3_quant\n"
5965 "%case4 = OpLabel\n"
5966 " OpStore %outloc %sc_4_quant\n"
5969 "%case5 = OpLabel\n"
5970 " OpStore %outloc %sc_5_quant\n"
5976 " OpFunctionEnd\n");
// Case "infinities": spec constants +inf, -inf, 2^16 and -(2^32) expect +inf, -inf, +inf and -inf.
// In every case of this group the input buffer is zero-filled (the shader main shown above never
// loads from %indata), and spec constants are appended as the float's bit pattern.
5979 ComputeShaderSpec spec;
5980 const deUint8 numCases = 4;
5981 vector<float> inputs (numCases, 0.f);
5982 vector<float> outputs;
5984 spec.assembly = shader;
5985 spec.numWorkGroups = IVec3(numCases, 1, 1);
5987 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::numeric_limits<float>::infinity()));
5988 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::numeric_limits<float>::infinity()));
5989 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, 16)));
5990 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, 32)));
5992 outputs.push_back(std::numeric_limits<float>::infinity());
5993 outputs.push_back(-std::numeric_limits<float>::infinity());
5994 outputs.push_back(std::numeric_limits<float>::infinity());
5995 outputs.push_back(-std::numeric_limits<float>::infinity());
5997 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5998 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6000 group->addChild(new SpvAsmComputeShaderCase(
6001 testCtx, "infinities", "Check that infinities propagated and created", spec));
// Case "propagated_nans": the two expected NaN values double as the spec constant values;
// verification is delegated to compareNan.
6005 ComputeShaderSpec spec;
6006 const deUint8 numCases = 2;
6007 vector<float> inputs (numCases, 0.f);
6008 vector<float> outputs;
6010 spec.assembly = shader;
6011 spec.numWorkGroups = IVec3(numCases, 1, 1);
6012 spec.verifyIO = &compareNan;
6014 outputs.push_back(std::numeric_limits<float>::quiet_NaN());
6015 outputs.push_back(-std::numeric_limits<float>::quiet_NaN());
6017 for (deUint8 idx = 0; idx < numCases; ++idx)
6018 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
6020 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6021 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6023 group->addChild(new SpvAsmComputeShaderCase(
6024 testCtx, "propagated_nans", "Check that nans are propagated", spec));
// Case "flush_to_zero": spec constants 0, -0, 2^-16, -(2^-32), 2^-127 and -(2^-128) each expect
// a zero of the same sign.
6028 ComputeShaderSpec spec;
6029 const deUint8 numCases = 6;
6030 vector<float> inputs (numCases, 0.f);
6031 vector<float> outputs;
6033 spec.assembly = shader;
6034 spec.numWorkGroups = IVec3(numCases, 1, 1);
6036 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(0.f));
6037 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-0.f));
6038 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -16)));
6039 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, -32)));
6040 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -127)));
6041 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::ldexp(1.0f, -128)));
6043 outputs.push_back(0.f);
6044 outputs.push_back(-0.f);
6045 outputs.push_back(0.f);
6046 outputs.push_back(-0.f);
6047 outputs.push_back(0.f);
6048 outputs.push_back(-0.f);
6050 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6051 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6053 group->addChild(new SpvAsmComputeShaderCase(
6054 testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
// Case "exact": spec constants -7.5, -5, -2.5, 0, 2.5 and 5 (i.e. (idx * 10 - 30) / 4) are expected
// back unchanged.
6058 ComputeShaderSpec spec;
6059 const deUint8 numCases = 6;
6060 vector<float> inputs (numCases, 0.f);
6061 vector<float> outputs;
6063 spec.assembly = shader;
6064 spec.numWorkGroups = IVec3(numCases, 1, 1);
6066 for (deUint8 idx = 0; idx < 6; ++idx)
6068 const float f = static_cast<float>(idx * 10 - 30) / 4.f;
6069 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(f));
6070 outputs.push_back(f);
6073 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6074 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6076 group->addChild(new SpvAsmComputeShaderCase(
6077 testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
// Case "rounded": four values built with constructNormalizedFloat are used both as spec constants
// and as the registered output buffer; the check is delegated to compareOpQuantizeF16ComputeExactCase.
6081 ComputeShaderSpec spec;
6082 const deUint8 numCases = 4;
6083 vector<float> inputs (numCases, 0.f);
6084 vector<float> outputs;
6086 spec.assembly = shader;
6087 spec.numWorkGroups = IVec3(numCases, 1, 1);
6088 spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
6090 outputs.push_back(constructNormalizedFloat(8, 0x300300));
6091 outputs.push_back(-constructNormalizedFloat(-7, 0x600800));
6092 outputs.push_back(constructNormalizedFloat(2, 0x01E000));
6093 outputs.push_back(constructNormalizedFloat(1, 0xFFE000));
6095 for (deUint8 idx = 0; idx < numCases; ++idx)
6096 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
6098 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6099 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6101 group->addChild(new SpvAsmComputeShaderCase(
6102 testCtx, "rounded", "Check that are rounded when needed", spec));
6105 return group.release();
6108 // Checks that constant null/composite values can be used in computation.
// Builds the "opconstantnullcomposite" test group with a single "spotcheck" case.
// Inputs are random floats in [1, 100]; the expected outputs are their negations.
// Returns the raw group pointer released from the MovePtr.
6109 tcu::TestCaseGroup* createOpConstantUsageGroup (tcu::TestContext& testCtx)
6111 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opconstantnullcomposite", "Spotcheck the OpConstantNull & OpConstantComposite instruction"));
6112 ComputeShaderSpec spec;
// The random generator is seeded from a hash of the group name.
6113 de::Random rnd (deStringHash(group->getName()));
6114 const int numElements = 100;
6115 vector<float> positiveFloats (numElements, 0);
6116 vector<float> negativeFloats (numElements, 0);
6118 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
6120 for (size_t ndx = 0; ndx < numElements; ++ndx)
6121 negativeFloats[ndx] = -positiveFloats[ndx];
// Shader text: negates indata[x], then adds FAbs of one scalar taken from each null/composite
// constant (uint, float, vec3, mat3, array and struct). The expected output buffer is -indata[x].
6124 "OpCapability Shader\n"
6125 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
6126 "OpMemoryModel Logical GLSL450\n"
6127 "OpEntryPoint GLCompute %main \"main\" %id\n"
6128 "OpExecutionMode %main LocalSize 1 1 1\n"
6130 "OpSource GLSL 430\n"
6131 "OpName %main \"main\"\n"
6132 "OpName %id \"gl_GlobalInvocationID\"\n"
6134 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6136 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6138 "%fmat = OpTypeMatrix %fvec3 3\n"
6139 "%ten = OpConstant %u32 10\n"
6140 "%f32arr10 = OpTypeArray %f32 %ten\n"
6141 "%fst = OpTypeStruct %f32 %f32\n"
6143 + string(getComputeAsmInputOutputBuffer()) +
6145 "%id = OpVariable %uvec3ptr Input\n"
6146 "%zero = OpConstant %i32 0\n"
6148 // Create a bunch of null values
6149 "%unull = OpConstantNull %u32\n"
6150 "%fnull = OpConstantNull %f32\n"
6151 "%vnull = OpConstantNull %fvec3\n"
6152 "%mnull = OpConstantNull %fmat\n"
6153 "%anull = OpConstantNull %f32arr10\n"
6154 "%snull = OpConstantComposite %fst %fnull %fnull\n"
6156 "%main = OpFunction %void None %voidf\n"
6157 "%label = OpLabel\n"
6158 "%idval = OpLoad %uvec3 %id\n"
6159 "%x = OpCompositeExtract %u32 %idval 0\n"
6160 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6161 "%inval = OpLoad %f32 %inloc\n"
6162 "%neg = OpFNegate %f32 %inval\n"
6164 // Get the abs() of (a certain element of) those null values
6165 "%unull_cov = OpConvertUToF %f32 %unull\n"
6166 "%unull_abs = OpExtInst %f32 %std450 FAbs %unull_cov\n"
6167 "%fnull_abs = OpExtInst %f32 %std450 FAbs %fnull\n"
6168 "%vnull_0 = OpCompositeExtract %f32 %vnull 0\n"
6169 "%vnull_abs = OpExtInst %f32 %std450 FAbs %vnull_0\n"
6170 "%mnull_12 = OpCompositeExtract %f32 %mnull 1 2\n"
6171 "%mnull_abs = OpExtInst %f32 %std450 FAbs %mnull_12\n"
6172 "%anull_3 = OpCompositeExtract %f32 %anull 3\n"
6173 "%anull_abs = OpExtInst %f32 %std450 FAbs %anull_3\n"
6174 "%snull_1 = OpCompositeExtract %f32 %snull 1\n"
6175 "%snull_abs = OpExtInst %f32 %std450 FAbs %snull_1\n"
6178 "%add1 = OpFAdd %f32 %neg %unull_abs\n"
6179 "%add2 = OpFAdd %f32 %add1 %fnull_abs\n"
6180 "%add3 = OpFAdd %f32 %add2 %vnull_abs\n"
6181 "%add4 = OpFAdd %f32 %add3 %mnull_abs\n"
6182 "%add5 = OpFAdd %f32 %add4 %anull_abs\n"
6183 "%final = OpFAdd %f32 %add5 %snull_abs\n"
6185 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6186 " OpStore %outloc %final\n" // write to output
6189 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
6190 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
// Dispatch numElements workgroups along X; the shader declares LocalSize 1 1 1.
6191 spec.numWorkGroups = IVec3(numElements, 1, 1);
6193 group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "Check that values constructed via OpConstantNull & OpConstantComposite can be used", spec));
6195 return group.release();
6198 // Assembly code used for testing loop control is based on GLSL source code:
6201 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6202 // float elements[];
6204 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6205 // float elements[];
6209 // uint x = gl_GlobalInvocationID.x;
6210 // output_data.elements[x] = input_data.elements[x];
6211 // for (uint i = 0; i < 4; ++i)
6212 // output_data.elements[x] += 1.f;
// Builds the "loop_control" test group: one compute case per loop control operand
// (None, Unroll, DontUnroll) substituted into OpLoopMerge, plus two dedicated dependency cases.
// Returns the raw group pointer released from the MovePtr.
6214 tcu::TestCaseGroup* createLoopControlGroup (tcu::TestContext& testCtx)
6216 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "loop_control", "Tests loop control cases"));
6217 vector<CaseParameter> cases;
// The random generator is seeded from a hash of the group name.
6218 de::Random rnd (deStringHash(group->getName()));
6219 const int numElements = 100;
6220 vector<float> inputFloats (numElements, 0);
6221 vector<float> outputFloats (numElements, 0);
// Shader template: copies indata[x] to outdata[x], then loops while i < 4 adding 1.0 to outdata[x].
// ${CONTROL} is the loop control operand of OpLoopMerge.
6222 const StringTemplate shaderTemplate (
6223 string(getComputeAsmShaderPreamble()) +
6225 "OpSource GLSL 430\n"
6226 "OpName %main \"main\"\n"
6227 "OpName %id \"gl_GlobalInvocationID\"\n"
6229 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6231 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6233 "%u32ptr = OpTypePointer Function %u32\n"
6235 "%id = OpVariable %uvec3ptr Input\n"
6236 "%zero = OpConstant %i32 0\n"
6237 "%uzero = OpConstant %u32 0\n"
6238 "%one = OpConstant %i32 1\n"
6239 "%constf1 = OpConstant %f32 1.0\n"
6240 "%four = OpConstant %u32 4\n"
6242 "%main = OpFunction %void None %voidf\n"
6243 "%entry = OpLabel\n"
6244 "%i = OpVariable %u32ptr Function\n"
6245 " OpStore %i %uzero\n"
6247 "%idval = OpLoad %uvec3 %id\n"
6248 "%x = OpCompositeExtract %u32 %idval 0\n"
6249 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6250 "%inval = OpLoad %f32 %inloc\n"
6251 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6252 " OpStore %outloc %inval\n"
6253 " OpBranch %loop_entry\n"
6255 "%loop_entry = OpLabel\n"
6256 "%i_val = OpLoad %u32 %i\n"
6257 "%cmp_lt = OpULessThan %bool %i_val %four\n"
6258 " OpLoopMerge %loop_merge %loop_body ${CONTROL}\n"
6259 " OpBranchConditional %cmp_lt %loop_body %loop_merge\n"
6260 "%loop_body = OpLabel\n"
6261 "%outval = OpLoad %f32 %outloc\n"
6262 "%addf1 = OpFAdd %f32 %outval %constf1\n"
6263 " OpStore %outloc %addf1\n"
6264 "%new_i = OpIAdd %u32 %i_val %one\n"
6265 " OpStore %i %new_i\n"
6266 " OpBranch %loop_entry\n"
6267 "%loop_merge = OpLabel\n"
6269 " OpFunctionEnd\n");
// Each entry pairs a test case name with the loop control operand spliced into the template.
6271 cases.push_back(CaseParameter("none", "None"));
6272 cases.push_back(CaseParameter("unroll", "Unroll"));
6273 cases.push_back(CaseParameter("dont_unroll", "DontUnroll"));
6275 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
// Expected result: four loop iterations each add 1.0 to the input.
6277 for (size_t ndx = 0; ndx < numElements; ++ndx)
6278 outputFloats[ndx] = inputFloats[ndx] + 4.f;
// Register one compute case per loop control operand; all share the same input and expected data.
6280 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6282 map<string, string> specializations;
6283 ComputeShaderSpec spec;
6285 specializations["CONTROL"] = cases[caseNdx].param;
6286 spec.assembly = shaderTemplate.specialize(specializations);
6287 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6288 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6289 spec.numWorkGroups = IVec3(numElements, 1, 1);
6291 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
// Two further cases implemented by their own test case classes (defined outside this function).
6294 group->addChild(new SpvAsmLoopControlDependencyLengthCase(testCtx, "dependency_length", "dependency_length"));
6295 group->addChild(new SpvAsmLoopControlDependencyInfiniteCase(testCtx, "dependency_infinite", "dependency_infinite"));
6297 return group.release();
6300 // Assembly code used for testing selection control is based on GLSL source code:
6303 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6304 // float elements[];
6306 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6307 // float elements[];
6311 // uint x = gl_GlobalInvocationID.x;
6312 // float val = input_data.elements[x];
6314 // output_data.elements[x] = val + 1.f;
6316 // output_data.elements[x] = val - 1.f;
// Builds the "selection_control" test group: one compute case per selection control operand
// substituted into OpSelectionMerge. Returns the raw group pointer released from the MovePtr.
6318 tcu::TestCaseGroup* createSelectionControlGroup (tcu::TestContext& testCtx)
6320 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "selection_control", "Tests selection control cases"));
6321 vector<CaseParameter> cases;
// The random generator is seeded from a hash of the group name.
6322 de::Random rnd (deStringHash(group->getName()));
6323 const int numElements = 100;
6324 vector<float> inputFloats (numElements, 0);
6325 vector<float> outputFloats (numElements, 0);
// Shader template: stores indata[x] + 1.0 to outdata[x] when indata[x] > 10.0, otherwise
// indata[x] - 1.0. ${CONTROL} is the selection control operand of OpSelectionMerge.
6326 const StringTemplate shaderTemplate (
6327 string(getComputeAsmShaderPreamble()) +
6329 "OpSource GLSL 430\n"
6330 "OpName %main \"main\"\n"
6331 "OpName %id \"gl_GlobalInvocationID\"\n"
6333 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6335 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6337 "%id = OpVariable %uvec3ptr Input\n"
6338 "%zero = OpConstant %i32 0\n"
6339 "%constf1 = OpConstant %f32 1.0\n"
6340 "%constf10 = OpConstant %f32 10.0\n"
6342 "%main = OpFunction %void None %voidf\n"
6343 "%entry = OpLabel\n"
6344 "%idval = OpLoad %uvec3 %id\n"
6345 "%x = OpCompositeExtract %u32 %idval 0\n"
6346 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6347 "%inval = OpLoad %f32 %inloc\n"
6348 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6349 "%cmp_gt = OpFOrdGreaterThan %bool %inval %constf10\n"
6351 " OpSelectionMerge %if_end ${CONTROL}\n"
6352 " OpBranchConditional %cmp_gt %if_true %if_false\n"
6353 "%if_true = OpLabel\n"
6354 "%addf1 = OpFAdd %f32 %inval %constf1\n"
6355 " OpStore %outloc %addf1\n"
6356 " OpBranch %if_end\n"
6357 "%if_false = OpLabel\n"
6358 "%subf1 = OpFSub %f32 %inval %constf1\n"
6359 " OpStore %outloc %subf1\n"
6360 " OpBranch %if_end\n"
6361 "%if_end = OpLabel\n"
6363 " OpFunctionEnd\n");
// Each entry pairs a test case name with the selection control operand spliced into the template.
// The last entry combines both flags in one operand.
6365 cases.push_back(CaseParameter("none", "None"));
6366 cases.push_back(CaseParameter("flatten", "Flatten"));
6367 cases.push_back(CaseParameter("dont_flatten", "DontFlatten"));
6368 cases.push_back(CaseParameter("flatten_dont_flatten", "DontFlatten|Flatten"));
6370 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6372 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6373 floorAll(inputFloats);
// Expected result mirrors the shader: +1 when the input is greater than 10, otherwise -1.
6375 for (size_t ndx = 0; ndx < numElements; ++ndx)
6376 outputFloats[ndx] = inputFloats[ndx] + (inputFloats[ndx] > 10.f ? 1.f : -1.f);
// Register one compute case per selection control operand; all share the same input and expected data.
6378 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6380 map<string, string> specializations;
6381 ComputeShaderSpec spec;
6383 specializations["CONTROL"] = cases[caseNdx].param;
6384 spec.assembly = shaderTemplate.specialize(specializations);
6385 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6386 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6387 spec.numWorkGroups = IVec3(numElements, 1, 1);
6389 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6392 return group.release();
// Appends to abuseCases a list of (test-name suffix, name string) pairs made of unusual names:
// a very long name, malformed or unusual UTF-8 byte sequences, and GLSL-style identifiers/keywords.
// Existing entries in abuseCases are kept; this function only appends.
6395 void getOpNameAbuseCases (vector<CaseParameter> &abuseCases)
6397 // Generate a long name.
6398 std::string longname;
6399 longname.resize(65535, 'k'); // max string literal, spir-v 2.17
6401 // Some bad names, abusing utf-8 encoding. This may also cause problems
6403 // 1. Various illegal code points in utf-8
6404 std::string utf8illegal =
6405 "Illegal bytes in UTF-8: "
6406 "\xc0 \xc1 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff"
6407 "illegal surrogates: \xed\xad\xbf \xed\xbe\x80";
6409 // 2. Zero encoded as overlong, not exactly legal but often supported to differentiate from terminating zero
6410 std::string utf8nul = "UTF-8 encoded nul \xC0\x80 (should not end name)";
6412 // 3. Some overlong encodings
6413 std::string utf8overlong =
6414 "UTF-8 overlong \xF0\x82\x82\xAC \xfc\x83\xbf\xbf\xbf\xbf \xf8\x87\xbf\xbf\xbf "
6417 // 4. Internet "zalgo" meme "bleeding text"
6418 std::string utf8zalgo =
6419 "\x56\xcc\xb5\xcc\x85\xcc\x94\xcc\x88\xcd\x8a\xcc\x91\xcc\x88\xcd\x91\xcc\x83\xcd\x82"
6420 "\xcc\x83\xcd\x90\xcc\x8a\xcc\x92\xcc\x92\xcd\x8b\xcc\x94\xcd\x9d\xcc\x98\xcc\xab\xcc"
6421 "\xae\xcc\xa9\xcc\xad\xcc\x97\xcc\xb0\x75\xcc\xb6\xcc\xbe\xcc\x80\xcc\x82\xcc\x84\xcd"
6422 "\x84\xcc\x90\xcd\x86\xcc\x9a\xcd\x84\xcc\x9b\xcd\x86\xcd\x92\xcc\x9a\xcd\x99\xcd\x99"
6423 "\xcc\xbb\xcc\x98\xcd\x8e\xcd\x88\xcd\x9a\xcc\xa6\xcc\x9c\xcc\xab\xcc\x99\xcd\x94\xcd"
6424 "\x99\xcd\x95\xcc\xa5\xcc\xab\xcd\x89\x6c\xcc\xb8\xcc\x8e\xcc\x8b\xcc\x8b\xcc\x9a\xcc"
6425 "\x8e\xcd\x9d\xcc\x80\xcc\xa1\xcc\xad\xcd\x9c\xcc\xba\xcc\x96\xcc\xb3\xcc\xa2\xcd\x8e"
6426 "\xcc\xa2\xcd\x96\x6b\xcc\xb8\xcc\x84\xcd\x81\xcc\xbf\xcc\x8d\xcc\x89\xcc\x85\xcc\x92"
6427 "\xcc\x84\xcc\x90\xcd\x81\xcc\x93\xcd\x90\xcd\x92\xcd\x9d\xcc\x84\xcd\x98\xcd\x9d\xcd"
6428 "\xa0\xcd\x91\xcc\x94\xcc\xb9\xcd\x93\xcc\xa5\xcd\x87\xcc\xad\xcc\xa7\xcd\x96\xcd\x99"
6429 "\xcc\x9d\xcc\xbc\xcd\x96\xcd\x93\xcc\x9d\xcc\x99\xcc\xa8\xcc\xb1\xcd\x85\xcc\xba\xcc"
6430 "\xa7\x61\xcc\xb8\xcc\x8e\xcc\x81\xcd\x90\xcd\x84\xcd\x8c\xcc\x8c\xcc\x85\xcd\x86\xcc"
6431 "\x84\xcd\x84\xcc\x90\xcc\x84\xcc\x8d\xcd\x99\xcd\x8d\xcc\xb0\xcc\xa3\xcc\xa6\xcd\x89"
6432 "\xcd\x8d\xcd\x87\xcc\x98\xcd\x8d\xcc\xa4\xcd\x9a\xcd\x8e\xcc\xab\xcc\xb9\xcc\xac\xcc"
6433 "\xa2\xcd\x87\xcc\xa0\xcc\xb3\xcd\x89\xcc\xb9\xcc\xa7\xcc\xa6\xcd\x89\xcd\x95\x6e\xcc"
6434 "\xb8\xcd\x8a\xcc\x8a\xcd\x82\xcc\x9b\xcd\x81\xcd\x90\xcc\x85\xcc\x9b\xcd\x80\xcd\x91"
6435 "\xcd\x9b\xcc\x81\xcd\x81\xcc\x9a\xcc\xb3\xcd\x9c\xcc\x9e\xcc\x9d\xcd\x99\xcc\xa2\xcd"
6436 "\x93\xcd\x96\xcc\x97\xff";
6438 // General name abuses
6439 abuseCases.push_back(CaseParameter("_has_very_long_name", longname));
6440 abuseCases.push_back(CaseParameter("_utf8_illegal", utf8illegal));
6441 abuseCases.push_back(CaseParameter("_utf8_nul", utf8nul));
6442 abuseCases.push_back(CaseParameter("_utf8_overlong", utf8overlong));
6443 abuseCases.push_back(CaseParameter("_utf8_zalgo", utf8zalgo));
// Names spelled like GLSL built-in variables and constants, followed by type and qualifier keywords.
6446 abuseCases.push_back(CaseParameter("_is_gl_Position", "gl_Position"));
6447 abuseCases.push_back(CaseParameter("_is_gl_InstanceID", "gl_InstanceID"));
6448 abuseCases.push_back(CaseParameter("_is_gl_PrimitiveID", "gl_PrimitiveID"));
6449 abuseCases.push_back(CaseParameter("_is_gl_TessCoord", "gl_TessCoord"));
6450 abuseCases.push_back(CaseParameter("_is_gl_PerVertex", "gl_PerVertex"));
6451 abuseCases.push_back(CaseParameter("_is_gl_InvocationID", "gl_InvocationID"));
6452 abuseCases.push_back(CaseParameter("_is_gl_PointSize", "gl_PointSize"));
6453 abuseCases.push_back(CaseParameter("_is_gl_PointCoord", "gl_PointCoord"));
6454 abuseCases.push_back(CaseParameter("_is_gl_Layer", "gl_Layer"));
6455 abuseCases.push_back(CaseParameter("_is_gl_FragDepth", "gl_FragDepth"));
6456 abuseCases.push_back(CaseParameter("_is_gl_NumWorkGroups", "gl_NumWorkGroups"));
6457 abuseCases.push_back(CaseParameter("_is_gl_WorkGroupID", "gl_WorkGroupID"));
6458 abuseCases.push_back(CaseParameter("_is_gl_LocalInvocationID", "gl_LocalInvocationID"));
6459 abuseCases.push_back(CaseParameter("_is_gl_GlobalInvocationID", "gl_GlobalInvocationID"));
6460 abuseCases.push_back(CaseParameter("_is_gl_MaxVertexAttribs", "gl_MaxVertexAttribs"));
6461 abuseCases.push_back(CaseParameter("_is_gl_MaxViewports", "gl_MaxViewports"));
6462 abuseCases.push_back(CaseParameter("_is_gl_MaxComputeWorkGroupCount", "gl_MaxComputeWorkGroupCount"));
6463 abuseCases.push_back(CaseParameter("_is_mat3", "mat3"));
6464 abuseCases.push_back(CaseParameter("_is_volatile", "volatile"));
6465 abuseCases.push_back(CaseParameter("_is_inout", "inout"));
6466 abuseCases.push_back(CaseParameter("_is_isampler3d", "isampler3d"));
// Builds the "opname" test group with three subgroups: "entry_main", "entry_rdc" and "abuse".
// Every case negates 128 random input floats in a compute shader; only the OpName debug names
// (and, for "entry_rdc", the entry point name) differ between cases.
// Returns the raw group pointer released from the MovePtr.
6469 tcu::TestCaseGroup* createOpNameGroup (tcu::TestContext& testCtx)
6471 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opname", "Tests OpName cases"));
6472 de::MovePtr<tcu::TestCaseGroup> entryMainGroup (new tcu::TestCaseGroup(testCtx, "entry_main", "OpName tests with entry main"));
6473 de::MovePtr<tcu::TestCaseGroup> entryNotGroup (new tcu::TestCaseGroup(testCtx, "entry_rdc", "OpName tests with entry rdc"));
6474 de::MovePtr<tcu::TestCaseGroup> abuseGroup (new tcu::TestCaseGroup(testCtx, "abuse", "OpName abuse tests"));
6475 vector<CaseParameter> cases;
6476 vector<CaseParameter> abuseCases;
6477 vector<string> testFunc;
// The random generator is seeded from a hash of the group name.
6478 de::Random rnd (deStringHash(group->getName()));
6479 const int numElements = 128;
6480 vector<float> inputFloats (numElements, 0);
6481 vector<float> outputFloats (numElements, 0);
6483 getOpNameAbuseCases(abuseCases);
6485 fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
// Expected output is the negated input, matching the OpFNegate in the shader footer.
6487 for(size_t ndx = 0; ndx < numElements; ++ndx)
6488 outputFloats[ndx] = -inputFloats[ndx];
// Fixed header used by the hand-written abuse shaders below (entry point is always "main").
6490 const string commonShaderHeader =
6491 "OpCapability Shader\n"
6492 "OpMemoryModel Logical GLSL450\n"
6493 "OpEntryPoint GLCompute %main \"main\" %id\n"
6494 "OpExecutionMode %main LocalSize 1 1 1\n";
// Shared remainder of every shader: declares %func and %main; %main calls %func and then writes
// -indata[x] to outdata[x].
6496 const string commonShaderFooter =
6497 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6499 + string(getComputeAsmInputOutputBufferTraits())
6500 + string(getComputeAsmCommonTypes())
6501 + string(getComputeAsmInputOutputBuffer()) +
6503 "%id = OpVariable %uvec3ptr Input\n"
6504 "%zero = OpConstant %i32 0\n"
6506 "%func = OpFunction %void None %voidf\n"
6511 "%main = OpFunction %void None %voidf\n"
6512 "%entry = OpLabel\n"
6513 "%7 = OpFunctionCall %void %func\n"
6515 "%idval = OpLoad %uvec3 %id\n"
6516 "%x = OpCompositeExtract %u32 %idval 0\n"
6518 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6519 "%inval = OpLoad %f32 %inloc\n"
6520 "%neg = OpFNegate %f32 %inval\n"
6521 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6522 " OpStore %outloc %neg\n"
// Template with three parameters: ${ENTRY} is the entry point name, ${ID} the id being named
// and ${NAME} the debug name given to it.
6527 const StringTemplate shaderTemplate (
6528 "OpCapability Shader\n"
6529 "OpMemoryModel Logical GLSL450\n"
6530 "OpEntryPoint GLCompute %main \"${ENTRY}\" %id\n"
6531 "OpExecutionMode %main LocalSize 1 1 1\n"
6532 "OpName %${ID} \"${NAME}\"\n" +
6533 commonShaderFooter);
// Abuse case "main_has_multiple_names": %main receives several OpName entries, one of them repeated.
6535 const std::string multipleNames =
6536 commonShaderHeader +
6537 "OpName %main \"to_be\"\n"
6538 "OpName %id \"or_not\"\n"
6539 "OpName %main \"to_be\"\n"
6540 "OpName %main \"makes_no\"\n"
6541 "OpName %func \"difference\"\n"
6542 "OpName %5 \"to_me\"\n" +
6546 ComputeShaderSpec spec;
6548 spec.assembly = multipleNames;
6549 spec.numWorkGroups = IVec3(numElements, 1, 1);
6550 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6551 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6553 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "main_has_multiple_names", "multiple_names", spec));
// Abuse case "everything_named": each listed id receives its own distinct name.
6556 const std::string everythingNamed =
6557 commonShaderHeader +
6558 "OpName %main \"name1\"\n"
6559 "OpName %id \"name2\"\n"
6560 "OpName %zero \"name3\"\n"
6561 "OpName %entry \"name4\"\n"
6562 "OpName %func \"name5\"\n"
6563 "OpName %5 \"name6\"\n"
6564 "OpName %7 \"name7\"\n"
6565 "OpName %idval \"name8\"\n"
6566 "OpName %inloc \"name9\"\n"
6567 "OpName %inval \"name10\"\n"
6568 "OpName %neg \"name11\"\n"
6569 "OpName %outloc \"name12\"\n"+
6572 ComputeShaderSpec spec;
6574 spec.assembly = everythingNamed;
6575 spec.numWorkGroups = IVec3(numElements, 1, 1);
6576 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6577 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6579 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named", "everything_named", spec));
// Abuse case "everything_named_the_same": the same ids all receive the identical name "the_same".
6582 const std::string everythingNamedTheSame =
6583 commonShaderHeader +
6584 "OpName %main \"the_same\"\n"
6585 "OpName %id \"the_same\"\n"
6586 "OpName %zero \"the_same\"\n"
6587 "OpName %entry \"the_same\"\n"
6588 "OpName %func \"the_same\"\n"
6589 "OpName %5 \"the_same\"\n"
6590 "OpName %7 \"the_same\"\n"
6591 "OpName %idval \"the_same\"\n"
6592 "OpName %inloc \"the_same\"\n"
6593 "OpName %inval \"the_same\"\n"
6594 "OpName %neg \"the_same\"\n"
6595 "OpName %outloc \"the_same\"\n"+
6598 ComputeShaderSpec spec;
6600 spec.assembly = everythingNamedTheSame;
6601 spec.numWorkGroups = IVec3(numElements, 1, 1);
6602 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6603 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6605 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", "everything_named_the_same", spec));
// Apply every abuse name to %main; test names are "main" + the case suffix.
6609 for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6611 map<string, string> specializations;
6612 ComputeShaderSpec spec;
6614 specializations["ENTRY"] = "main";
6615 specializations["ID"] = "main";
6616 specializations["NAME"] = abuseCases[ndx].param;
6617 spec.assembly = shaderTemplate.specialize(specializations);
6618 spec.numWorkGroups = IVec3(numElements, 1, 1);
6619 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6620 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6622 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("main") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
// Apply every abuse name to %x (the extracted invocation index); test names are "x" + the case suffix.
6626 for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6628 map<string, string> specializations;
6629 ComputeShaderSpec spec;
6631 specializations["ENTRY"] = "main";
6632 specializations["ID"] = "x";
6633 specializations["NAME"] = abuseCases[ndx].param;
6634 spec.assembly = shaderTemplate.specialize(specializations);
6635 spec.numWorkGroups = IVec3(numElements, 1, 1);
6636 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6637 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6639 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("x") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
// "entry_main" subgroup: entry point "main"; name either %main or %func as "main" or "not_main".
6642 cases.push_back(CaseParameter("_is_main", "main"));
6643 cases.push_back(CaseParameter("_is_not_main", "not_main"));
6644 testFunc.push_back("main");
6645 testFunc.push_back("func");
6647 for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6649 for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6651 map<string, string> specializations;
6652 ComputeShaderSpec spec;
6654 specializations["ENTRY"] = "main";
6655 specializations["ID"] = testFunc[fNdx];
6656 specializations["NAME"] = cases[ndx].param;
6657 spec.assembly = shaderTemplate.specialize(specializations);
6658 spec.numWorkGroups = IVec3(numElements, 1, 1);
6659 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6660 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6662 entryMainGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), cases[ndx].name, spec));
// "entry_rdc" subgroup: the entry point is named "rdc" and a third name choice ("rdc") is added,
// so each of %main and %func is named "main", "not_main" and "rdc" in turn.
6666 cases.push_back(CaseParameter("_is_entry", "rdc"));
6668 for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6670 for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6672 map<string, string> specializations;
6673 ComputeShaderSpec spec;
6675 specializations["ENTRY"] = "rdc";
6676 specializations["ID"] = testFunc[fNdx];
6677 specializations["NAME"] = cases[ndx].param;
6678 spec.assembly = shaderTemplate.specialize(specializations);
6679 spec.numWorkGroups = IVec3(numElements, 1, 1);
// The spec's entry point must match the ${ENTRY} name used in OpEntryPoint.
6680 spec.entryPoint = "rdc";
6681 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6682 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6684 entryNotGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), cases[ndx].name, spec));
// Attach the three subgroups to the top-level group.
6688 group->addChild(entryMainGroup.release());
6689 group->addChild(entryNotGroup.release());
6690 group->addChild(abuseGroup.release());
6692 return group.release();
// Builds the "opmembername" test group. It contains a single "abuse" subgroup of
// compute cases that attach OpMemberName debug names to the members of the
// %u3str struct. Every case uses the same shader body, which stores the negation
// of each input float, and is checked against the negated input buffer.
6695 tcu::TestCaseGroup* createOpMemberNameGroup (tcu::TestContext& testCtx)
6697 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opmembername", "Tests OpMemberName cases"));
6698 de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "abuse", "OpMemberName abuse tests"));
6699 vector<CaseParameter> abuseCases;
// NOTE(review): testFunc is not referenced by any visible statement of this function - confirm it can be dropped.
6700 vector<string> testFunc;
// Random generator seeded from a hash of the group name.
6701 de::Random rnd(deStringHash(group->getName()));
6702 const int numElements = 128;
6703 vector<float> inputFloats(numElements, 0);
6704 vector<float> outputFloats(numElements, 0);
// Fills abuseCases; each entry's .param is later used as the member name and .name as the case-name suffix.
6706 getOpNameAbuseCases(abuseCases);
6708 fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
// Reference output: the shader applies OpFNegate to every element.
6710 for (size_t ndx = 0; ndx < numElements; ++ndx)
6711 outputFloats[ndx] = -inputFloats[ndx];
// Module header shared by all cases; the OpMemberName instructions are placed right after it.
6713 const string commonShaderHeader =
6714 "OpCapability Shader\n"
6715 "OpMemoryModel Logical GLSL450\n"
6716 "OpEntryPoint GLCompute %main \"main\" %id\n"
6717 "OpExecutionMode %main LocalSize 1 1 1\n";
// Remainder of the module: decorations, types, buffers and the main function.
6719 const string commonShaderFooter =
6720 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6722 + string(getComputeAsmInputOutputBufferTraits())
6723 + string(getComputeAsmCommonTypes())
6724 + string(getComputeAsmInputOutputBuffer()) +
// The struct whose members receive the names under test.
6726 "%u3str = OpTypeStruct %u32 %u32 %u32\n"
6728 "%id = OpVariable %uvec3ptr Input\n"
6729 "%zero = OpConstant %i32 0\n"
6731 "%main = OpFunction %void None %voidf\n"
6732 "%entry = OpLabel\n"
6734 "%idval = OpLoad %uvec3 %id\n"
6735 "%x0 = OpCompositeExtract %u32 %idval 0\n"
// The invocation index is packed into a %u3str value and extracted again, so the named struct is actually used.
6737 "%idstr = OpCompositeConstruct %u3str %x0 %x0 %x0\n"
6738 "%x = OpCompositeExtract %u32 %idstr 0\n"
6740 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6741 "%inval = OpLoad %f32 %inloc\n"
6742 "%neg = OpFNegate %f32 %inval\n"
6743 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6744 " OpStore %outloc %neg\n"
// Template with a single ${NAME} placeholder: the debug name given to member 0 of %u3str.
6749 const StringTemplate shaderTemplate(
6750 commonShaderHeader +
6751 "OpMemberName %u3str 0 \"${NAME}\"\n" +
6752 commonShaderFooter);
// Variant in which member 0 is named several times (including a repeated identical name).
6754 const std::string multipleNames =
6755 commonShaderHeader +
6756 "OpMemberName %u3str 0 \"to_be\"\n"
6757 "OpMemberName %u3str 1 \"or_not\"\n"
6758 "OpMemberName %u3str 0 \"to_be\"\n"
6759 "OpMemberName %u3str 2 \"makes_no\"\n"
6760 "OpMemberName %u3str 0 \"difference\"\n"
6761 "OpMemberName %u3str 0 \"to_me\"\n" +
6764 ComputeShaderSpec spec;
6766 spec.assembly = multipleNames;
6767 spec.numWorkGroups = IVec3(numElements, 1, 1);
6768 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6769 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6771 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "u3str_x_has_multiple_names", "multiple_names", spec));
// Variant in which all three members share one name.
6774 const std::string everythingNamedTheSame =
6775 commonShaderHeader +
6776 "OpMemberName %u3str 0 \"the_same\"\n"
6777 "OpMemberName %u3str 1 \"the_same\"\n"
6778 "OpMemberName %u3str 2 \"the_same\"\n" +
6782 ComputeShaderSpec spec;
6784 spec.assembly = everythingNamedTheSame;
6785 spec.numWorkGroups = IVec3(numElements, 1, 1);
6786 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6787 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6789 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", "everything_named_the_same", spec));
// One case per abuse string; the case is named "u3str_x" followed by the entry's name.
6793 for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6795 map<string, string> specializations;
6796 ComputeShaderSpec spec;
6798 specializations["NAME"] = abuseCases[ndx].param;
6799 spec.assembly = shaderTemplate.specialize(specializations);
6800 spec.numWorkGroups = IVec3(numElements, 1, 1);
6801 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6802 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6804 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("u3str_x") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
// The parent group takes the released abuse subgroup; the caller receives the released parent.
6807 group->addChild(abuseGroup.release());
6809 return group.release();
6812 // Assembly code used for testing function control is based on GLSL source code:
6816 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6817 // float elements[];
6819 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6820 // float elements[];
6823 // float const10() { return 10.f; }
6826 // uint x = gl_GlobalInvocationID.x;
6827 // output_data.elements[x] = input_data.elements[x] + const10();
// Builds the "function_control" group: one compute case per function-control
// mask. The mask is substituted into the OpFunction of %func_const10, a helper
// that returns the constant 10.0; main adds that value to each input element.
6829 tcu::TestCaseGroup* createFunctionControlGroup (tcu::TestContext& testCtx)
6831 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "function_control", "Tests function control cases"));
6832 vector<CaseParameter> cases;
// Random generator seeded from a hash of the group name.
6833 de::Random rnd (deStringHash(group->getName()));
6834 const int numElements = 100;
6835 vector<float> inputFloats (numElements, 0);
6836 vector<float> outputFloats (numElements, 0);
// Shader template; ${CONTROL} is the only placeholder.
6837 const StringTemplate shaderTemplate (
6838 string(getComputeAsmShaderPreamble()) +
6840 "OpSource GLSL 430\n"
6841 "OpName %main \"main\"\n"
6842 "OpName %func_const10 \"const10(\"\n"
6843 "OpName %id \"gl_GlobalInvocationID\"\n"
6845 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6847 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6849 "%f32f = OpTypeFunction %f32\n"
6850 "%id = OpVariable %uvec3ptr Input\n"
6851 "%zero = OpConstant %i32 0\n"
6852 "%constf10 = OpConstant %f32 10.0\n"
// main: out[x] = in[x] + const10()
6854 "%main = OpFunction %void None %voidf\n"
6855 "%entry = OpLabel\n"
6856 "%idval = OpLoad %uvec3 %id\n"
6857 "%x = OpCompositeExtract %u32 %idval 0\n"
6858 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6859 "%inval = OpLoad %f32 %inloc\n"
6860 "%ret_10 = OpFunctionCall %f32 %func_const10\n"
6861 "%fadd = OpFAdd %f32 %inval %ret_10\n"
6862 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6863 " OpStore %outloc %fadd\n"
// The function whose control mask is under test.
6867 "%func_const10 = OpFunction %f32 ${CONTROL} %f32f\n"
6868 "%label = OpLabel\n"
6869 " OpReturnValue %constf10\n"
6870 " OpFunctionEnd\n");
// (case name, function-control operand). The list includes masks that combine Inline with DontInline.
6872 cases.push_back(CaseParameter("none", "None"));
6873 cases.push_back(CaseParameter("inline", "Inline"));
6874 cases.push_back(CaseParameter("dont_inline", "DontInline"));
6875 cases.push_back(CaseParameter("pure", "Pure"));
6876 cases.push_back(CaseParameter("const", "Const"));
6877 cases.push_back(CaseParameter("inline_pure", "Inline|Pure"));
6878 cases.push_back(CaseParameter("const_dont_inline", "Const|DontInline"));
6879 cases.push_back(CaseParameter("inline_dont_inline", "Inline|DontInline"));
6880 cases.push_back(CaseParameter("pure_inline_dont_inline", "Pure|Inline|DontInline"));
6882 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6884 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6885 floorAll(inputFloats);
// Reference output: input plus the constant returned by %func_const10.
6887 for (size_t ndx = 0; ndx < numElements; ++ndx)
6888 outputFloats[ndx] = inputFloats[ndx] + 10.f;
// Register one case per mask; the same input and expected buffers are used for all of them.
6890 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6892 map<string, string> specializations;
6893 ComputeShaderSpec spec;
6895 specializations["CONTROL"] = cases[caseNdx].param;
6896 spec.assembly = shaderTemplate.specialize(specializations);
6897 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6898 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6899 spec.numWorkGroups = IVec3(numElements, 1, 1);
6901 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6904 return group.release();
// Builds the "memory_access" group: one compute case per memory-access operand
// string. The operand is appended to an OpLoad, an OpCopyMemory and an OpStore
// in a shader that writes in[x] + in[x] to out[x].
6907 tcu::TestCaseGroup* createMemoryAccessGroup (tcu::TestContext& testCtx)
6909 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "memory_access", "Tests memory access cases"));
6910 vector<CaseParameter> cases;
// Random generator seeded from a hash of the group name.
6911 de::Random rnd (deStringHash(group->getName()));
6912 const int numElements = 100;
6913 vector<float> inputFloats (numElements, 0);
6914 vector<float> outputFloats (numElements, 0);
// Shader template; ${ACCESS} is the only placeholder and appears three times.
6915 const StringTemplate shaderTemplate (
6916 string(getComputeAsmShaderPreamble()) +
6918 "OpSource GLSL 430\n"
6919 "OpName %main \"main\"\n"
6920 "OpName %id \"gl_GlobalInvocationID\"\n"
6922 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6924 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6926 "%f32ptr_f = OpTypePointer Function %f32\n"
6928 "%id = OpVariable %uvec3ptr Input\n"
6929 "%zero = OpConstant %i32 0\n"
6930 "%four = OpConstant %i32 4\n"
6932 "%main = OpFunction %void None %voidf\n"
6933 "%label = OpLabel\n"
// Function-local float that receives a copy of the input element.
6934 "%copy = OpVariable %f32ptr_f Function\n"
6935 "%idval = OpLoad %uvec3 %id ${ACCESS}\n"
6936 "%x = OpCompositeExtract %u32 %idval 0\n"
6937 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6938 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6939 " OpCopyMemory %copy %inloc ${ACCESS}\n"
// val1 is read back from the copy, val2 directly from the input buffer; their sum is stored.
6940 "%val1 = OpLoad %f32 %copy\n"
6941 "%val2 = OpLoad %f32 %inloc\n"
6942 "%add = OpFAdd %f32 %val1 %val2\n"
6943 " OpStore %outloc %add ${ACCESS}\n"
6945 " OpFunctionEnd\n");
// (case name, memory-access operand text). "null" substitutes an empty string, i.e. no operand at all.
6947 cases.push_back(CaseParameter("null", ""));
6948 cases.push_back(CaseParameter("none", "None"));
6949 cases.push_back(CaseParameter("volatile", "Volatile"));
6950 cases.push_back(CaseParameter("aligned", "Aligned 4"));
6951 cases.push_back(CaseParameter("nontemporal", "Nontemporal"));
6952 cases.push_back(CaseParameter("aligned_nontemporal", "Aligned|Nontemporal 4"));
6953 cases.push_back(CaseParameter("aligned_volatile", "Volatile|Aligned 4"));
6955 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
// Reference output: each element added to itself, matching %val1 + %val2 in the shader.
6957 for (size_t ndx = 0; ndx < numElements; ++ndx)
6958 outputFloats[ndx] = inputFloats[ndx] + inputFloats[ndx];
// Register one case per operand string; all cases share the same buffers.
6960 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6962 map<string, string> specializations;
6963 ComputeShaderSpec spec;
6965 specializations["ACCESS"] = cases[caseNdx].param;
6966 spec.assembly = shaderTemplate.specialize(specializations);
6967 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6968 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6969 spec.numWorkGroups = IVec3(numElements, 1, 1);
6971 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6974 return group.release();
6977 // Checks that we can get undefined values for various types, without exercising a computation with it.
// Builds the "opundef" group: one compute case per type, each declaring
// "%undef = OpUndef <type>" inside main. The %undef result is not referenced by
// any other visible instruction; the shader only negates the input floats.
// Two Amber-based cases are added as well when CTS_USES_VULKANSC is not defined.
6978 tcu::TestCaseGroup* createOpUndefGroup (tcu::TestContext& testCtx)
6980 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opundef", "Tests the OpUndef instruction"));
6981 vector<CaseParameter> cases;
// Random generator seeded from a hash of the group name.
6982 de::Random rnd (deStringHash(group->getName()));
6983 const int numElements = 100;
6984 vector<float> positiveFloats (numElements, 0);
6985 vector<float> negativeFloats (numElements, 0);
// Shader template; ${TYPE} is the only placeholder.
6986 const StringTemplate shaderTemplate (
6987 string(getComputeAsmShaderPreamble()) +
6989 "OpSource GLSL 430\n"
6990 "OpName %main \"main\"\n"
6991 "OpName %id \"gl_GlobalInvocationID\"\n"
6993 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6995 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
// Additional types that can be substituted for ${TYPE}.
6996 "%uvec2 = OpTypeVector %u32 2\n"
6997 "%fvec4 = OpTypeVector %f32 4\n"
6998 "%fmat33 = OpTypeMatrix %fvec3 3\n"
6999 "%image = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
7000 "%sampler = OpTypeSampler\n"
7001 "%simage = OpTypeSampledImage %image\n"
7002 "%const100 = OpConstant %u32 100\n"
7003 "%uarr100 = OpTypeArray %i32 %const100\n"
7004 "%struct = OpTypeStruct %f32 %i32 %u32\n"
7005 "%pointer = OpTypePointer Function %i32\n"
7006 + string(getComputeAsmInputOutputBuffer()) +
7008 "%id = OpVariable %uvec3ptr Input\n"
7009 "%zero = OpConstant %i32 0\n"
7011 "%main = OpFunction %void None %voidf\n"
7012 "%label = OpLabel\n"
// The instruction under test.
7014 "%undef = OpUndef ${TYPE}\n"
7016 "%idval = OpLoad %uvec3 %id\n"
7017 "%x = OpCompositeExtract %u32 %idval 0\n"
7019 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
7020 "%inval = OpLoad %f32 %inloc\n"
7021 "%neg = OpFNegate %f32 %inval\n"
7022 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
7023 " OpStore %outloc %neg\n"
7025 " OpFunctionEnd\n");
// (case name, type id substituted for ${TYPE}).
7027 cases.push_back(CaseParameter("bool", "%bool"));
7028 cases.push_back(CaseParameter("sint32", "%i32"));
7029 cases.push_back(CaseParameter("uint32", "%u32"));
7030 cases.push_back(CaseParameter("float32", "%f32"));
7031 cases.push_back(CaseParameter("vec4float32", "%fvec4"));
7032 cases.push_back(CaseParameter("vec2uint32", "%uvec2"));
7033 cases.push_back(CaseParameter("matrix", "%fmat33"));
7034 cases.push_back(CaseParameter("image", "%image"));
7035 cases.push_back(CaseParameter("sampler", "%sampler"));
7036 cases.push_back(CaseParameter("sampledimage", "%simage"));
7037 cases.push_back(CaseParameter("array", "%uarr100"));
7038 cases.push_back(CaseParameter("runtimearray", "%f32arr"));
7039 cases.push_back(CaseParameter("struct", "%struct"));
7040 cases.push_back(CaseParameter("pointer", "%pointer"));
// Inputs are drawn from [1, 100]; the expected outputs are their negations.
7042 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7044 for (size_t ndx = 0; ndx < numElements; ++ndx)
7045 negativeFloats[ndx] = -positiveFloats[ndx];
// Register one case per type; all cases share the same buffers.
7047 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7049 map<string, string> specializations;
7050 ComputeShaderSpec spec;
7052 specializations["TYPE"] = cases[caseNdx].param;
7053 spec.assembly = shaderTemplate.specialize(specializations);
7054 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7055 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7056 spec.numWorkGroups = IVec3(numElements, 1, 1);
7058 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
7061 // OpUndef with constants.
7062 #ifndef CTS_USES_VULKANSC
// Directory of the Amber scripts; each script file is named "<case name>.amber".
7064 static const char data_dir[] = "spirv_assembly/instruction/compute/undef";
7068 const std::string name;
7069 const std::string desc;
7072 { "undefined_constant_composite", "OpUndef value in OpConstantComposite" },
7073 { "undefined_spec_constant_composite", "OpUndef value in OpSpecConstantComposite" },
7076 for (int i = 0; i < DE_LENGTH_OF_ARRAY(amberCases); ++i)
7078 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
7079 amberCases[i].name.c_str(),
7080 amberCases[i].desc.c_str(),
7082 amberCases[i].name + ".amber");
7083 group->addChild(testCase);
7088 return group.release();
7091 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
// Builds the "opconstantcomposite" group for 16-bit floats. Each case supplies a
// block of declarations (vector, matrix, struct, nested struct) built from %f16
// constants. Every case requests the shaderFloat16 feature and the
// VK_KHR_shader_float16_int8 extension; the shader only negates the input floats.
7092 tcu::TestCaseGroup* createFloat16OpConstantCompositeGroup (tcu::TestContext& testCtx)
7094 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
7095 vector<CaseParameter> cases;
// Random generator seeded from a hash of the group name.
7096 de::Random rnd (deStringHash(group->getName()));
7097 const int numElements = 100;
7098 vector<float> positiveFloats (numElements, 0);
7099 vector<float> negativeFloats (numElements, 0);
7100 const StringTemplate shaderTemplate (
7101 "OpCapability Shader\n"
7102 "OpCapability Float16\n"
7103 "OpMemoryModel Logical GLSL450\n"
7104 "OpEntryPoint GLCompute %main \"main\" %id\n"
7105 "OpExecutionMode %main LocalSize 1 1 1\n"
7106 "OpSource GLSL 430\n"
7107 "OpName %main \"main\"\n"
7108 "OpName %id \"gl_GlobalInvocationID\"\n"
7110 "OpDecorate %id BuiltIn GlobalInvocationId\n"
7112 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
7114 "%id = OpVariable %uvec3ptr Input\n"
7115 "%zero = OpConstant %i32 0\n"
// Half-precision scalar type, constants and vector types referenced by the per-case declarations.
7116 "%f16 = OpTypeFloat 16\n"
7117 "%c_f16_0 = OpConstant %f16 0.0\n"
7118 "%c_f16_0_5 = OpConstant %f16 0.5\n"
7119 "%c_f16_1 = OpConstant %f16 1.0\n"
7120 "%v2f16 = OpTypeVector %f16 2\n"
7121 "%v3f16 = OpTypeVector %f16 3\n"
7122 "%v4f16 = OpTypeVector %f16 4\n"
7126 "%main = OpFunction %void None %voidf\n"
7127 "%label = OpLabel\n"
7128 "%idval = OpLoad %uvec3 %id\n"
7129 "%x = OpCompositeExtract %u32 %idval 0\n"
7130 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
7131 "%inval = OpLoad %f32 %inloc\n"
7132 "%neg = OpFNegate %f32 %inval\n"
7133 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
7134 " OpStore %outloc %neg\n"
7136 " OpFunctionEnd\n");
// (case name, declarations passed as the "CONSTANT" specialization).
7139 cases.push_back(CaseParameter("vector", "%const = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"));
7140 cases.push_back(CaseParameter("matrix", "%m3v3f16 = OpTypeMatrix %v3f16 3\n"
7141 "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7142 "%mat = OpConstantComposite %m3v3f16 %vec %vec %vec"));
7143 cases.push_back(CaseParameter("struct", "%m2v3f16 = OpTypeMatrix %v3f16 2\n"
7144 "%struct = OpTypeStruct %i32 %f16 %v3f16 %m2v3f16\n"
7145 "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7146 "%mat = OpConstantComposite %m2v3f16 %vec %vec\n"
7147 "%const = OpConstantComposite %struct %zero %c_f16_0_5 %vec %mat\n"));
7148 cases.push_back(CaseParameter("nested_struct", "%st1 = OpTypeStruct %i32 %f16\n"
7149 "%st2 = OpTypeStruct %i32 %i32\n"
7150 "%struct = OpTypeStruct %st1 %st2\n"
7151 "%st1val = OpConstantComposite %st1 %zero %c_f16_0_5\n"
7152 "%st2val = OpConstantComposite %st2 %zero %zero\n"
7153 "%const = OpConstantComposite %struct %st1val %st2val"));
// Inputs are drawn from [1, 100]; the expected outputs are their negations.
7155 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7157 for (size_t ndx = 0; ndx < numElements; ++ndx)
7158 negativeFloats[ndx] = -positiveFloats[ndx];
// Register one case per composite kind; all cases share the same buffers.
7160 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7162 map<string, string> specializations;
7163 ComputeShaderSpec spec;
7165 specializations["CONSTANT"] = cases[caseNdx].param;
7166 spec.assembly = shaderTemplate.specialize(specializations);
7167 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7168 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7169 spec.numWorkGroups = IVec3(numElements, 1, 1);
// The module declares OpCapability Float16, so the matching extension and feature are requested.
7171 spec.extensions.push_back("VK_KHR_shader_float16_int8");
7173 spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
7175 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
7178 return group.release();
// Expands inData into a longer sequence of half-float bit patterns; capacity for
// inDataLength * inDataLength elements is reserved up front.
// NOTE(review): argNo presumably selects which of the two fill loops below runs
// (one layout per operand, so that element i of the two results enumerates every
// ordered pair of inData) - confirm against the callers.
// NOTE(review): returning a const-qualified vector by value prevents the caller
// from moving out of the result.
7181 const vector<deFloat16> squarize(const vector<deFloat16>& inData, const deUint32 argNo)
7183 const size_t inDataLength = inData.size();
7184 vector<deFloat16> result;
7186 result.reserve(inDataLength * inDataLength);
// Layout A: the whole input appended inDataLength times (a b c a b c ...).
7190 for (size_t numIdx = 0; numIdx < inDataLength; ++numIdx)
7191 result.insert(result.end(), inData.begin(), inData.end());
// Layout B: each element repeated inDataLength times (a a a b b b ...).
7196 for (size_t numIdx = 0; numIdx < inDataLength; ++numIdx)
7198 const vector<deFloat16> tmp(inDataLength, inData[numIdx]);
7200 result.insert(result.end(), tmp.begin(), tmp.end());
// Two-component-vector counterpart of the scalar expansion: first forms every
// ordered pair of inData values as a 2-component vector, then expands that list
// of vectors into a longer sequence.
// NOTE(review): argNo presumably selects which of the two final fill loops runs -
// confirm against the callers.
7207 const vector<deFloat16> squarizeVector(const vector<deFloat16>& inData, const deUint32 argNo)
7209 vector<deFloat16> vec;
7210 vector<deFloat16> result;
7212 // Create vectors. vec will contain each possible pair from inData
7214 const size_t inDataLength = inData.size();
// Bounds the input size: the result grows with the fourth power of inDataLength.
7216 DE_ASSERT(inDataLength <= 64);
7218 vec.reserve(2 * inDataLength * inDataLength);
7220 for (size_t numIdxX = 0; numIdxX < inDataLength; ++numIdxX)
7221 for (size_t numIdxY = 0; numIdxY < inDataLength; ++numIdxY)
7223 vec.push_back(inData[numIdxX]);
7224 vec.push_back(inData[numIdxY]);
7228 // Create vector pairs. result will contain each possible pair from vec
7230 const size_t coordsPerVector = 2;
7231 const size_t vectorsCount = vec.size() / coordsPerVector;
7233 result.reserve(coordsPerVector * vectorsCount * vectorsCount);
// Layout A: the inner index picks the vector, so the full list of vectors is repeated vectorsCount times.
7237 for (size_t numIdxX = 0; numIdxX < vectorsCount; ++numIdxX)
7238 for (size_t numIdxY = 0; numIdxY < vectorsCount; ++numIdxY)
7240 for (size_t coordNdx = 0; coordNdx < coordsPerVector; ++coordNdx)
7241 result.push_back(vec[coordsPerVector * numIdxY + coordNdx]);
// Layout B: the outer index picks the vector, so each vector is repeated vectorsCount times in a row.
7247 for (size_t numIdxX = 0; numIdxX < vectorsCount; ++numIdxX)
7248 for (size_t numIdxY = 0; numIdxY < vectorsCount; ++numIdxY)
7250 for (size_t coordNdx = 0; coordNdx < coordsPerVector; ++coordNdx)
7251 result.push_back(vec[coordsPerVector * numIdxX + coordNdx]);
7259 struct fp16isNan { bool operator()(const tcu::Float16 in1, const tcu::Float16) { return in1.isNaN(); } };
7260 struct fp16isInf { bool operator()(const tcu::Float16 in1, const tcu::Float16) { return in1.isInf(); } };
7261 struct fp16isEqual { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() == in2.asFloat(); } };
7262 struct fp16isUnequal { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() != in2.asFloat(); } };
7263 struct fp16isLess { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() < in2.asFloat(); } };
7264 struct fp16isGreater { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() > in2.asFloat(); } };
7265 struct fp16isLessOrEqual { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() <= in2.asFloat(); } };
7266 struct fp16isGreaterOrEqual { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() >= in2.asFloat(); } };
// Output verifier for half-precision logical/comparison tests.
// Reads two input buffers and one output allocation as arrays of deFloat16 and,
// for every element pair, compares the produced value with the expected encoding
// of 1.0 (predicate holds) or 0.0 (it does not).
// The check is evaluated for two denormal-handling modes (inputs preserved, and
// denormal inputs flushed to zero). It returns true when successfulRuns is still
// above zero at the end; when it is zero, the recorded failure message of each
// mode is written to the log.
// Template parameters:
//   TestedLogicalFunction - functor called as f(tcu::Float16, tcu::Float16) -> bool.
//   nanSupported          - when false, pairs containing a NaN are not checked.
//   onlyTestFunc, unationModeAnd - NOTE(review): presumably select between the
//       plain, ordered ("and") and unordered ("or") expectations computed below;
//       the selecting conditions are not visible here - confirm.
7268 template <class TestedLogicalFunction, bool onlyTestFunc, bool unationModeAnd, bool nanSupported>
7269 bool compareFP16Logical (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
// Exactly two inputs and one output are expected.
7271 if (inputs.size() != 2 || outputAllocs.size() != 1)
7274 vector<deUint8> input1Bytes;
7275 vector<deUint8> input2Bytes;
7277 inputs[0].getBytes(input1Bytes);
7278 inputs[1].getBytes(input2Bytes);
7280 const deUint32 denormModesCount = 2;
// Bit patterns of 1.0 and 0.0 in half precision: the two possible expected outputs.
7281 const deFloat16 float16one = tcu::Float16(1.0f).bits();
7282 const deFloat16 float16zero = tcu::Float16(0.0f).bits();
7283 const tcu::Float16 zero = tcu::Float16::zero(1);
7284 const deFloat16* const outputAsFP16 = static_cast<deFloat16*>(outputAllocs[0]->getHostPtr());
// NOTE(review): front() on an empty byte vector is undefined - assumes both inputs are non-empty; confirm.
7285 const deFloat16* const input1AsFP16 = reinterpret_cast<deFloat16* const>(&input1Bytes.front());
7286 const deFloat16* const input2AsFP16 = reinterpret_cast<deFloat16* const>(&input2Bytes.front());
// Starts at the number of modes. NOTE(review): presumably decremented when a mode hits a mismatch - confirm.
7287 deUint32 successfulRuns = denormModesCount;
7288 std::string results[denormModesCount];
7289 TestedLogicalFunction testedLogicalFunction;
7291 for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
// Mode 1 replaces denormal inputs with zero before evaluating the predicate.
7293 const bool flushToZero = (denormMode == 1);
// The element count is derived from the first input's byte size.
7295 for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deFloat16); ++idx)
7297 const tcu::Float16 f1pre = tcu::Float16(input1AsFP16[idx]);
7298 const tcu::Float16 f2pre = tcu::Float16(input2AsFP16[idx]);
7299 const tcu::Float16 f1 = (flushToZero && f1pre.isDenorm()) ? zero : f1pre;
7300 const tcu::Float16 f2 = (flushToZero && f2pre.isDenorm()) ? zero : f2pre;
7301 deFloat16 expectedOutput = float16zero;
// Plain expectation: the predicate result alone.
7305 if (testedLogicalFunction(f1, f2))
7306 expectedOutput = float16one;
7310 const bool f1nan = f1.isNaN();
7311 const bool f2nan = f2.isNaN();
7313 // Skip NaN floats if not supported by implementation
7314 if (!nanSupported && (f1nan || f2nan))
// Ordered expectation: true only when neither operand is NaN and the predicate holds.
7319 const bool ordered = !f1nan && !f2nan;
7321 if (ordered && testedLogicalFunction(f1, f2))
7322 expectedOutput = float16one;
// Unordered expectation: true when either operand is NaN or the predicate holds.
7326 const bool unordered = f1nan || f2nan;
7328 if (unordered || testedLogicalFunction(f1, f2))
7329 expectedOutput = float16one;
// Outputs are compared as raw 16-bit patterns; a mismatch is described in this mode's result string.
7333 if (outputAsFP16[idx] != expectedOutput)
7335 std::ostringstream str;
7337 str << "ERROR: Sub-case #" << idx
7338 << " flushToZero:" << flushToZero
7340 << " failed, inputs: 0x" << f1.bits()
7341 << ";0x" << f2.bits()
7342 << " output: 0x" << outputAsFP16[idx]
7343 << " expected output: 0x" << expectedOutput;
7345 results[denormMode] = str.str();
// Messages are logged only when no mode passed.
7354 if (successfulRuns == 0)
7355 for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
7356 log << TestLog::Message << results[denormMode] << TestLog::EndMessage;
7358 return successfulRuns > 0;
// Builds the "opsource" group. Each table entry is a snippet of OpSource
// assembly placed in the "debug" fragment of the pass-through shaders;
// createTestsForAllStages then adds tests for it with the default colors as
// both input and expected output.
7363 tcu::TestCaseGroup* createOpSourceTests (tcu::TestContext& testCtx)
7365 struct NameCodePair { string name, code; };
7366 RGBA defaultColors[4];
7367 de::MovePtr<tcu::TestCaseGroup> opSourceTests (new tcu::TestCaseGroup(testCtx, "opsource", "OpSource instruction"));
// Common prefix for the cases that name a source file; it ends with a space so a source string can follow.
7368 const std::string opsourceGLSLWithFile = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile ";
7369 map<string, string> fragments = passthruFragments();
// (test name, debug-section assembly).
7370 const NameCodePair tests[] =
7372 {"unknown", "OpSource Unknown 321"},
7373 {"essl", "OpSource ESSL 310"},
7374 {"glsl", "OpSource GLSL 450"},
7375 {"opencl_cpp", "OpSource OpenCL_CPP 120"},
7376 {"opencl_c", "OpSource OpenCL_C 120"},
7377 {"multiple", "OpSource GLSL 450\nOpSource GLSL 450"},
7378 {"file", opsourceGLSLWithFile},
7379 {"source", opsourceGLSLWithFile + "\"void main(){}\""},
7380 // Longest possible source string: SPIR-V limits instructions to 65535
7381 // words, of which the first 4 are opsourceGLSLWithFile; the rest will
7382 // contain 65530 UTF8 characters (one word each) plus one last word
7383 // containing 3 ASCII characters and \0.
7384 {"longsource", opsourceGLSLWithFile + '"' + makeLongUTF8String(65530) + "ccc" + '"'}
7387 getDefaultColors(defaultColors);
// Only the "debug" fragment changes between iterations; the other fragments come from passthruFragments().
7388 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7390 fragments["debug"] = tests[testNdx].code;
7391 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7394 return opSourceTests.release();
// Builds the "opsourcecontinued" group. Every case starts with the same
// OpString/OpSource pair and appends one or more OpSourceContinued instructions;
// the snippet is placed in the "debug" fragment of the pass-through shaders.
7397 tcu::TestCaseGroup* createOpSourceContinuedTests (tcu::TestContext& testCtx)
7399 struct NameCodePair { string name, code; };
7400 RGBA defaultColors[4];
7401 de::MovePtr<tcu::TestCaseGroup> opSourceTests (new tcu::TestCaseGroup(testCtx, "opsourcecontinued", "OpSourceContinued instruction"));
7402 map<string, string> fragments = passthruFragments();
// OpSource with an inline source string, which the OpSourceContinued instructions extend.
7403 const std::string opsource = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile \"void main(){}\"\n";
// (test name, debug-section assembly).
7404 const NameCodePair tests[] =
7406 {"empty", opsource + "OpSourceContinued \"\""},
7407 {"short", opsource + "OpSourceContinued \"abcde\""},
7408 {"multiple", opsource + "OpSourceContinued \"abcde\"\nOpSourceContinued \"fghij\""},
7409 // Longest possible source string: SPIR-V limits instructions to 65535
7410 // words, of which the first one is OpSourceContinued/length; the rest
7411 // will contain 65533 UTF8 characters (one word each) plus one last word
7412 // containing 3 ASCII characters and \0.
7413 {"long", opsource + "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\""}
7416 getDefaultColors(defaultColors);
// Only the "debug" fragment changes between iterations.
7417 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7419 fragments["debug"] = tests[testNdx].code;
7420 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7423 return opSourceTests.release();
// Builds the "opnoline" group. The fragments define a %second_function that
// returns its parameter and a %test_code function that forwards its parameter
// through %second_function, with line-debug instructions interleaved between
// the function instructions. Tests are created with the default colors as both
// input and expected output.
7425 tcu::TestCaseGroup* createOpNoLineTests(tcu::TestContext& testCtx)
7427 RGBA defaultColors[4];
7428 de::MovePtr<tcu::TestCaseGroup> opLineTests (new tcu::TestCaseGroup(testCtx, "opnoline", "OpNoLine instruction"));
7429 map<string, string> fragments;
7430 getDefaultColors(defaultColors);
// The string referenced by every OpLine below.
7431 fragments["debug"] =
7432 "%name = OpString \"name\"\n";
// Helper function emitted before the main test function.
7434 fragments["pre_main"] =
7437 "OpLine %name 1 1\n"
7439 "OpLine %name 1 1\n"
7440 "OpLine %name 1 1\n"
7441 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7443 "OpLine %name 1 1\n"
7445 "OpLine %name 1 1\n"
7446 "OpLine %name 1 1\n"
7447 "%second_param1 = OpFunctionParameter %v4f32\n"
7450 "%label_secondfunction = OpLabel\n"
7452 "OpReturnValue %second_param1\n"
7457 fragments["testfun"] =
7458 // A %test_code function that returns its argument unchanged.
7461 "OpLine %name 1 1\n"
7462 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7464 "%param1 = OpFunctionParameter %v4f32\n"
7467 "%label_testfun = OpLabel\n"
7469 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7470 "OpReturnValue %val1\n"
7472 "OpLine %name 1 1\n"
7475 createTestsForAllStages("opnoline", defaultColors, defaultColors, fragments, opLineTests.get());
7477 return opLineTests.release();
// Builds the "opmoduleprocessed" group. Three OpModuleProcessed instructions are
// supplied through the "moduleprocessed" fragment of pass-through shaders. The
// resources request SPIR-V 1.3 and install veryfiBinaryShader as the
// binary-verification callback.
7480 tcu::TestCaseGroup* createOpModuleProcessedTests(tcu::TestContext& testCtx)
7482 RGBA defaultColors[4];
7483 de::MovePtr<tcu::TestCaseGroup> opModuleProcessedTests (new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "OpModuleProcessed instruction"));
7484 map<string, string> fragments;
7485 std::vector<std::string> noExtensions;
7486 GraphicsResources resources;
7488 getDefaultColors(defaultColors);
// NOTE(review): "veryfi" is the spelling of the callback as declared elsewhere; renaming it must happen at its definition.
7489 resources.verifyBinary = veryfiBinaryShader;
7490 resources.spirvVersion = SPIRV_VERSION_1_3;
// The instructions under test.
7492 fragments["moduleprocessed"] =
7493 "OpModuleProcessed \"VULKAN CTS\"\n"
7494 "OpModuleProcessed \"Negative values\"\n"
7495 "OpModuleProcessed \"Date: 2017/09/21\"\n";
// Helper function that returns its parameter.
7497 fragments["pre_main"] =
7498 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7499 "%second_param1 = OpFunctionParameter %v4f32\n"
7500 "%label_secondfunction = OpLabel\n"
7501 "OpReturnValue %second_param1\n"
7504 fragments["testfun"] =
7505 // A %test_code function that returns its argument unchanged.
7506 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7507 "%param1 = OpFunctionParameter %v4f32\n"
7508 "%label_testfun = OpLabel\n"
7509 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7510 "OpReturnValue %val1\n"
// Uses the overload that also takes the resources and an (empty) extension list.
7513 createTestsForAllStages ("opmoduleprocessed", defaultColors, defaultColors, fragments, resources, noExtensions, opModuleProcessedTests.get());
7515 return opModuleProcessedTests.release();
// Builds the "opline" group. The shader fragments interleave OpLine
// instructions (referring to %file_name and %other_name) with the instructions
// of two pass-through functions. One set of tests is created per entry of
// problemStrings, which supplies the contents of the %file_name string.
7519 tcu::TestCaseGroup* createOpLineTests(tcu::TestContext& testCtx)
7521 RGBA defaultColors[4];
7522 de::MovePtr<tcu::TestCaseGroup> opLineTests (new tcu::TestCaseGroup(testCtx, "opline", "OpLine instruction"));
7523 map<string, string> fragments;
// (test-name suffix, file-name string): empty, short, and a very long UTF-8 string.
7524 std::vector<std::pair<std::string, std::string> > problemStrings;
7526 problemStrings.push_back(std::make_pair<std::string, std::string>("empty_name", ""));
7527 problemStrings.push_back(std::make_pair<std::string, std::string>("short_name", "short_name"));
7528 problemStrings.push_back(std::make_pair<std::string, std::string>("long_name", makeLongUTF8String(65530) + "ccc"));
7529 getDefaultColors(defaultColors);
// %other_name is fixed; %file_name is appended per test in the loop at the end.
7531 fragments["debug"] =
7532 "%other_name = OpString \"other_name\"\n";
// Helper function, with OpLine instructions using a range of line/column values up to 4294967295.
7534 fragments["pre_main"] =
7535 "OpLine %file_name 32 0\n"
7536 "OpLine %file_name 32 32\n"
7537 "OpLine %file_name 32 40\n"
7538 "OpLine %other_name 32 40\n"
7539 "OpLine %other_name 0 100\n"
7540 "OpLine %other_name 0 4294967295\n"
7541 "OpLine %other_name 4294967295 0\n"
7542 "OpLine %other_name 32 40\n"
7543 "OpLine %file_name 0 0\n"
7544 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7545 "OpLine %file_name 1 0\n"
7546 "%second_param1 = OpFunctionParameter %v4f32\n"
7547 "OpLine %file_name 1 3\n"
7548 "OpLine %file_name 1 2\n"
7549 "%label_secondfunction = OpLabel\n"
7550 "OpLine %file_name 0 2\n"
7551 "OpReturnValue %second_param1\n"
7553 "OpLine %file_name 0 2\n"
7554 "OpLine %file_name 0 2\n";
7556 fragments["testfun"] =
7557 // A %test_code function that returns its argument unchanged.
7558 "OpLine %file_name 1 0\n"
7559 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7560 "OpLine %file_name 16 330\n"
7561 "%param1 = OpFunctionParameter %v4f32\n"
7562 "OpLine %file_name 14 442\n"
7563 "%label_testfun = OpLabel\n"
7564 "OpLine %file_name 11 1024\n"
7565 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7566 "OpLine %file_name 2 97\n"
7567 "OpReturnValue %val1\n"
7569 "OpLine %file_name 5 32\n";
// Each iteration works on a copy of the fragments, so the %file_name definitions do not accumulate across tests.
7571 for (size_t i = 0; i < problemStrings.size(); ++i)
7573 map<string, string> testFragments = fragments;
7574 testFragments["debug"] += "%file_name = OpString \"" + problemStrings[i].second + "\"\n";
7575 createTestsForAllStages(string("opline") + "_" + problemStrings[i].first, defaultColors, defaultColors, testFragments, opLineTests.get());
7578 return opLineTests.release();
// Creates the "opconstantnull" test group.
//
// Every entry of tests[] supplies two pieces of SPIR-V assembly: module-level
// declarations that define %cnull via OpConstantNull of some type (stored in the
// "pre_main" fragment), and function-body code that must define %transformed_param,
// which the shared functionEnd text returns from %test_code. The same color array
// is passed for both color arguments of createTestsForAllStages.
//
// testCtx: test context handed to the new TestCaseGroup.
// Returns the pointer released from the MovePtr that holds the group.
7581 tcu::TestCaseGroup* createOpConstantNullTests(tcu::TestContext& testCtx)
7583 de::MovePtr<tcu::TestCaseGroup> opConstantNullTests (new tcu::TestCaseGroup(testCtx, "opconstantnull", "OpConstantNull instruction"));
// Prologue shared by all cases: opens %test_code and declares its parameter.
7587 const char functionStart[] =
7588 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7589 "%param1 = OpFunctionParameter %v4f32\n"
// Epilogue shared by all cases: returns the value each case names %transformed_param.
7592 const char functionEnd[] =
7593 "OpReturnValue %transformed_param\n"
7596 struct NameConstantsCode
7603 NameConstantsCode tests[] =
// Null v4f32 added to the parameter.
7607 "%cnull = OpConstantNull %v4f32\n",
7608 "%transformed_param = OpFAdd %v4f32 %param1 %cnull\n"
// Null f32 inserted into all four components of a vector, which is then added to the parameter.
7612 "%cnull = OpConstantNull %f32\n",
7613 "%vp = OpVariable %fp_v4f32 Function\n"
7614 "%v = OpLoad %v4f32 %vp\n"
7615 "%v0 = OpVectorInsertDynamic %v4f32 %v %cnull %c_i32_0\n"
7616 "%v1 = OpVectorInsertDynamic %v4f32 %v0 %cnull %c_i32_1\n"
7617 "%v2 = OpVectorInsertDynamic %v4f32 %v1 %cnull %c_i32_2\n"
7618 "%v3 = OpVectorInsertDynamic %v4f32 %v2 %cnull %c_i32_3\n"
7619 "%transformed_param = OpFAdd %v4f32 %param1 %v3\n"
// Null bool used directly as the branch condition; only %true_label overwrites the stored parameter.
7623 "%cnull = OpConstantNull %bool\n",
7624 "%v = OpVariable %fp_v4f32 Function\n"
7625 " OpStore %v %param1\n"
7626 " OpSelectionMerge %false_label None\n"
7627 " OpBranchConditional %cnull %true_label %false_label\n"
7628 "%true_label = OpLabel\n"
7629 " OpStore %v %c_v4f32_0_5_0_5_0_5_0_5\n"
7630 " OpBranch %false_label\n"
7631 "%false_label = OpLabel\n"
7632 "%transformed_param = OpLoad %v4f32 %v\n"
// Null i32 compared with %c_i32_0; the parameter is stored into %v only on the true branch.
7636 "%cnull = OpConstantNull %i32\n",
7637 "%v = OpVariable %fp_v4f32 Function %c_v4f32_0_5_0_5_0_5_0_5\n"
7638 "%b = OpIEqual %bool %cnull %c_i32_0\n"
7639 " OpSelectionMerge %false_label None\n"
7640 " OpBranchConditional %b %true_label %false_label\n"
7641 "%true_label = OpLabel\n"
7642 " OpStore %v %param1\n"
7643 " OpBranch %false_label\n"
7644 "%false_label = OpLabel\n"
7645 "%transformed_param = OpLoad %v4f32 %v\n"
// Null struct used as a variable initializer; its v4f32 member (index 1) is added to the parameter.
7649 "%stype = OpTypeStruct %f32 %v4f32\n"
7650 "%fp_stype = OpTypePointer Function %stype\n"
7651 "%cnull = OpConstantNull %stype\n",
7652 "%v = OpVariable %fp_stype Function %cnull\n"
7653 "%f = OpAccessChain %fp_v4f32 %v %c_i32_1\n"
7654 "%f_val = OpLoad %v4f32 %f\n"
7655 "%transformed_param = OpFAdd %v4f32 %param1 %f_val\n"
// Null array of four v4f32 used as a variable initializer; all four elements are added to the parameter.
7659 "%a4_v4f32 = OpTypeArray %v4f32 %c_u32_4\n"
7660 "%fp_a4_v4f32 = OpTypePointer Function %a4_v4f32\n"
7661 "%cnull = OpConstantNull %a4_v4f32\n",
7662 "%v = OpVariable %fp_a4_v4f32 Function %cnull\n"
7663 "%f = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7664 "%f1 = OpAccessChain %fp_v4f32 %v %c_u32_1\n"
7665 "%f2 = OpAccessChain %fp_v4f32 %v %c_u32_2\n"
7666 "%f3 = OpAccessChain %fp_v4f32 %v %c_u32_3\n"
7667 "%f_val = OpLoad %v4f32 %f\n"
7668 "%f1_val = OpLoad %v4f32 %f1\n"
7669 "%f2_val = OpLoad %v4f32 %f2\n"
7670 "%f3_val = OpLoad %v4f32 %f3\n"
7671 "%t0 = OpFAdd %v4f32 %param1 %f_val\n"
7672 "%t1 = OpFAdd %v4f32 %t0 %f1_val\n"
7673 "%t2 = OpFAdd %v4f32 %t1 %f2_val\n"
7674 "%transformed_param = OpFAdd %v4f32 %t2 %f3_val\n"
// Null 4x4 matrix multiplied with the parameter; the product is added to the parameter.
7678 "%mat4x4_f32 = OpTypeMatrix %v4f32 4\n"
7679 "%cnull = OpConstantNull %mat4x4_f32\n",
7680 // Our null matrix * any vector should result in a zero vector.
7681 "%v = OpVectorTimesMatrix %v4f32 %param1 %cnull\n"
7682 "%transformed_param = OpFAdd %v4f32 %param1 %v\n"
7686 getHalfColorsFullAlpha(colors);
// One set of all-stage tests per table entry, each with a freshly built fragment map.
7688 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7690 map<string, string> fragments;
7691 fragments["pre_main"] = tests[testNdx].constants;
7692 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7693 createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, opConstantNullTests.get());
7695 return opConstantNullTests.release();
// Creates the "opconstantcomposite" test group.
//
// Every entry of tests[] supplies module-level declarations that build a constant
// with OpConstantComposite (stored in the "pre_main" fragment) and function-body
// code that must define %transformed_param, which the shared functionEnd text
// returns from %test_code. Inputs are filled by getHalfColorsFullAlpha and the
// expected outputs are the four RGBA values assigned to outputColors below.
//
// testCtx: test context handed to the new TestCaseGroup.
// Returns the pointer released from the MovePtr that holds the group.
7697 tcu::TestCaseGroup* createOpConstantCompositeTests(tcu::TestContext& testCtx)
7699 de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "OpConstantComposite instruction"));
7700 RGBA inputColors[4];
7701 RGBA outputColors[4];
// Prologue shared by all cases: opens %test_code and declares its parameter.
7704 const char functionStart[] =
7705 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7706 "%param1 = OpFunctionParameter %v4f32\n"
// Epilogue shared by all cases: returns the value each case names %transformed_param.
7709 const char functionEnd[] =
7710 "OpReturnValue %transformed_param\n"
7713 struct NameConstantsCode
7720 NameConstantsCode tests[] =
// Composite vector (0.5, 0.5, 0.5, 0) added to the parameter.
7725 "%cval = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0\n",
7726 "%transformed_param = OpFAdd %v4f32 %param1 %cval\n"
// Composite struct { v4f32, f32 } used as a variable initializer; both members are read back.
7731 "%stype = OpTypeStruct %v4f32 %f32\n"
7732 "%fp_stype = OpTypePointer Function %stype\n"
7733 "%f32_n_1 = OpConstant %f32 -1.0\n"
7734 "%f32_1_5 = OpConstant %f32 !0x3fc00000\n" // +1.5
7735 "%cvec = OpConstantComposite %v4f32 %f32_1_5 %f32_1_5 %f32_1_5 %c_f32_1\n"
7736 "%cval = OpConstantComposite %stype %cvec %f32_n_1\n",
7738 "%v = OpVariable %fp_stype Function %cval\n"
7739 "%vec_ptr = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7740 "%f32_ptr = OpAccessChain %fp_f32 %v %c_u32_1\n"
7741 "%vec_val = OpLoad %v4f32 %vec_ptr\n"
7742 "%f32_val = OpLoad %f32 %f32_ptr\n"
7743 "%tmp1 = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_1 %f32_val\n" // vec4(-1)
7744 "%tmp2 = OpFAdd %v4f32 %tmp1 %param1\n" // param1 + vec4(-1)
7745 "%transformed_param = OpFAdd %v4f32 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
7748 // [1|0|0|0.5] [x] = x + 0.5
7749 // [0|1|0|0.5] [y] = y + 0.5
7750 // [0|0|1|0.5] [z] = z + 0.5
7751 // [0|0|0|1 ] [1] = 1
7754 "%mat4x4_f32 = OpTypeMatrix %v4f32 4\n"
7755 "%v4f32_1_0_0_0 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_0\n"
7756 "%v4f32_0_1_0_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_1 %c_f32_0 %c_f32_0\n"
7757 "%v4f32_0_0_1_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_1 %c_f32_0\n"
7758 "%v4f32_0_5_0_5_0_5_1 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_1\n"
7759 "%cval = OpConstantComposite %mat4x4_f32 %v4f32_1_0_0_0 %v4f32_0_1_0_0 %v4f32_0_0_1_0 %v4f32_0_5_0_5_0_5_1\n",
7761 "%transformed_param = OpMatrixTimesVector %v4f32 %cval %param1\n"
// Composite array (0, -1, 1.5, 0): the elements are summed and the sum is added to x, y and z.
7766 "%c_v4f32_1_1_1_0 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7767 "%fp_a4f32 = OpTypePointer Function %a4f32\n"
7768 "%f32_n_1 = OpConstant %f32 -1.0\n"
7769 "%f32_1_5 = OpConstant %f32 !0x3fc00000\n" // +1.5
7770 "%carr = OpConstantComposite %a4f32 %c_f32_0 %f32_n_1 %f32_1_5 %c_f32_0\n",
7772 "%v = OpVariable %fp_a4f32 Function %carr\n"
7773 "%f = OpAccessChain %fp_f32 %v %c_u32_0\n"
7774 "%f1 = OpAccessChain %fp_f32 %v %c_u32_1\n"
7775 "%f2 = OpAccessChain %fp_f32 %v %c_u32_2\n"
7776 "%f3 = OpAccessChain %fp_f32 %v %c_u32_3\n"
7777 "%f_val = OpLoad %f32 %f\n"
7778 "%f1_val = OpLoad %f32 %f1\n"
7779 "%f2_val = OpLoad %f32 %f2\n"
7780 "%f3_val = OpLoad %f32 %f3\n"
7781 "%ftot1 = OpFAdd %f32 %f_val %f1_val\n"
7782 "%ftot2 = OpFAdd %f32 %ftot1 %f2_val\n"
7783 "%ftot3 = OpFAdd %f32 %ftot2 %f3_val\n" // 0 - 1 + 1.5 + 0
7784 "%add_vec = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %ftot3\n"
7785 "%transformed_param = OpFAdd %v4f32 %param1 %add_vec\n"
7792 // [ 1.0, 1.0, 1.0, 1.0]
7796 // [ 0.0, 0.5, 0.0, 0.0]
7800 // [ 1.0, 1.0, 1.0, 1.0]
7803 "array_of_struct_of_array",
7805 "%c_v4f32_1_1_1_0 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7806 "%fp_a4f32 = OpTypePointer Function %a4f32\n"
7807 "%stype = OpTypeStruct %f32 %a4f32\n"
7808 "%a3stype = OpTypeArray %stype %c_u32_3\n"
7809 "%fp_a3stype = OpTypePointer Function %a3stype\n"
7810 "%ca4f32_0 = OpConstantComposite %a4f32 %c_f32_0 %c_f32_0_5 %c_f32_0 %c_f32_0\n"
7811 "%ca4f32_1 = OpConstantComposite %a4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
7812 "%cstype1 = OpConstantComposite %stype %c_f32_0 %ca4f32_1\n"
7813 "%cstype2 = OpConstantComposite %stype %c_f32_1 %ca4f32_0\n"
// Terminated with "\n" like every other assembly line in this table, so the fragment
// stays well-formed no matter what text is placed after it.
7814 "%carr = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1\n",
// Reads element [1] -> member 1 -> element [1], i.e. the second entry of %ca4f32_0 (%c_f32_0_5).
7816 "%v = OpVariable %fp_a3stype Function %carr\n"
7817 "%f = OpAccessChain %fp_f32 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
7818 "%f_l = OpLoad %f32 %f\n"
7819 "%add_vec = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %f_l\n"
7820 "%transformed_param = OpFAdd %v4f32 %param1 %add_vec\n"
7824 getHalfColorsFullAlpha(inputColors);
// NOTE(review): presumably the half-intensity inputs with 0.5 added to x, y and z - confirm against getHalfColorsFullAlpha.
7825 outputColors[0] = RGBA(255, 255, 255, 255);
7826 outputColors[1] = RGBA(255, 127, 127, 255);
7827 outputColors[2] = RGBA(127, 255, 127, 255);
7828 outputColors[3] = RGBA(127, 127, 255, 255);
// One set of all-stage tests per table entry, each with a freshly built fragment map.
7830 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7832 map<string, string> fragments;
7833 fragments["pre_main"] = tests[testNdx].constants;
7834 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7835 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, opConstantCompositeTests.get());
7837 return opConstantCompositeTests.release();
// Creates the "selection_block_order" test group containing the "out_of_order" cases.
//
// The SPIR-V for %test_code implements the loop shown in the pseudo-code below, but
// its basic blocks are written in an order that differs from execution order:
// loop entry, loop merge, if-entry, false branch, if merge, and only then the true
// branch.
//
// testCtx: test context handed to the new TestCaseGroup.
// Returns the pointer released from the MovePtr that holds the group.
7840 tcu::TestCaseGroup* createSelectionBlockOrderTests(tcu::TestContext& testCtx)
7842 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "selection_block_order", "Out-of-order blocks for selection"));
7843 RGBA inputColors[4];
7844 RGBA outputColors[4];
7845 map<string, string> fragments;
7847 // vec4 test_code(vec4 param) {
7848 // vec4 result = param;
7849 // for (int i = 0; i < 4; ++i) {
7850 // if (i == 0) result[i] = 0.;
7851 // else result[i] = 1. - result[i];
7855 const char function[] =
7856 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7857 "%param1 = OpFunctionParameter %v4f32\n"
7859 "%iptr = OpVariable %fp_i32 Function\n"
7860 "%result = OpVariable %fp_v4f32 Function\n"
7861 " OpStore %iptr %c_i32_0\n"
7862 " OpStore %result %param1\n"
7865 // Loop entry block.
7867 "%ival = OpLoad %i32 %iptr\n"
7868 "%lt_4 = OpSLessThan %bool %ival %c_i32_4\n"
7869 " OpLoopMerge %exit %if_entry None\n"
7870 " OpBranchConditional %lt_4 %if_entry %exit\n"
7872 // Merge block for loop.
7874 "%ret = OpLoad %v4f32 %result\n"
7875 " OpReturnValue %ret\n"
7877 // If-statement entry block.
7878 "%if_entry = OpLabel\n"
7879 "%loc = OpAccessChain %fp_f32 %result %ival\n"
7880 "%eq_0 = OpIEqual %bool %ival %c_i32_0\n"
7881 " OpSelectionMerge %if_exit None\n"
7882 " OpBranchConditional %eq_0 %if_true %if_false\n"
7884 // False branch for if-statement.
7885 "%if_false = OpLabel\n"
7886 "%val = OpLoad %f32 %loc\n"
7887 "%sub = OpFSub %f32 %c_f32_1 %val\n"
7888 " OpStore %loc %sub\n"
7889 " OpBranch %if_exit\n"
7891 // Merge block for if-statement.
7892 "%if_exit = OpLabel\n"
7893 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
7894 " OpStore %iptr %ival_next\n"
7897 // True branch for if-statement.
7898 "%if_true = OpLabel\n"
7899 " OpStore %loc %c_f32_0\n"
7900 " OpBranch %if_exit\n"
7904 fragments["testfun"] = function;
7906 inputColors[0] = RGBA(127, 127, 127, 0);
7907 inputColors[1] = RGBA(127, 0, 0, 0);
7908 inputColors[2] = RGBA(0, 127, 0, 0);
7909 inputColors[3] = RGBA(0, 0, 127, 0);
// Expected results per the pseudo-code above: component 0 becomes 0 and every other
// component (alpha included) becomes 1 - input.
7911 outputColors[0] = RGBA(0, 128, 128, 255);
7912 outputColors[1] = RGBA(0, 255, 255, 255);
7913 outputColors[2] = RGBA(0, 128, 255, 255);
7914 outputColors[3] = RGBA(0, 255, 128, 255);
7916 createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
7918 return group.release();
// Creates the "switch_block_order" test group.
//
// The "out_of_order" cases use a %test_code function that implements the loop and
// switch shown in the pseudo-code below, with the case blocks written in the order
// case2, default, case3, case0, switch merge, case1 rather than in label order.
// After those cases are registered, addOpSwitchAmberTests is called on the same group.
//
// testCtx: test context handed to the new TestCaseGroup and to addOpSwitchAmberTests.
// Returns the pointer released from the MovePtr that holds the group.
7921 tcu::TestCaseGroup* createSwitchBlockOrderTests(tcu::TestContext& testCtx)
7923 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "switch_block_order", "Out-of-order blocks for switch"));
7924 RGBA inputColors[4];
7925 RGBA outputColors[4];
7926 map<string, string> fragments;
// Per-case increments (0.2, 0.4, 0.6, 0.8) referenced by the case blocks below.
7928 const char typesAndConstants[] =
7929 "%c_f32_p2 = OpConstant %f32 0.2\n"
7930 "%c_f32_p4 = OpConstant %f32 0.4\n"
7931 "%c_f32_p6 = OpConstant %f32 0.6\n"
7932 "%c_f32_p8 = OpConstant %f32 0.8\n";
7934 // vec4 test_code(vec4 param) {
7935 // vec4 result = param;
7936 // for (int i = 0; i < 4; ++i) {
7938 // case 0: result[i] += .2; break;
7939 // case 1: result[i] += .6; break;
7940 // case 2: result[i] += .4; break;
7941 // case 3: result[i] += .8; break;
7942 // default: break; // unreachable
7947 const char function[] =
7948 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7949 "%param1 = OpFunctionParameter %v4f32\n"
7951 "%iptr = OpVariable %fp_i32 Function\n"
7952 "%result = OpVariable %fp_v4f32 Function\n"
7953 " OpStore %iptr %c_i32_0\n"
7954 " OpStore %result %param1\n"
7957 // Loop entry block.
7959 "%ival = OpLoad %i32 %iptr\n"
7960 "%lt_4 = OpSLessThan %bool %ival %c_i32_4\n"
7961 " OpLoopMerge %exit %cont None\n"
7962 " OpBranchConditional %lt_4 %switch_entry %exit\n"
7964 // Merge block for loop.
7966 "%ret = OpLoad %v4f32 %result\n"
7967 " OpReturnValue %ret\n"
7969 // Switch-statement entry block.
7970 "%switch_entry = OpLabel\n"
7971 "%loc = OpAccessChain %fp_f32 %result %ival\n"
7972 "%val = OpLoad %f32 %loc\n"
7973 " OpSelectionMerge %switch_exit None\n"
7974 " OpSwitch %ival %switch_default 0 %case0 1 %case1 2 %case2 3 %case3\n"
7976 "%case2 = OpLabel\n"
7977 "%addp4 = OpFAdd %f32 %val %c_f32_p4\n"
7978 " OpStore %loc %addp4\n"
7979 " OpBranch %switch_exit\n"
7981 "%switch_default = OpLabel\n"
7984 "%case3 = OpLabel\n"
7985 "%addp8 = OpFAdd %f32 %val %c_f32_p8\n"
7986 " OpStore %loc %addp8\n"
7987 " OpBranch %switch_exit\n"
7989 "%case0 = OpLabel\n"
7990 "%addp2 = OpFAdd %f32 %val %c_f32_p2\n"
7991 " OpStore %loc %addp2\n"
7992 " OpBranch %switch_exit\n"
7994 // Merge block for switch-statement.
7995 "%switch_exit = OpLabel\n"
7996 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
7997 " OpStore %iptr %ival_next\n"
8002 "%case1 = OpLabel\n"
8003 "%addp6 = OpFAdd %f32 %val %c_f32_p6\n"
8004 " OpStore %loc %addp6\n"
8005 " OpBranch %switch_exit\n"
8009 fragments["pre_main"] = typesAndConstants;
8010 fragments["testfun"] = function;
8012 inputColors[0] = RGBA(127, 27, 127, 51);
8013 inputColors[1] = RGBA(127, 0, 0, 51);
8014 inputColors[2] = RGBA(0, 27, 0, 51);
8015 inputColors[3] = RGBA(0, 0, 127, 51);
// Expected results per the pseudo-code above: +0.2, +0.6, +0.4 and +0.8 on
// components 0..3 respectively.
8017 outputColors[0] = RGBA(178, 180, 229, 255);
8018 outputColors[1] = RGBA(178, 153, 102, 255);
8019 outputColors[2] = RGBA(51, 180, 102, 255);
8020 outputColors[3] = RGBA(51, 153, 229, 255);
8022 createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
8024 addOpSwitchAmberTests(*group, testCtx);
8026 return group.release();
// Creates the "decoration_group" test group.
//
// A fixed set of OpDecorationGroup declarations, types/constants and a %test_code
// function is shared by all cases. Each entry of tests[] contributes only the
// OpGroupDecorate / OpGroupMemberDecorate lines that apply those groups to targets;
// they are appended to the shared decoration text. Input and expected colors are
// both filled by getHalfColorsFullAlpha.
//
// testCtx: test context handed to the new TestCaseGroup.
// Returns the pointer released from the MovePtr that holds the group.
8029 tcu::TestCaseGroup* createDecorationGroupTests(tcu::TestContext& testCtx)
8031 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "decoration_group", "Decoration group tests"));
8032 RGBA inputColors[4];
8033 RGBA outputColors[4];
8034 map<string, string> fragments;
// Decoration groups used by the cases: %array_group (ArrayStride 4),
// %struct_member_group (Offset 0), %group1 (RelaxedPrecision),
// %group3 (RelaxedPrecision, Flat, Restrict) and %group0, for which no OpDecorate
// is visible here.
8036 const char decorations[] =
8037 "OpDecorate %array_group ArrayStride 4\n"
8038 "OpDecorate %struct_member_group Offset 0\n"
8039 "%array_group = OpDecorationGroup\n"
8040 "%struct_member_group = OpDecorationGroup\n"
8042 "OpDecorate %group1 RelaxedPrecision\n"
8043 "OpDecorate %group3 RelaxedPrecision\n"
8044 "OpDecorate %group3 Flat\n"
8045 "OpDecorate %group3 Restrict\n"
8046 "%group0 = OpDecorationGroup\n"
8047 "%group1 = OpDecorationGroup\n"
8048 "%group3 = OpDecorationGroup\n";
// Two struct types wrapping the same 3-element float array, plus constant values for each.
8050 const char typesAndConstants[] =
8051 "%a3f32 = OpTypeArray %f32 %c_u32_3\n"
8052 "%struct1 = OpTypeStruct %a3f32\n"
8053 "%struct2 = OpTypeStruct %a3f32\n"
8054 "%fp_struct1 = OpTypePointer Function %struct1\n"
8055 "%fp_struct2 = OpTypePointer Function %struct2\n"
8056 "%c_f32_2 = OpConstant %f32 2.\n"
8057 "%c_f32_n2 = OpConstant %f32 -2.\n"
8059 "%c_a3f32_1 = OpConstantComposite %a3f32 %c_f32_1 %c_f32_2 %c_f32_1\n"
8060 "%c_a3f32_2 = OpConstantComposite %a3f32 %c_f32_n1 %c_f32_n2 %c_f32_n1\n"
8061 "%c_struct1 = OpConstantComposite %struct1 %c_a3f32_1\n"
8062 "%c_struct2 = OpConstantComposite %struct2 %c_a3f32_2\n";
// Loads array element 2 from each struct variable, sums the two values and adds the
// sum to component 1 of the result.
8064 const char function[] =
8065 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8066 "%param = OpFunctionParameter %v4f32\n"
8067 "%entry = OpLabel\n"
8068 "%result = OpVariable %fp_v4f32 Function\n"
8069 "%v_struct1 = OpVariable %fp_struct1 Function\n"
8070 "%v_struct2 = OpVariable %fp_struct2 Function\n"
8071 " OpStore %result %param\n"
8072 " OpStore %v_struct1 %c_struct1\n"
8073 " OpStore %v_struct2 %c_struct2\n"
8074 "%ptr1 = OpAccessChain %fp_f32 %v_struct1 %c_i32_0 %c_i32_2\n"
8075 "%val1 = OpLoad %f32 %ptr1\n"
8076 "%ptr2 = OpAccessChain %fp_f32 %v_struct2 %c_i32_0 %c_i32_2\n"
8077 "%val2 = OpLoad %f32 %ptr2\n"
8078 "%addvalues = OpFAdd %f32 %val1 %val2\n"
8079 "%ptr = OpAccessChain %fp_f32 %result %c_i32_1\n"
8080 "%val = OpLoad %f32 %ptr\n"
8081 "%addresult = OpFAdd %f32 %addvalues %val\n"
8082 " OpStore %ptr %addresult\n"
8083 "%ret = OpLoad %v4f32 %result\n"
8084 " OpReturnValue %ret\n"
8087 struct CaseNameDecoration
// Each entry: test case name followed by the group-application lines specific to that case.
8093 CaseNameDecoration tests[] =
8096 "same_decoration_group_on_multiple_types",
8097 "OpGroupMemberDecorate %struct_member_group %struct1 0 %struct2 0\n"
8100 "empty_decoration_group",
8101 "OpGroupDecorate %group0 %a3f32\n"
8102 "OpGroupDecorate %group0 %result\n"
8105 "one_element_decoration_group",
8106 "OpGroupDecorate %array_group %a3f32\n"
8109 "multiple_elements_decoration_group",
8110 "OpGroupDecorate %group3 %v_struct1\n"
8113 "multiple_decoration_groups_on_same_variable",
8114 "OpGroupDecorate %group0 %v_struct2\n"
8115 "OpGroupDecorate %group1 %v_struct2\n"
8116 "OpGroupDecorate %group3 %v_struct2\n"
8119 "same_decoration_group_multiple_times",
8120 "OpGroupDecorate %group1 %addvalues\n"
8121 "OpGroupDecorate %group1 %addvalues\n"
8122 "OpGroupDecorate %group1 %addvalues\n"
8127 getHalfColorsFullAlpha(inputColors);
8128 getHalfColorsFullAlpha(outputColors);
// The fragment map is reused across iterations; all three keys are reassigned each time.
8130 for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
8132 fragments["decoration"] = decorations + tests[idx].decoration;
8133 fragments["pre_main"] = typesAndConstants;
8134 fragments["testfun"] = function;
8136 createTestsForAllStages(tests[idx].name, inputColors, outputColors, fragments, group.get());
8139 return group.release();
// Describes one graphics test case built around two specialization constants and
// one OpSpecConstantOp. All values are supplied through the constructor.
// NOTE(review): how each field is consumed is not visible in this struct; the field
// notes below are inferred from the names and from the constructor arguments - confirm
// against createSpecConstantTests.
8142 struct SpecConstantTwoValGraphicsCase
8144 const std::string caseName;
// Text for the two spec constant definitions (presumably the ${SC_DEF0} / ${SC_DEF1} substitutions).
8145 const std::string scDefinition0;
8146 const std::string scDefinition1;
// Result type and operation text for the OpSpecConstantOp (presumably ${SC_RESULT_TYPE} / ${SC_OP}).
8147 const std::string scResultType;
8148 const std::string scOperation;
// Values associated with the two spec constants (presumably the specialized values used at run time).
8149 SpecConstantValue scActualValue0;
8150 SpecConstantValue scActualValue1;
8151 const std::string resultOperation;
8152 RGBA expectedColors[4];
8153 CaseFlags caseFlags;
// Stores every argument in the member of the matching purpose; flags defaults to FLAG_NONE.
8155 SpecConstantTwoValGraphicsCase (const std::string& name,
8156 const std::string& definition0,
8157 const std::string& definition1,
8158 const std::string& resultType,
8159 const std::string& operation,
8160 const SpecConstantValue& value0,
8161 const SpecConstantValue& value1,
8162 const std::string& resultOp,
8163 const RGBA (&output)[4],
8164 CaseFlags flags = FLAG_NONE)
8166 , scDefinition0 (definition0)
8167 , scDefinition1 (definition1)
8168 , scResultType (resultType)
8169 , scOperation (operation)
8170 , scActualValue0 (value0)
8171 , scActualValue1 (value1)
8172 , resultOperation (resultOp)
// The array member is filled element by element from the four entries of output.
8175 expectedColors[0] = output[0];
8176 expectedColors[1] = output[1];
8177 expectedColors[2] = output[2];
8178 expectedColors[3] = output[3];
8182 tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
8184 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
8185 vector<SpecConstantTwoValGraphicsCase> cases;
8186 RGBA inputColors[4];
8187 RGBA outputColors0[4];
8188 RGBA outputColors1[4];
8189 RGBA outputColors2[4];
8191 const char decorations1[] =
8192 "OpDecorate %sc_0 SpecId 0\n"
8193 "OpDecorate %sc_1 SpecId 1\n";
8195 const char typesAndConstants1[] =
8196 "${OPTYPE_DEFINITIONS:opt}"
8197 "%sc_0 = OpSpecConstant${SC_DEF0}\n"
8198 "%sc_1 = OpSpecConstant${SC_DEF1}\n"
8199 "%sc_op = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
8201 const char function1[] =
8202 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8203 "%param = OpFunctionParameter %v4f32\n"
8204 "%label = OpLabel\n"
8205 "%result = OpVariable %fp_v4f32 Function\n"
8206 "${TYPE_CONVERT:opt}"
8207 " OpStore %result %param\n"
8208 "%gen = ${GEN_RESULT}\n"
8209 "%index = OpIAdd %i32 %gen %c_i32_1\n"
8210 "%loc = OpAccessChain %fp_f32 %result %index\n"
8211 "%val = OpLoad %f32 %loc\n"
8212 "%add = OpFAdd %f32 %val %c_f32_0_5\n"
8213 " OpStore %loc %add\n"
8214 "%ret = OpLoad %v4f32 %result\n"
8215 " OpReturnValue %ret\n"
8218 inputColors[0] = RGBA(127, 127, 127, 255);
8219 inputColors[1] = RGBA(127, 0, 0, 255);
8220 inputColors[2] = RGBA(0, 127, 0, 255);
8221 inputColors[3] = RGBA(0, 0, 127, 255);
8223 // Derived from inputColors[x] by adding 128 to inputColors[x][0].
8224 outputColors0[0] = RGBA(255, 127, 127, 255);
8225 outputColors0[1] = RGBA(255, 0, 0, 255);
8226 outputColors0[2] = RGBA(128, 127, 0, 255);
8227 outputColors0[3] = RGBA(128, 0, 127, 255);
8229 // Derived from inputColors[x] by adding 128 to inputColors[x][1].
8230 outputColors1[0] = RGBA(127, 255, 127, 255);
8231 outputColors1[1] = RGBA(127, 128, 0, 255);
8232 outputColors1[2] = RGBA(0, 255, 0, 255);
8233 outputColors1[3] = RGBA(0, 128, 127, 255);
8235 // Derived from inputColors[x] by adding 128 to inputColors[x][2].
8236 outputColors2[0] = RGBA(127, 127, 255, 255);
8237 outputColors2[1] = RGBA(127, 0, 128, 255);
8238 outputColors2[2] = RGBA(0, 127, 128, 255);
8239 outputColors2[3] = RGBA(0, 0, 255, 255);
8241 const char addZeroToSc[] = "OpIAdd %i32 %c_i32_0 %sc_op";
8242 const char addZeroToSc32[] = "OpIAdd %i32 %c_i32_0 %sc_op32";
8243 const char selectTrueUsingSc[] = "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
8244 const char selectFalseUsingSc[] = "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
8246 cases.push_back(SpecConstantTwoValGraphicsCase("iadd", " %i32 0", " %i32 0", "%i32", "IAdd %sc_0 %sc_1", 19, -20, addZeroToSc, outputColors0));
8247 cases.push_back(SpecConstantTwoValGraphicsCase("isub", " %i32 0", " %i32 0", "%i32", "ISub %sc_0 %sc_1", 19, 20, addZeroToSc, outputColors0));
8248 cases.push_back(SpecConstantTwoValGraphicsCase("imul", " %i32 0", " %i32 0", "%i32", "IMul %sc_0 %sc_1", -1, -1, addZeroToSc, outputColors2));
8249 cases.push_back(SpecConstantTwoValGraphicsCase("sdiv", " %i32 0", " %i32 0", "%i32", "SDiv %sc_0 %sc_1", -126, 126, addZeroToSc, outputColors0));
8250 cases.push_back(SpecConstantTwoValGraphicsCase("udiv", " %i32 0", " %i32 0", "%i32", "UDiv %sc_0 %sc_1", 126, 126, addZeroToSc, outputColors2));
8251 cases.push_back(SpecConstantTwoValGraphicsCase("srem", " %i32 0", " %i32 0", "%i32", "SRem %sc_0 %sc_1", 3, 2, addZeroToSc, outputColors2));
8252 cases.push_back(SpecConstantTwoValGraphicsCase("smod", " %i32 0", " %i32 0", "%i32", "SMod %sc_0 %sc_1", 3, 2, addZeroToSc, outputColors2));
8253 cases.push_back(SpecConstantTwoValGraphicsCase("umod", " %i32 0", " %i32 0", "%i32", "UMod %sc_0 %sc_1", 1001, 500, addZeroToSc, outputColors2));
8254 cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseand", " %i32 0", " %i32 0", "%i32", "BitwiseAnd %sc_0 %sc_1", 0x33, 0x0d, addZeroToSc, outputColors2));
8255 cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseor", " %i32 0", " %i32 0", "%i32", "BitwiseOr %sc_0 %sc_1", 0, 1, addZeroToSc, outputColors2));
8256 cases.push_back(SpecConstantTwoValGraphicsCase("bitwisexor", " %i32 0", " %i32 0", "%i32", "BitwiseXor %sc_0 %sc_1", 0x2e, 0x2f, addZeroToSc, outputColors2));
8257 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical", " %i32 0", " %i32 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 2, 1, addZeroToSc, outputColors2));
8258 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic", " %i32 0", " %i32 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -4, 2, addZeroToSc, outputColors0));
8259 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical", " %i32 0", " %i32 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 1, 0, addZeroToSc, outputColors2));
8261 // Shifts for other integer sizes.
8262 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i64", " %i64 0", " %i64 0", "%i64", "ShiftRightLogical %sc_0 %sc_1", deInt64{2}, deInt64{1}, addZeroToSc32, outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8263 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i64", " %i64 0", " %i64 0", "%i64", "ShiftRightArithmetic %sc_0 %sc_1", deInt64{-4}, deInt64{2}, addZeroToSc32, outputColors0, (FLAG_I64 | FLAG_CONVERT)));
8264 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i64", " %i64 0", " %i64 0", "%i64", "ShiftLeftLogical %sc_0 %sc_1", deInt64{1}, deInt64{0}, addZeroToSc32, outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8265 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i16", " %i16 0", " %i16 0", "%i16", "ShiftRightLogical %sc_0 %sc_1", deInt16{2}, deInt16{1}, addZeroToSc32, outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8266 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i16", " %i16 0", " %i16 0", "%i16", "ShiftRightArithmetic %sc_0 %sc_1", deInt16{-4}, deInt16{2}, addZeroToSc32, outputColors0, (FLAG_I16 | FLAG_CONVERT)));
8267 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i16", " %i16 0", " %i16 0", "%i16", "ShiftLeftLogical %sc_0 %sc_1", deInt16{1}, deInt16{0}, addZeroToSc32, outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8268 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i8", " %i8 0", " %i8 0", "%i8", "ShiftRightLogical %sc_0 %sc_1", deInt8{2}, deInt8{1}, addZeroToSc32, outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8269 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i8", " %i8 0", " %i8 0", "%i8", "ShiftRightArithmetic %sc_0 %sc_1", deInt8{-4}, deInt8{2}, addZeroToSc32, outputColors0, (FLAG_I8 | FLAG_CONVERT)));
8270 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i8", " %i8 0", " %i8 0", "%i8", "ShiftLeftLogical %sc_0 %sc_1", deInt8{1}, deInt8{0}, addZeroToSc32, outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8272 // Shifts for other integer sizes but only in the shift amount.
8273 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 2, deInt64{1}, addZeroToSc, outputColors2, (FLAG_I64)));
8274 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -4, deInt64{2}, addZeroToSc, outputColors0, (FLAG_I64)));
8275 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 1, deInt64{0}, addZeroToSc, outputColors2, (FLAG_I64)));
8276 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 2, deInt16{1}, addZeroToSc, outputColors2, (FLAG_I16)));
8277 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -4, deInt16{2}, addZeroToSc, outputColors0, (FLAG_I16)));
8278 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 1, deInt16{0}, addZeroToSc, outputColors2, (FLAG_I16)));
8279 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 2, deInt8{1}, addZeroToSc, outputColors2, (FLAG_I8)));
8280 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -4, deInt8{2}, addZeroToSc, outputColors0, (FLAG_I8)));
8281 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 1, deInt8{0}, addZeroToSc, outputColors2, (FLAG_I8)));
8283 cases.push_back(SpecConstantTwoValGraphicsCase("slessthan", " %i32 0", " %i32 0", "%bool", "SLessThan %sc_0 %sc_1", -20, -10, selectTrueUsingSc, outputColors2));
8284 cases.push_back(SpecConstantTwoValGraphicsCase("ulessthan", " %i32 0", " %i32 0", "%bool", "ULessThan %sc_0 %sc_1", 10, 20, selectTrueUsingSc, outputColors2));
8285 cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthan", " %i32 0", " %i32 0", "%bool", "SGreaterThan %sc_0 %sc_1", -1000, 50, selectFalseUsingSc, outputColors2));
8286 cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthan", " %i32 0", " %i32 0", "%bool", "UGreaterThan %sc_0 %sc_1", 10, 5, selectTrueUsingSc, outputColors2));
8287 cases.push_back(SpecConstantTwoValGraphicsCase("slessthanequal", " %i32 0", " %i32 0", "%bool", "SLessThanEqual %sc_0 %sc_1", -10, -10, selectTrueUsingSc, outputColors2));
8288 cases.push_back(SpecConstantTwoValGraphicsCase("ulessthanequal", " %i32 0", " %i32 0", "%bool", "ULessThanEqual %sc_0 %sc_1", 50, 100, selectTrueUsingSc, outputColors2));
8289 cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthanequal", " %i32 0", " %i32 0", "%bool", "SGreaterThanEqual %sc_0 %sc_1", -1000, 50, selectFalseUsingSc, outputColors2));
8290 cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthanequal", " %i32 0", " %i32 0", "%bool", "UGreaterThanEqual %sc_0 %sc_1", 10, 10, selectTrueUsingSc, outputColors2));
8291 cases.push_back(SpecConstantTwoValGraphicsCase("iequal", " %i32 0", " %i32 0", "%bool", "IEqual %sc_0 %sc_1", 42, 24, selectFalseUsingSc, outputColors2));
8292 cases.push_back(SpecConstantTwoValGraphicsCase("inotequal", " %i32 0", " %i32 0", "%bool", "INotEqual %sc_0 %sc_1", 42, 24, selectTrueUsingSc, outputColors2));
8293 cases.push_back(SpecConstantTwoValGraphicsCase("logicaland", "True %bool", "True %bool", "%bool", "LogicalAnd %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputColors2));
8294 cases.push_back(SpecConstantTwoValGraphicsCase("logicalor", "False %bool", "False %bool", "%bool", "LogicalOr %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputColors2));
8295 cases.push_back(SpecConstantTwoValGraphicsCase("logicalequal", "True %bool", "True %bool", "%bool", "LogicalEqual %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputColors2));
8296 cases.push_back(SpecConstantTwoValGraphicsCase("logicalnotequal", "False %bool", "False %bool", "%bool", "LogicalNotEqual %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputColors2));
8297 cases.push_back(SpecConstantTwoValGraphicsCase("snegate", " %i32 0", " %i32 0", "%i32", "SNegate %sc_0", -1, 0, addZeroToSc, outputColors2));
8298 cases.push_back(SpecConstantTwoValGraphicsCase("not", " %i32 0", " %i32 0", "%i32", "Not %sc_0", -2, 0, addZeroToSc, outputColors2));
8299 cases.push_back(SpecConstantTwoValGraphicsCase("logicalnot", "False %bool", "False %bool", "%bool", "LogicalNot %sc_0", 1, 0, selectFalseUsingSc, outputColors2));
8300 cases.push_back(SpecConstantTwoValGraphicsCase("select", "False %bool", " %i32 0", "%i32", "Select %sc_0 %sc_1 %c_i32_0", 1, 1, addZeroToSc, outputColors2));
8301 cases.push_back(SpecConstantTwoValGraphicsCase("sconvert", " %i32 0", " %i32 0", "%i16", "SConvert %sc_0", -1, 0, addZeroToSc32, outputColors0, (FLAG_I16 | FLAG_CONVERT)));
8302 cases.push_back(SpecConstantTwoValGraphicsCase("fconvert", " %f32 0", " %f32 0", "%f64", "FConvert %sc_0", tcu::Float32(-1.0), tcu::Float32(0.0), addZeroToSc32, outputColors0, (FLAG_F64 | FLAG_CONVERT)));
8303 cases.push_back(SpecConstantTwoValGraphicsCase("fconvert16", " %f16 0", " %f16 0", "%f32", "FConvert %sc_0", tcu::Float16(-1.0), tcu::Float16(0.0), addZeroToSc32, outputColors0, (FLAG_F16 | FLAG_CONVERT)));
8304 // \todo[2015-12-1 antiagainst] OpQuantizeToF16
8306 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
8308 map<string, string> specializations;
8309 map<string, string> fragments;
8310 SpecConstants specConstants;
8311 PushConstants noPushConstants;
8312 GraphicsResources noResources;
8313 GraphicsInterfaces noInterfaces;
8314 vector<string> extensions;
8315 VulkanFeatures requiredFeatures;
8317 // Special SPIR-V code when using 16-bit integers.
8318 if (cases[caseNdx].caseFlags & FLAG_I16)
8320 requiredFeatures.coreFeatures.shaderInt16 = VK_TRUE;
8321 fragments["capability"] += "OpCapability Int16\n"; // Adds 16-bit integer capability
8322 specializations["OPTYPE_DEFINITIONS"] += "%i16 = OpTypeInt 16 1\n"; // Adds 16-bit integer type
8323 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8324 specializations["TYPE_CONVERT"] += "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 16-bit integer to 32-bit integer
8327 // Special SPIR-V code when using 64-bit integers.
8328 if (cases[caseNdx].caseFlags & FLAG_I64)
8330 requiredFeatures.coreFeatures.shaderInt64 = VK_TRUE;
8331 fragments["capability"] += "OpCapability Int64\n"; // Adds 64-bit integer capability
8332 specializations["OPTYPE_DEFINITIONS"] += "%i64 = OpTypeInt 64 1\n"; // Adds 64-bit integer type
8333 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8334 specializations["TYPE_CONVERT"] += "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 64-bit integer to 32-bit integer
8337 // Special SPIR-V code when using 64-bit floats.
8338 if (cases[caseNdx].caseFlags & FLAG_F64)
8340 requiredFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
8341 fragments["capability"] += "OpCapability Float64\n"; // Adds 64-bit float capability
8342 specializations["OPTYPE_DEFINITIONS"] += "%f64 = OpTypeFloat 64\n"; // Adds 64-bit float type
8343 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8344 specializations["TYPE_CONVERT"] += "%sc_op32 = OpConvertFToS %i32 %sc_op\n"; // Converts 64-bit float to 32-bit integer
8347 // Extension needed for float16 and int8.
8348 if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
8349 extensions.push_back("VK_KHR_shader_float16_int8");
8351 // Special SPIR-V code when using 16-bit floats.
8352 if (cases[caseNdx].caseFlags & FLAG_F16)
8354 requiredFeatures.extFloat16Int8.shaderFloat16 = true;
8355 fragments["capability"] += "OpCapability Float16\n"; // Adds 16-bit float capability
8356 specializations["OPTYPE_DEFINITIONS"] += "%f16 = OpTypeFloat 16\n"; // Adds 16-bit float type
8357 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8358 specializations["TYPE_CONVERT"] += "%sc_op32 = OpConvertFToS %i32 %sc_op\n"; // Converts 16-bit float to 32-bit integer
8361 // Special SPIR-V code when using 8-bit integers.
8362 if (cases[caseNdx].caseFlags & FLAG_I8)
8364 requiredFeatures.extFloat16Int8.shaderInt8 = true;
8365 fragments["capability"] += "OpCapability Int8\n"; // Adds 8-bit integer capability
8366 specializations["OPTYPE_DEFINITIONS"] += "%i8 = OpTypeInt 8 1\n"; // Adds 8-bit integer type
8367 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8368 specializations["TYPE_CONVERT"] += "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 8-bit integer to 32-bit integer
8371 specializations["SC_DEF0"] = cases[caseNdx].scDefinition0;
8372 specializations["SC_DEF1"] = cases[caseNdx].scDefinition1;
8373 specializations["SC_RESULT_TYPE"] = cases[caseNdx].scResultType;
8374 specializations["SC_OP"] = cases[caseNdx].scOperation;
8375 specializations["GEN_RESULT"] = cases[caseNdx].resultOperation;
8377 fragments["decoration"] = tcu::StringTemplate(decorations1).specialize(specializations);
8378 fragments["pre_main"] = tcu::StringTemplate(typesAndConstants1).specialize(specializations);
8379 fragments["testfun"] = tcu::StringTemplate(function1).specialize(specializations);
8381 cases[caseNdx].scActualValue0.appendTo(specConstants);
8382 cases[caseNdx].scActualValue1.appendTo(specConstants);
8384 createTestsForAllStages(
8385 cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants,
8386 noPushConstants, noResources, noInterfaces, extensions, requiredFeatures, group.get());
8389 const char decorations2[] =
8390 "OpDecorate %sc_0 SpecId 0\n"
8391 "OpDecorate %sc_1 SpecId 1\n"
8392 "OpDecorate %sc_2 SpecId 2\n";
8394 const std::string typesAndConstants2 =
8395 "%vec3_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
8396 "%vec3_undef = OpUndef %v3i32\n"
8398 + getSpecConstantOpStructConstantsAndTypes() + getSpecConstantOpStructComposites() +
8400 "%sc_0 = OpSpecConstant %i32 0\n"
8401 "%sc_1 = OpSpecConstant %i32 0\n"
8402 "%sc_2 = OpSpecConstant %i32 0\n"
8404 + getSpecConstantOpStructConstBlock() +
8406 "%sc_vec3_0 = OpSpecConstantOp %v3i32 CompositeInsert %sc_0 %vec3_0 0\n" // (sc_0, 0, 0)
8407 "%sc_vec3_1 = OpSpecConstantOp %v3i32 CompositeInsert %sc_1 %vec3_0 1\n" // (0, sc_1, 0)
8408 "%sc_vec3_2 = OpSpecConstantOp %v3i32 CompositeInsert %sc_2 %vec3_0 2\n" // (0, 0, sc_2)
8409 "%sc_vec3_0_s = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_0 %vec3_undef 0 0xFFFFFFFF 2\n" // (sc_0, ???, 0)
8410 "%sc_vec3_1_s = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_1 %vec3_undef 0xFFFFFFFF 1 0\n" // (???, sc_1, 0)
8411 "%sc_vec3_2_s = OpSpecConstantOp %v3i32 VectorShuffle %vec3_undef %sc_vec3_2 5 0xFFFFFFFF 5\n" // (sc_2, ???, sc_2)
8412 "%sc_vec3_01 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n" // (0, sc_0, sc_1)
8413 "%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_01 %sc_vec3_2_s 5 1 2\n" // (sc_2, sc_0, sc_1)
8414 "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
8415 "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
8416 "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
8417 "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
8418 "%sc_factor = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n"; // (sc_2 - sc_0) * sc_1
8420 const std::string function2 =
8421 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8422 "%param = OpFunctionParameter %v4f32\n"
8423 "%label = OpLabel\n"
8424 "%result = OpVariable %fp_v4f32 Function\n"
8426 + getSpecConstantOpStructInstructions() +
8428 " OpStore %result %param\n"
8429 "%loc = OpAccessChain %fp_f32 %result %sc_final\n"
8430 "%val = OpLoad %f32 %loc\n"
8431 "%add = OpFAdd %f32 %val %c_f32_0_5\n"
8432 " OpStore %loc %add\n"
8433 "%ret = OpLoad %v4f32 %result\n"
8434 " OpReturnValue %ret\n"
8437 map<string, string> fragments;
8438 SpecConstants specConstants;
8440 fragments["decoration"] = decorations2;
8441 fragments["pre_main"] = typesAndConstants2;
8442 fragments["testfun"] = function2;
8444 specConstants.append<deInt32>(56789);
8445 specConstants.append<deInt32>(-2);
8446 specConstants.append<deInt32>(56788);
8448 createTestsForAllStages("vector_related", inputColors, outputColors2, fragments, specConstants, group.get());
8450 return group.release();
// Builds the "opphi" test group, which exercises the OpPhi instruction with four shader variants:
//   out_of_order - an OpPhi whose (value, parent block) pairs are not listed in block order,
//   induction    - OpPhis carrying a loop counter and an accumulator across iterations,
//   swap         - two OpPhis in the same block that consume each other's results,
//   swap16       - the swap case performed on 16-bit floats.
// Every case is registered through createTestsForAllStages with hard-coded expected colors.
// The group is released from its MovePtr on return, so the caller receives the raw pointer.
8453 tcu::TestCaseGroup* createOpPhiTests(tcu::TestContext& testCtx)
8455 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
8456 RGBA inputColors[4];
8457 RGBA outputColors1[4];
8458 RGBA outputColors2[4];
8459 RGBA outputColors3[4];
8460 RGBA outputColors4[4];
8461 map<string, string> fragments1;
8462 map<string, string> fragments2;
8463 map<string, string> fragments3;
8464 map<string, string> fragments4;
// Only the 16-bit case (number 4) needs extensions, resources and feature requirements.
8465 std::vector<std::string> extensions4;
8466 GraphicsResources resources4;
8467 VulkanFeatures vulkanFeatures4;
8469 const char typesAndConstants1[] =
8470 "%c_f32_p2 = OpConstant %f32 0.2\n"
8471 "%c_f32_p4 = OpConstant %f32 0.4\n"
8472 "%c_f32_p5 = OpConstant %f32 0.5\n"
8473 "%c_f32_p8 = OpConstant %f32 0.8\n";
8475 // vec4 test_code(vec4 param) {
8476 // vec4 result = param;
8477 // for (int i = 0; i < 4; ++i) {
8480 // case 0: operand = .2; break;
8481 // case 1: operand = .5; break;
8482 // case 2: operand = .4; break;
8483 // case 3: operand = .0; break;
8484 // default: break; // unreachable
8486 // result[i] += operand;
8490 const char function1[] =
8491 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8492 "%param1 = OpFunctionParameter %v4f32\n"
8494 "%iptr = OpVariable %fp_i32 Function\n"
8495 "%result = OpVariable %fp_v4f32 Function\n"
8496 " OpStore %iptr %c_i32_0\n"
8497 " OpStore %result %param1\n"
8501 "%ival = OpLoad %i32 %iptr\n"
8502 "%lt_4 = OpSLessThan %bool %ival %c_i32_4\n"
8503 " OpLoopMerge %exit %cont None\n"
8504 " OpBranchConditional %lt_4 %entry %exit\n"
8506 "%entry = OpLabel\n"
8507 "%loc = OpAccessChain %fp_f32 %result %ival\n"
8508 "%val = OpLoad %f32 %loc\n"
8509 " OpSelectionMerge %phi None\n"
8510 " OpSwitch %ival %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
8512 "%case0 = OpLabel\n"
8514 "%case1 = OpLabel\n"
8516 "%case2 = OpLabel\n"
8518 "%case3 = OpLabel\n"
8521 "%default = OpLabel\n"
8525 "%operand = OpPhi %f32 %c_f32_p4 %case2 %c_f32_p5 %case1 %c_f32_p2 %case0 %c_f32_0 %case3\n" // not in the order of blocks
8528 "%add = OpFAdd %f32 %val %operand\n"
8529 " OpStore %loc %add\n"
8530 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
8531 " OpStore %iptr %ival_next\n"
8535 "%ret = OpLoad %v4f32 %result\n"
8536 " OpReturnValue %ret\n"
8540 fragments1["pre_main"] = typesAndConstants1;
8541 fragments1["testfun"] = function1;
// The same input colors are shared by all four cases below.
8543 getHalfColorsFullAlpha(inputColors);
8545 outputColors1[0] = RGBA(178, 255, 229, 255);
8546 outputColors1[1] = RGBA(178, 127, 102, 255);
8547 outputColors1[2] = RGBA(51, 255, 102, 255);
8548 outputColors1[3] = RGBA(51, 127, 229, 255);
8550 createTestsForAllStages("out_of_order", inputColors, outputColors1, fragments1, group.get());
8552 const char typesAndConstants2[] =
8553 "%c_f32_p2 = OpConstant %f32 0.2\n";
8555 // Add .4 to the second element of the given parameter: %accum gains .2 per pass and the loop exits once %step reaches 2.
8556 const char function2[] =
8557 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8558 "%param = OpFunctionParameter %v4f32\n"
8559 "%entry = OpLabel\n"
8560 "%result = OpVariable %fp_v4f32 Function\n"
8561 " OpStore %result %param\n"
8562 "%loc = OpAccessChain %fp_f32 %result %c_i32_1\n"
8563 "%val = OpLoad %f32 %loc\n"
8567 "%step = OpPhi %i32 %c_i32_0 %entry %step_next %phi\n"
8568 "%accum = OpPhi %f32 %val %entry %accum_next %phi\n"
8569 "%step_next = OpIAdd %i32 %step %c_i32_1\n"
8570 "%accum_next = OpFAdd %f32 %accum %c_f32_p2\n"
8571 "%still_loop = OpSLessThan %bool %step %c_i32_2\n"
8572 " OpLoopMerge %exit %phi None\n"
8573 " OpBranchConditional %still_loop %phi %exit\n"
8576 " OpStore %loc %accum\n"
8577 "%ret = OpLoad %v4f32 %result\n"
8578 " OpReturnValue %ret\n"
8582 fragments2["pre_main"] = typesAndConstants2;
8583 fragments2["testfun"] = function2;
8585 outputColors2[0] = RGBA(127, 229, 127, 255);
8586 outputColors2[1] = RGBA(127, 102, 0, 255);
8587 outputColors2[2] = RGBA(0, 229, 0, 255);
8588 outputColors2[3] = RGBA(0, 102, 127, 255);
8590 createTestsForAllStages("induction", inputColors, outputColors2, fragments2, group.get());
8592 const char typesAndConstants3[] =
8593 "%true = OpConstantTrue %bool\n"
8594 "%false = OpConstantFalse %bool\n"
8595 "%c_f32_p2 = OpConstant %f32 0.2\n";
8597 // Swap the second and the third element of the given parameter.
8598 const char function3[] =
8599 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8600 "%param = OpFunctionParameter %v4f32\n"
8601 "%entry = OpLabel\n"
8602 "%result = OpVariable %fp_v4f32 Function\n"
8603 " OpStore %result %param\n"
8604 "%a_loc = OpAccessChain %fp_f32 %result %c_i32_1\n"
8605 "%a_init = OpLoad %f32 %a_loc\n"
8606 "%b_loc = OpAccessChain %fp_f32 %result %c_i32_2\n"
8607 "%b_init = OpLoad %f32 %b_loc\n"
// %still_loop is true only when arriving from %entry, so the back edge is taken exactly once;
// on that second pass each OpPhi picks up the other's previous value, which performs the swap.
8611 "%still_loop = OpPhi %bool %true %entry %false %phi\n"
8612 "%a_next = OpPhi %f32 %a_init %entry %b_next %phi\n"
8613 "%b_next = OpPhi %f32 %b_init %entry %a_next %phi\n"
8614 " OpLoopMerge %exit %phi None\n"
8615 " OpBranchConditional %still_loop %phi %exit\n"
8618 " OpStore %a_loc %a_next\n"
8619 " OpStore %b_loc %b_next\n"
8620 "%ret = OpLoad %v4f32 %result\n"
8621 " OpReturnValue %ret\n"
8625 fragments3["pre_main"] = typesAndConstants3;
8626 fragments3["testfun"] = function3;
8628 outputColors3[0] = RGBA(127, 127, 127, 255);
8629 outputColors3[1] = RGBA(127, 0, 0, 255);
8630 outputColors3[2] = RGBA(0, 0, 127, 255);
8631 outputColors3[3] = RGBA(0, 127, 0, 255);
8633 createTestsForAllStages("swap", inputColors, outputColors3, fragments3, group.get());
8635 const char typesAndConstants4[] =
8636 "%f16 = OpTypeFloat 16\n"
8637 "%v4f16 = OpTypeVector %f16 4\n"
8638 "%fp_f16 = OpTypePointer Function %f16\n"
8639 "%fp_v4f16 = OpTypePointer Function %v4f16\n"
8640 "%true = OpConstantTrue %bool\n"
8641 "%false = OpConstantFalse %bool\n"
8642 "%c_f32_p2 = OpConstant %f32 0.2\n";
8644 // Swap the second and the third element of the given parameter, operating on 16-bit floats (converted from/to 32-bit at entry/exit).
8645 const char function4[] =
8646 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8647 "%param = OpFunctionParameter %v4f32\n"
8648 "%entry = OpLabel\n"
8649 "%result = OpVariable %fp_v4f16 Function\n"
8650 "%param16 = OpFConvert %v4f16 %param\n"
8651 " OpStore %result %param16\n"
8652 "%a_loc = OpAccessChain %fp_f16 %result %c_i32_1\n"
8653 "%a_init = OpLoad %f16 %a_loc\n"
8654 "%b_loc = OpAccessChain %fp_f16 %result %c_i32_2\n"
8655 "%b_init = OpLoad %f16 %b_loc\n"
8659 "%still_loop = OpPhi %bool %true %entry %false %phi\n"
8660 "%a_next = OpPhi %f16 %a_init %entry %b_next %phi\n"
8661 "%b_next = OpPhi %f16 %b_init %entry %a_next %phi\n"
8662 " OpLoopMerge %exit %phi None\n"
8663 " OpBranchConditional %still_loop %phi %exit\n"
8666 " OpStore %a_loc %a_next\n"
8667 " OpStore %b_loc %b_next\n"
8668 "%ret16 = OpLoad %v4f16 %result\n"
8669 "%ret = OpFConvert %v4f32 %ret16\n"
8670 " OpReturnValue %ret\n"
8674 fragments4["pre_main"] = typesAndConstants4;
8675 fragments4["testfun"] = function4;
// The 16-bit variant declares the Float16 capability and requests the matching device extension and feature.
8676 fragments4["capability"] = "OpCapability Float16\n";
8678 extensions4.push_back("VK_KHR_shader_float16_int8");
8680 vulkanFeatures4.extFloat16Int8.shaderFloat16 = true;
// Expected colors are the same as for the 32-bit "swap" case.
8682 outputColors4[0] = RGBA(127, 127, 127, 255);
8683 outputColors4[1] = RGBA(127, 0, 0, 255);
8684 outputColors4[2] = RGBA(0, 0, 127, 255);
8685 outputColors4[3] = RGBA(0, 127, 0, 255);
8687 createTestsForAllStages("swap16", inputColors, outputColors4, fragments4, resources4, extensions4, group.get(), vulkanFeatures4);
8689 return group.release();
// Builds the "nocontraction" test group, which checks the NoContraction decoration.
// The same shader is registered three times, with the decoration applied to the
// multiplication (%mul), the addition (%add), or both.
// The group is released from its MovePtr on return, so the caller receives the raw pointer.
8692 tcu::TestCaseGroup* createNoContractionTests(tcu::TestContext& testCtx)
8694 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
8695 RGBA inputColors[4];
8696 RGBA outputColors[4];
8698 // With NoContraction, (1 + 2^-23) * (1 - 2^-23) - 1 must be evaluated as a separate multiplication and addition.
8699 // The exact product is 1 - 2^-46, which is not representable as a 32-bit float (32-bit floats
8700 // only have a 23-bit fraction), so it is rounded either up to 1 or down to 0x1.fffffep-1 (1 - 2^-24). The final result is then 0 or -0x1p-24.
8701 // If the operations were contracted (fused) instead, the result would be -2^-46, which is a normalized number exactly representable as a 32-bit float.
8702 const char constantsAndTypes[] =
8703 "%c_vec4_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_1\n"
8704 "%c_vec4_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
8705 "%c_f32_1pl2_23 = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
8706 "%c_f32_1mi2_23 = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
8707 "%c_f32_n1pn24 = OpConstant %f32 -0x1p-24\n"; // -2^-24
// The operands are routed through Function variables, and the second one is formed by adding the
// parameter's component 0 at run time (presumably so the product cannot be constant-folded - TODO confirm).
// The shader returns %c_vec4_0 when the result is one of the two accepted values and %c_vec4_1 otherwise.
8709 const char function[] =
8710 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8711 "%param = OpFunctionParameter %v4f32\n"
8712 "%label = OpLabel\n"
8713 "%var1 = OpVariable %fp_f32 Function %c_f32_1pl2_23\n"
8714 "%var2 = OpVariable %fp_f32 Function\n"
8715 "%red = OpCompositeExtract %f32 %param 0\n"
8716 "%plus_red = OpFAdd %f32 %c_f32_1mi2_23 %red\n"
8717 " OpStore %var2 %plus_red\n"
8718 "%val1 = OpLoad %f32 %var1\n"
8719 "%val2 = OpLoad %f32 %var2\n"
8720 "%mul = OpFMul %f32 %val1 %val2\n"
8721 "%add = OpFAdd %f32 %mul %c_f32_n1\n"
8722 "%is0 = OpFOrdEqual %bool %add %c_f32_0\n"
8723 "%isn1n24 = OpFOrdEqual %bool %add %c_f32_n1pn24\n"
8724 "%success = OpLogicalOr %bool %is0 %isn1n24\n"
8725 "%v4success = OpCompositeConstruct %v4bool %success %success %success %success\n"
8726 "%ret = OpSelect %v4f32 %v4success %c_vec4_0 %c_vec4_1\n"
8727 " OpReturnValue %ret\n"
// Pairs a test case name with the decoration text placed in the "decoration" fragment.
8730 struct CaseNameDecoration
8737 CaseNameDecoration tests[] = {
8738 {"multiplication", "OpDecorate %mul NoContraction"},
8739 {"addition", "OpDecorate %add NoContraction"},
8740 {"both", "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"},
8743 getHalfColorsFullAlpha(inputColors);
// Zero the red channel of every input (presumably what the shader reads as %red) and expect
// RGBA(0, 0, 0, 255), which matches the success value %c_vec4_0.
8745 for (deUint8 idx = 0; idx < 4; ++idx)
8747 inputColors[idx].setRed(0);
8748 outputColors[idx] = RGBA(0, 0, 0, 255);
8751 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(CaseNameDecoration); ++testNdx)
8753 map<string, string> fragments;
8755 fragments["decoration"] = tests[testNdx].decoration;
8756 fragments["pre_main"] = constantsAndTypes;
8757 fragments["testfun"] = function;
8759 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, group.get());
8762 return group.release();
// Builds the "opmemoryaccess" test group. One shader template is specialized once per entry of
// `tests`, substituting the entry's memory-access operand text for ${access_type} on the
// OpLoad, OpStore and OpCopyMemory instructions that carry it.
// The group is released from its MovePtr on return, so the caller receives the raw pointer.
8765 tcu::TestCaseGroup* createMemoryAccessTests(tcu::TestContext& testCtx)
8767 de::MovePtr<tcu::TestCaseGroup> memoryAccessTests (new tcu::TestCaseGroup(testCtx, "opmemoryaccess", "Memory Semantics"));
8770 const char constantsAndTypes[] =
8771 "%c_a2f32_1 = OpConstantComposite %a2f32 %c_f32_1 %c_f32_1\n"
8772 "%fp_a2f32 = OpTypePointer Function %a2f32\n"
8773 "%stype = OpTypeStruct %v4f32 %a2f32 %f32\n"
8774 "%fp_stype = OpTypePointer Function %stype\n";
// The shader stores %c_f32_1-based values into %v1..%v3, moves them into the struct %v member by
// member, copies %v to %vv and its third member to %vvv, then scales the parameter by the two
// copied scalars. Both scalars originate from %c_f32_1, so the expected output equals the input.
8776 const char function[] =
8777 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8778 "%param1 = OpFunctionParameter %v4f32\n"
8780 "%v1 = OpVariable %fp_v4f32 Function\n"
8781 "%v2 = OpVariable %fp_a2f32 Function\n"
8782 "%v3 = OpVariable %fp_f32 Function\n"
8783 "%v = OpVariable %fp_stype Function\n"
8784 "%vv = OpVariable %fp_stype Function\n"
8785 "%vvv = OpVariable %fp_f32 Function\n"
8787 " OpStore %v1 %c_v4f32_1_1_1_1\n"
8788 " OpStore %v2 %c_a2f32_1\n"
8789 " OpStore %v3 %c_f32_1\n"
8791 "%p_v4f32 = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
8792 "%p_a2f32 = OpAccessChain %fp_a2f32 %v %c_u32_1\n"
8793 "%p_f32 = OpAccessChain %fp_f32 %v %c_u32_2\n"
8794 "%v1_v = OpLoad %v4f32 %v1 ${access_type}\n"
8795 "%v2_v = OpLoad %a2f32 %v2 ${access_type}\n"
8796 "%v3_v = OpLoad %f32 %v3 ${access_type}\n"
8798 " OpStore %p_v4f32 %v1_v ${access_type}\n"
8799 " OpStore %p_a2f32 %v2_v ${access_type}\n"
8800 " OpStore %p_f32 %v3_v ${access_type}\n"
8802 " OpCopyMemory %vv %v ${access_type}\n"
8803 " OpCopyMemory %vvv %p_f32 ${access_type}\n"
8805 "%p_f32_2 = OpAccessChain %fp_f32 %vv %c_u32_2\n"
8806 "%v_f32_2 = OpLoad %f32 %p_f32_2\n"
8807 "%v_f32_3 = OpLoad %f32 %vvv\n"
8809 "%ret1 = OpVectorTimesScalar %v4f32 %param1 %v_f32_2\n"
8810 "%ret2 = OpVectorTimesScalar %v4f32 %ret1 %v_f32_3\n"
8811 " OpReturnValue %ret2\n"
// Pairs a test case name with the memory-access operand text substituted for ${access_type}.
8814 struct NameMemoryAccess
8821 NameMemoryAccess tests[] =
8824 { "volatile", "Volatile" },
8825 { "aligned", "Aligned 1" },
8826 { "volatile_aligned", "Volatile|Aligned 1" },
8827 { "nontemporal_aligned", "Nontemporal|Aligned 1" },
8828 { "volatile_nontemporal", "Volatile|Nontemporal" },
// NOTE(review): "nontermporal" below is misspelled, but the string is used as the test case
// name passed to createTestsForAllStages, so correcting it would rename the test.
8829 { "volatile_nontermporal_aligned", "Volatile|Nontemporal|Aligned 1" },
8832 getHalfColorsFullAlpha(colors);
8834 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameMemoryAccess); ++testNdx)
8836 map<string, string> fragments;
8837 map<string, string> memoryAccess;
8838 memoryAccess["access_type"] = tests[testNdx].accessType;
8840 fragments["pre_main"] = constantsAndTypes;
8841 fragments["testfun"] = tcu::StringTemplate(function).specialize(memoryAccess);
// The same color array is passed as both input and expected output.
8842 createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, memoryAccessTests.get());
8844 return memoryAccessTests.release();
// Builds the "opundef" test group. A first batch of cases only declares an OpUndef of various
// types without using the result; the remaining cases (float32, sint32, uint32, vec4float32,
// matrix) feed the undefined value through a multiplication by zero and then into the returned
// color. Every case passes the default colors as both input and expected output.
// The group is released from its MovePtr on return, so the caller receives the raw pointer.
8846 tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
8848 de::MovePtr<tcu::TestCaseGroup> opUndefTests (new tcu::TestCaseGroup(testCtx, "opundef", "Test OpUndef"));
8849 RGBA defaultColors[4];
8850 map<string, string> fragments;
8851 getDefaultColors(defaultColors);
8853 // First, simple cases that don't do anything with the OpUndef result.
// name: test case name; decl: extra type declarations placed in "pre_main"; type: the type id given to OpUndef.
8854 struct NameCodePair { string name, decl, type; };
8855 const NameCodePair tests[] =
8857 {"bool", "", "%bool"},
8858 {"vec2uint32", "", "%v2u32"},
8859 {"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown", "%type"},
8860 {"sampler", "%type = OpTypeSampler", "%type"},
8861 {"sampledimage", "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n" "%type = OpTypeSampledImage %img", "%type"},
8862 {"pointer", "", "%fp_i32"},
8863 {"runtimearray", "%type = OpTypeRuntimeArray %f32", "%type"},
8864 {"array", "%c_u32_100 = OpConstant %u32 100\n" "%type = OpTypeArray %i32 %c_u32_100", "%type"},
8865 {"struct", "%type = OpTypeStruct %f32 %i32 %u32", "%type"}};
8866 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
// The fragments map doubles as the template parameter map: "undef_type" is stored in it and
// then consumed by specialize() for ${undef_type}.
8868 fragments["undef_type"] = tests[testNdx].type;
8869 fragments["testfun"] = StringTemplate(
8870 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8871 "%param1 = OpFunctionParameter %v4f32\n"
8872 "%label_testfun = OpLabel\n"
8873 "%undef = OpUndef ${undef_type}\n"
8874 "OpReturnValue %param1\n"
8875 "OpFunctionEnd\n").specialize(fragments);
8876 fragments["pre_main"] = tests[testNdx].decl;
8877 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opUndefTests.get());
// float32: undef * 0 is replaced by the zero constant if it is NaN, then added to component 0 of the parameter.
8881 fragments["testfun"] =
8882 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8883 "%param1 = OpFunctionParameter %v4f32\n"
8884 "%label_testfun = OpLabel\n"
8885 "%undef = OpUndef %f32\n"
8886 "%zero = OpFMul %f32 %undef %c_f32_0\n"
8887 "%is_nan = OpIsNan %bool %zero\n" //OpUndef may result in NaN which may turn %zero into Nan.
8888 "%actually_zero = OpSelect %f32 %is_nan %c_f32_0 %zero\n"
8889 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8890 "%b = OpFAdd %f32 %a %actually_zero\n"
8891 "%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
8892 "OpReturnValue %ret\n"
8895 createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
// sint32: undef * 0 is used as the dynamic index for the extract; the extracted value is written back to component 0.
8897 fragments["testfun"] =
8898 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8899 "%param1 = OpFunctionParameter %v4f32\n"
8900 "%label_testfun = OpLabel\n"
8901 "%undef = OpUndef %i32\n"
8902 "%zero = OpIMul %i32 %undef %c_i32_0\n"
8903 "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8904 "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8905 "OpReturnValue %ret\n"
8908 createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
// uint32: same as sint32 but with an unsigned undef and result; note the zero operand is still the signed constant %c_i32_0.
8910 fragments["testfun"] =
8911 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8912 "%param1 = OpFunctionParameter %v4f32\n"
8913 "%label_testfun = OpLabel\n"
8914 "%undef = OpUndef %u32\n"
8915 "%zero = OpIMul %u32 %undef %c_i32_0\n"
8916 "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8917 "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8918 "OpReturnValue %ret\n"
8921 createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
// vec4float32: the float32 scheme applied per component - scale the undef vector by zero,
// replace NaN components with the zero constant, and add each component to the parameter.
8923 fragments["testfun"] =
8924 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8925 "%param1 = OpFunctionParameter %v4f32\n"
8926 "%label_testfun = OpLabel\n"
8927 "%undef = OpUndef %v4f32\n"
8928 "%vzero = OpVectorTimesScalar %v4f32 %undef %c_f32_0\n"
8929 "%zero_0 = OpVectorExtractDynamic %f32 %vzero %c_i32_0\n"
8930 "%zero_1 = OpVectorExtractDynamic %f32 %vzero %c_i32_1\n"
8931 "%zero_2 = OpVectorExtractDynamic %f32 %vzero %c_i32_2\n"
8932 "%zero_3 = OpVectorExtractDynamic %f32 %vzero %c_i32_3\n"
8933 "%is_nan_0 = OpIsNan %bool %zero_0\n"
8934 "%is_nan_1 = OpIsNan %bool %zero_1\n"
8935 "%is_nan_2 = OpIsNan %bool %zero_2\n"
8936 "%is_nan_3 = OpIsNan %bool %zero_3\n"
8937 "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8938 "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8939 "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8940 "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8941 "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8942 "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8943 "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8944 "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8945 "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8946 "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8947 "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8948 "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8949 "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8950 "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8951 "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8952 "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8953 "OpReturnValue %ret\n"
8956 createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
// matrix: same scheme on an undefined 2x2 matrix; its four elements are read with
// OpCompositeExtract (column, row literals) and added to the four parameter components.
8958 fragments["pre_main"] =
8959 "%m2x2f32 = OpTypeMatrix %v2f32 2\n";
8960 fragments["testfun"] =
8961 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8962 "%param1 = OpFunctionParameter %v4f32\n"
8963 "%label_testfun = OpLabel\n"
8964 "%undef = OpUndef %m2x2f32\n"
8965 "%mzero = OpMatrixTimesScalar %m2x2f32 %undef %c_f32_0\n"
8966 "%zero_0 = OpCompositeExtract %f32 %mzero 0 0\n"
8967 "%zero_1 = OpCompositeExtract %f32 %mzero 0 1\n"
8968 "%zero_2 = OpCompositeExtract %f32 %mzero 1 0\n"
8969 "%zero_3 = OpCompositeExtract %f32 %mzero 1 1\n"
8970 "%is_nan_0 = OpIsNan %bool %zero_0\n"
8971 "%is_nan_1 = OpIsNan %bool %zero_1\n"
8972 "%is_nan_2 = OpIsNan %bool %zero_2\n"
8973 "%is_nan_3 = OpIsNan %bool %zero_3\n"
8974 "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8975 "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8976 "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8977 "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8978 "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8979 "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8980 "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8981 "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8982 "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8983 "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8984 "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8985 "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8986 "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8987 "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8988 "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8989 "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8990 "OpReturnValue %ret\n"
8993 createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
8995 return opUndefTests.release();
// Adds OpQuantizeToF16 cases that have a single acceptable result to the given group.
// Despite its name, the `testCtx` parameter is a tcu::TestCaseGroup*: it is the group that every
// case is registered into. Each table entry is registered twice: once with the input baked in as
// an OpConstant, and once (prefixed "spec_const_") with the input supplied as a specialization
// constant and quantized via OpSpecConstantOp.
8998 void createOpQuantizeSingleOptionTests(tcu::TestCaseGroup* testCtx)
9000 const RGBA inputColors[4] =
9003 RGBA(0, 0, 255, 255),
9004 RGBA(0, 255, 0, 255),
9005 RGBA(0, 255, 255, 255)
// On success the shader selects %c_v4f32_1_0_0_1 instead of the parameter, hence RGBA(255, 0, 0, 255) everywhere.
9008 const RGBA expectedColors[4] =
9010 RGBA(255, 0, 0, 255),
9011 RGBA(255, 0, 0, 255),
9012 RGBA(255, 0, 0, 255),
9013 RGBA(255, 0, 0, 255)
9016 const struct SingleFP16Possibility
9019 const char* constant; // Value to assign to %test_constant.
9021 const char* condition; // Must assign to %cond an expression that evaluates to true after %c = OpQuantizeToF16(%test_constant + 0).
9027 -constructNormalizedFloat(1, 0x300000),
9028 "%cond = OpFOrdEqual %bool %c %test_constant\n"
9033 constructNormalizedFloat(7, 0x000000),
9034 "%cond = OpFOrdEqual %bool %c %test_constant\n"
9036 // SPIR-V requires that OpQuantizeToF16 flushes
9037 // any numbers that would end up denormalized in F16 to zero.
9041 std::ldexp(1.5f, -140),
9042 "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9047 -std::ldexp(1.5f, -140),
9048 "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9053 std::ldexp(1.0f, -16),
9054 "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9055 }, // too small positive
9057 "negative_too_small",
9059 -std::ldexp(1.0f, -32),
9060 "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9061 }, // too small negative
9065 -std::ldexp(1.0f, 128),
9067 "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9068 "%inf = OpIsInf %bool %c\n"
9069 "%cond = OpLogicalAnd %bool %gz %inf\n"
9074 std::ldexp(1.0f, 128),
9076 "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9077 "%inf = OpIsInf %bool %c\n"
9078 "%cond = OpLogicalAnd %bool %gz %inf\n"
9081 "round_to_negative_inf",
9083 -std::ldexp(1.0f, 32),
9085 "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9086 "%inf = OpIsInf %bool %c\n"
9087 "%cond = OpLogicalAnd %bool %gz %inf\n"
9092 std::ldexp(1.0f, 16),
9094 "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9095 "%inf = OpIsInf %bool %c\n"
9096 "%cond = OpLogicalAnd %bool %gz %inf\n"
9101 std::numeric_limits<float>::quiet_NaN(),
9103 // Test for any NaN value, as NaNs are not preserved
9104 "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9105 "%cond = OpIsNan %bool %direct_quant\n"
9110 std::numeric_limits<float>::quiet_NaN(),
9112 // Test for any NaN value, as NaNs are not preserved
9113 "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9114 "%cond = OpIsNan %bool %direct_quant\n"
9117 const char* constants =
9118 "%test_constant = OpConstant %f32 "; // The literal is appended from tests[idx].constant when "pre_main" is built.
// Constant variant: %test_constant is added to component 0 of the parameter and the sum is quantized into %c.
9120 StringTemplate function (
9121 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9122 "%param1 = OpFunctionParameter %v4f32\n"
9123 "%label_testfun = OpLabel\n"
9124 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9125 "%b = OpFAdd %f32 %test_constant %a\n"
9126 "%c = OpQuantizeToF16 %f32 %b\n"
9128 "%v4cond = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9129 "%retval = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9130 " OpReturnValue %retval\n"
// Spec-constant variant: %test_constant carries SpecId 0 and %c is produced by OpSpecConstantOp rather than in the function body.
9134 const char* specDecorations = "OpDecorate %test_constant SpecId 0\n";
9135 const char* specConstants =
9136 "%test_constant = OpSpecConstant %f32 0.\n"
9137 "%c = OpSpecConstantOp %f32 QuantizeToF16 %test_constant\n";
9139 StringTemplate specConstantFunction(
9140 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9141 "%param1 = OpFunctionParameter %v4f32\n"
9142 "%label_testfun = OpLabel\n"
9144 "%v4cond = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9145 "%retval = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9146 " OpReturnValue %retval\n"
// First pass: one case per table entry with the value written as an OpConstant literal.
9150 for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
9152 map<string, string> codeSpecialization;
9153 map<string, string> fragments;
9154 codeSpecialization["condition"] = tests[idx].condition;
9155 fragments["testfun"] = function.specialize(codeSpecialization);
9156 fragments["pre_main"] = string(constants) + tests[idx].constant + "\n";
9157 createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
// Second pass: the same entries, with the value passed at pipeline creation as a float specialization constant.
9160 for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
9162 map<string, string> codeSpecialization;
9163 map<string, string> fragments;
9164 SpecConstants passConstants;
9166 codeSpecialization["condition"] = tests[idx].condition;
9167 fragments["testfun"] = specConstantFunction.specialize(codeSpecialization);
9168 fragments["decoration"] = specDecorations;
9169 fragments["pre_main"] = specConstants;
9171 passConstants.append<float>(tests[idx].valueAsFloat);
9173 createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
// Registers OpQuantizeToF16 cases in which either of two results is accepted.
// The shader adds the test input to param1.x, quantizes the sum and compares it
// against both %possible_solution constants; OpSelect then yields
// %c_v4f32_1_0_0_1 on a match and the unmodified %param1 otherwise.
// Every tests[] entry is registered twice: once with the input as an OpConstant
// and once, under a "spec_const_" name prefix, as an OpSpecConstant (SpecId 0)
// whose value is supplied through passConstants.
9177 void createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup* testCtx)
// Colors handed to createTestsForAllStages as the inputs.
9179 RGBA inputColors[4] = {
9181 RGBA(0, 0, 255, 255),
9182 RGBA(0, 255, 0, 255),
9183 RGBA(0, 255, 255, 255)
// Colors handed to createTestsForAllStages as the expected outputs; all four
// entries are the same value.
9186 RGBA expectedColors[4] =
9188 RGBA(255, 0, 0, 255),
9189 RGBA(255, 0, 0, 255),
9190 RGBA(255, 0, 0, 255),
9191 RGBA(255, 0, 0, 255)
// One table entry per case. The loops below read name, input (assembly text),
// inputAsFloat (value for the spec constant) and the two accepted outputs.
9194 struct DualFP16Possibility
9199 const char* possibleOutput1;
9200 const char* possibleOutput2;
9203 "positive_round_up_or_round_down",
9205 constructNormalizedFloat(8, 0x300300),
9210 "negative_round_up_or_round_down",
9212 -constructNormalizedFloat(-7, 0x600800),
9219 constructNormalizedFloat(2, 0x01e000),
9224 "carry_to_exponent",
9226 constructNormalizedFloat(1, 0xffe000),
// Constant declarations for the OpConstant variant; ${input}, ${output1} and
// ${output2} are filled in per test.
9231 StringTemplate constants (
9232 "%input_const = OpConstant %f32 ${input}\n"
9233 "%possible_solution1 = OpConstant %f32 ${output1}\n"
9234 "%possible_solution2 = OpConstant %f32 ${output2}\n"
// Same declarations for the spec-constant variant: the input defaults to 0. and
// is overridden at pipeline creation through passConstants.
9237 StringTemplate specConstants (
9238 "%input_const = OpSpecConstant %f32 0.\n"
9239 "%possible_solution1 = OpConstant %f32 ${output1}\n"
9240 "%possible_solution2 = OpConstant %f32 ${output2}\n"
9243 const char* specDecorations = "OpDecorate %input_const SpecId 0\n";
// Test function shared by both variants. Every instruction string is terminated
// with "\n" so the generated assembly keeps one instruction per line.
9245 const char* function =
9246 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9247 "%param1 = OpFunctionParameter %v4f32\n"
9248 "%label_testfun = OpLabel\n"
9249 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9250 // For the purposes of this test we assume that 0.f will always get
9251 // faithfully passed through the pipeline stages.
9252 "%b = OpFAdd %f32 %input_const %a\n"
9253 "%c = OpQuantizeToF16 %f32 %b\n"
9254 "%eq_1 = OpFOrdEqual %bool %c %possible_solution1\n"
9255 "%eq_2 = OpFOrdEqual %bool %c %possible_solution2\n"
9256 "%cond = OpLogicalOr %bool %eq_1 %eq_2\n"
9257 "%v4cond = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9258 "%retval = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9259 " OpReturnValue %retval\n"
// Variant 1: the input is baked into the module as an OpConstant.
9262 for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
9263 map<string, string> fragments;
9264 map<string, string> constantSpecialization;
9266 constantSpecialization["input"] = tests[idx].input;
9267 constantSpecialization["output1"] = tests[idx].possibleOutput1;
9268 constantSpecialization["output2"] = tests[idx].possibleOutput2;
9269 fragments["testfun"] = function;
9270 fragments["pre_main"] = constants.specialize(constantSpecialization);
9271 createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
// Variant 2: the input is a specialization constant; only the outputs are
// substituted into the template and the value travels via passConstants.
9274 for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
9275 map<string, string> fragments;
9276 map<string, string> constantSpecialization;
9277 SpecConstants passConstants;
9279 constantSpecialization["output1"] = tests[idx].possibleOutput1;
9280 constantSpecialization["output2"] = tests[idx].possibleOutput2;
9281 fragments["testfun"] = function;
9282 fragments["decoration"] = specDecorations;
9283 fragments["pre_main"] = specConstants.specialize(constantSpecialization);
9285 passConstants.append<float>(tests[idx].inputAsFloat);
9287 createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
// Creates the "opquantize" group, fills it with the single-option and the
// two-possibility OpQuantizeToF16 cases, and returns the raw group pointer
// obtained from MovePtr::release().
9291 tcu::TestCaseGroup* createOpQuantizeTests(tcu::TestContext& testCtx)
9293 de::MovePtr<tcu::TestCaseGroup> opQuantizeTests (new tcu::TestCaseGroup(testCtx, "opquantize", "Test OpQuantizeToF16"));
9294 createOpQuantizeSingleOptionTests(opQuantizeTests.get());
9295 createOpQuantizeTwoPossibilityTests(opQuantizeTests.get());
9296 return opQuantizeTests.release();
// One selector per graphics stage. getShaderPermutation() fills each field with
// 0 or 1, and createModuleTests() uses the value as an index into its numbers[]
// table when composing entry-point and test names.
9299 struct ShaderPermutation
9301 deUint8 vertexPermutation;
9302 deUint8 geometryPermutation;
9303 deUint8 tesscPermutation;
9304 deUint8 tessePermutation;
9305 deUint8 fragmentPermutation;
// Unpacks the low five bits of inputValue into a ShaderPermutation. The
// initializers are listed in struct field order, so bit 0x10 selects the vertex
// entry, 0x08 geometry, 0x04 tessellation control, 0x02 tessellation evaluation
// and 0x01 fragment. Each field becomes 1 when its bit is set, otherwise 0.
9308 ShaderPermutation getShaderPermutation(deUint8 inputValue)
9310 ShaderPermutation permutation =
9312 static_cast<deUint8>(inputValue & 0x10? 1u: 0u),
9313 static_cast<deUint8>(inputValue & 0x08? 1u: 0u),
9314 static_cast<deUint8>(inputValue & 0x04? 1u: 0u),
9315 static_cast<deUint8>(inputValue & 0x02? 1u: 0u),
9316 static_cast<deUint8>(inputValue & 0x01? 1u: 0u)
// Builds the "module" group ("Multiple entry points into shaders").
// It first adds four cases in which every stage uses entry point "main" of one
// shared "module" program, for four different stage sets. It then adds 32 cases,
// one per value of a 5-bit mask, in which each stage picks one of two entry
// points from its own per-stage module.
9321 tcu::TestCaseGroup* createModuleTests(tcu::TestContext& testCtx)
9323 RGBA defaultColors[4];
9324 RGBA invertedColors[4];
9325 de::MovePtr<tcu::TestCaseGroup> moduleTests (new tcu::TestCaseGroup(testCtx, "module", "Multiple entry points into shaders"));
9327 getDefaultColors(defaultColors);
9328 getInvertedDefaultColors(invertedColors);
9330 // Combined module tests
9332 // Shader stages: vertex and fragment
9334 const ShaderElement combinedPipeline[] =
9336 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9337 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9340 addFunctionCaseWithPrograms<InstanceContext>(
9341 moduleTests.get(), "same_module", "", createCombinedModule, runAndVerifyDefaultPipeline,
9342 createInstanceContext(combinedPipeline, map<string, string>()));
9345 // Shader stages: vertex, geometry and fragment
9347 const ShaderElement combinedPipeline[] =
9349 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9350 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9351 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9354 addFunctionCaseWithPrograms<InstanceContext>(
9355 moduleTests.get(), "same_module_geom", "", createCombinedModule, runAndVerifyDefaultPipeline,
9356 createInstanceContext(combinedPipeline, map<string, string>()));
9359 // Shader stages: vertex, tessellation control, tessellation evaluation and fragment
9361 const ShaderElement combinedPipeline[] =
9363 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9364 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9365 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9366 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9369 addFunctionCaseWithPrograms<InstanceContext>(
9370 moduleTests.get(), "same_module_tessc_tesse", "", createCombinedModule, runAndVerifyDefaultPipeline,
9371 createInstanceContext(combinedPipeline, map<string, string>()));
9374 // Shader stages: vertex, tessellation control, tessellation evaluation, geometry and fragment
9376 const ShaderElement combinedPipeline[] =
9378 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9379 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9380 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9381 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9382 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9385 addFunctionCaseWithPrograms<InstanceContext>(
9386 moduleTests.get(), "same_module_tessc_tesse_geom", "", createCombinedModule, runAndVerifyDefaultPipeline,
9387 createInstanceContext(combinedPipeline, map<string, string>()));
// Suffix table indexed by the 0/1 permutation flags.
// NOTE(review): the initializer values are not visible in this listing.
9391 const char* numbers[] =
// idx walks all 32 combinations of the five per-stage flags.
9396 for (deInt8 idx = 0; idx < 32; ++idx)
9398 ShaderPermutation permutation = getShaderPermutation(idx);
// Test name and entry-point names both embed the selected suffix for each stage.
9399 string name = string("vert") + numbers[permutation.vertexPermutation] + "_geom" + numbers[permutation.geometryPermutation] + "_tessc" + numbers[permutation.tesscPermutation] + "_tesse" + numbers[permutation.tessePermutation] + "_frag" + numbers[permutation.fragmentPermutation];
9400 const ShaderElement pipeline[] =
9402 ShaderElement("vert", string("vert") + numbers[permutation.vertexPermutation], VK_SHADER_STAGE_VERTEX_BIT),
9403 ShaderElement("geom", string("geom") + numbers[permutation.geometryPermutation], VK_SHADER_STAGE_GEOMETRY_BIT),
9404 ShaderElement("tessc", string("tessc") + numbers[permutation.tesscPermutation], VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9405 ShaderElement("tesse", string("tesse") + numbers[permutation.tessePermutation], VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9406 ShaderElement("frag", string("frag") + numbers[permutation.fragmentPermutation], VK_SHADER_STAGE_FRAGMENT_BIT)
9409 // If there are an even number of swaps, then it should be no-op.
9410 // If there are an odd number, the color should be flipped.
9411 if ((permutation.vertexPermutation + permutation.geometryPermutation + permutation.tesscPermutation + permutation.tessePermutation + permutation.fragmentPermutation) % 2 == 0)
// Even flag count: expected output equals the default input colors.
9413 addFunctionCaseWithPrograms<InstanceContext>(
9414 moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
9415 createInstanceContext(pipeline, defaultColors, defaultColors, map<string, string>()));
// Odd flag count: expected output is the inverted color set.
9419 addFunctionCaseWithPrograms<InstanceContext>(
9420 moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
9421 createInstanceContext(pipeline, defaultColors, invertedColors, map<string, string>()));
9424 return moduleTests.release();
// Maps a ShaderTask to a test-name fragment: an empty string for
// SHADER_TASK_NONE, otherwise prefix followed by "_normal", "_unused_var" or
// "_unused_func". Any other task value trips DE_ASSERT.
9427 std::string getUnusedVarTestNamePiece(const std::string& prefix, ShaderTask task)
9431 case SHADER_TASK_NONE: return "";
9432 case SHADER_TASK_NORMAL: return prefix + "_normal";
9433 case SHADER_TASK_UNUSED_VAR: return prefix + "_unused_var";
9434 case SHADER_TASK_UNUSED_FUNC: return prefix + "_unused_func";
9435 default: DE_ASSERT(DE_FALSE);
// Returns the short stage name used in test names for a ShaderTaskIndex:
// "vertex", "geom", "tessc", "tesse" or "frag". Any other index trips DE_ASSERT.
9441 std::string getShaderTaskIndexName(ShaderTaskIndex index)
9445 case SHADER_TASK_INDEX_VERTEX: return "vertex";
9446 case SHADER_TASK_INDEX_GEOMETRY: return "geom";
9447 case SHADER_TASK_INDEX_TESS_CONTROL: return "tessc";
9448 case SHADER_TASK_INDEX_TESS_EVAL: return "tesse";
9449 case SHADER_TASK_INDEX_FRAGMENT: return "frag";
9450 default: DE_ASSERT(DE_FALSE);
// Composes a test name from the variable location and the per-stage tasks:
// starts with location.toString() and, for every stage whose task is not
// SHADER_TASK_NONE, appends "_" plus the stage name and task suffix.
// NOTE(review): the return statement is not visible in this listing; presumably
// testName is returned.
9456 std::string getUnusedVarTestName(const ShaderTaskArray& shaderTasks, const VariableLocation& location)
9458 std::string testName = location.toString();
9460 for (size_t i = 0; i < DE_LENGTH_OF_ARRAY(shaderTasks); ++i)
// Stages that do not take part in the pipeline contribute nothing to the name.
9462 if (shaderTasks[i] != SHADER_TASK_NONE)
9464 testName += "_" + getUnusedVarTestNamePiece(getShaderTaskIndexName((ShaderTaskIndex)i), shaderTasks[i]);
// Builds the "unused_variables" group: one case for every pairing of a shader
// task combination with a variable location. Each case is named by
// getUnusedVarTestName() and uses createUnusedVariableModules /
// runAndVerifyUnusedVariablePipeline.
9471 tcu::TestCaseGroup* createUnusedVariableTests(tcu::TestContext& testCtx)
9473 de::MovePtr<tcu::TestCaseGroup> moduleTests (new tcu::TestCaseGroup(testCtx, "unused_variables", "Graphics shaders with unused variables"));
// In every row exactly one stage carries an UNUSED_VAR or UNUSED_FUNC task; the
// remaining stages are NORMAL or NONE.
9475 ShaderTaskArray shaderCombinations[] =
9477 // Vertex Geometry Tess. Control Tess. Evaluation Fragment
9478 { SHADER_TASK_UNUSED_VAR, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL },
9479 { SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL },
9480 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_UNUSED_VAR },
9481 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_UNUSED_FUNC },
9482 { SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL },
9483 { SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL },
9484 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NORMAL, SHADER_TASK_NORMAL },
9485 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NORMAL, SHADER_TASK_NORMAL },
9486 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NORMAL },
9487 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NORMAL }
// NOTE(review): the location entries are not visible in this listing.
9490 const VariableLocation testLocations[] =
9497 for (size_t combNdx = 0; combNdx < DE_LENGTH_OF_ARRAY(shaderCombinations); ++combNdx)
9499 for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
9501 const ShaderTaskArray& shaderTasks = shaderCombinations[combNdx];
9502 const VariableLocation& location = testLocations[locationNdx];
9503 std::string testName = getUnusedVarTestName(shaderTasks, location);
9505 addFunctionCaseWithPrograms<UnusedVariableContext>(
9506 moduleTests.get(), testName, "", createUnusedVariableModules, runAndVerifyUnusedVariablePipeline,
9507 createUnusedVariableContext(shaderTasks, location));
9511 return moduleTests.release();
// Builds the "loop" group ("Looping control flow"). Each case installs a
// different %test_code body into fragments["testfun"] and registers it with
// createTestsForAllStages. defaultColors is passed as both the input and the
// expected colors, so every shader is expected to hand its input back unchanged.
9514 tcu::TestCaseGroup* createLoopTests(tcu::TestContext& testCtx)
9516 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "loop", "Looping control flow"));
9517 RGBA defaultColors[4];
9518 getDefaultColors(defaultColors);
9519 map<string, string> fragments;
// %c_f32_5 is the divisor used by the "break" and "return" shaders below.
9520 fragments["pre_main"] =
9521 "%c_f32_5 = OpConstant %f32 5.\n";
9523 // A loop with a single block. The Continue Target is the loop block
9524 // itself. In SPIR-V terms, the "loop construct" contains no blocks at all
9525 // -- the "continue construct" forms the entire loop.
9526 fragments["testfun"] =
9527 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9528 "%param1 = OpFunctionParameter %v4f32\n"
9530 "%entry = OpLabel\n"
9531 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9534 ";adds and subtracts 1.0 to %val in alternate iterations\n"
// %count starts at 4 and the loop repeats while the decremented count is > 0,
// so +1 and -1 are each applied twice and %val ends equal to %val0.
9536 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9537 "%delta = OpPhi %f32 %c_f32_1 %entry %minus_delta %loop\n"
9538 "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9539 "%val = OpFAdd %f32 %val1 %delta\n"
9540 "%minus_delta = OpFSub %f32 %c_f32_0 %delta\n"
9541 "%count__ = OpISub %i32 %count %c_i32_1\n"
9542 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9543 "OpLoopMerge %exit %loop None\n"
9544 "OpBranchConditional %again %loop %exit\n"
9547 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9548 "OpReturnValue %result\n"
9552 createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());
9554 // Body comprised of multiple basic blocks.
9555 const StringTemplate multiBlock(
9556 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9557 "%param1 = OpFunctionParameter %v4f32\n"
9559 "%entry = OpLabel\n"
9560 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9563 ";adds and subtracts 1.0 to %val in alternate iterations\n"
9565 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %cont\n"
9566 "%delta = OpPhi %f32 %c_f32_1 %entry %delta_next %cont\n"
9567 "%val1 = OpPhi %f32 %val0 %entry %val %cont\n"
9568 // There are several possibilities for the Continue Target below. Each
9569 // will be specialized into a separate test case.
9570 "OpLoopMerge %exit ${continue_target} None\n"
9574 ";delta_next = (delta > 0) ? -1 : 1;\n"
9575 "%gt0 = OpFOrdGreaterThan %bool %delta %c_f32_0\n"
9576 "OpSelectionMerge %gather DontFlatten\n"
9577 "OpBranchConditional %gt0 %even %odd ;tells us if %count is even or odd\n"
9580 "OpBranch %gather\n"
9583 "OpBranch %gather\n"
9585 "%gather = OpLabel\n"
9586 "%delta_next = OpPhi %f32 %c_f32_n1 %even %c_f32_1 %odd\n"
9587 "%val = OpFAdd %f32 %val1 %delta\n"
9588 "%count__ = OpISub %i32 %count %c_i32_1\n"
9592 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9593 "OpBranchConditional %again %loop %exit\n"
9596 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9597 "OpReturnValue %result\n"
// Substitution map for ${continue_target}; reused for both multi-block cases.
9601 map<string, string> continue_target;
9603 // The Continue Target is the loop block itself.
// NOTE(review): the value specialized here is "%if", not "%loop", so the comment
// above may be stale; confirm which block is intended as the Continue Target.
9604 continue_target["continue_target"] = "%if";
9605 fragments["testfun"] = multiBlock.specialize(continue_target);
9606 createTestsForAllStages("multi_block_continue_construct", defaultColors, defaultColors, fragments, testGroup.get());
9608 // The Continue Target is at the end of the loop.
9609 continue_target["continue_target"] = "%cont";
9610 fragments["testfun"] = multiBlock.specialize(continue_target);
9611 createTestsForAllStages("multi_block_loop_construct", defaultColors, defaultColors, fragments, testGroup.get());
9613 // A loop with continue statement.
9614 fragments["testfun"] =
9615 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9616 "%param1 = OpFunctionParameter %v4f32\n"
9618 "%entry = OpLabel\n"
9619 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9622 ";adds 4, 3, and 1 to %val0 (skips 2)\n"
9624 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9625 "%val1 = OpPhi %f32 %val0 %entry %val %continue\n"
9626 "OpLoopMerge %exit %continue None\n"
9630 ";skip if %count==2\n"
9631 "%eq2 = OpIEqual %bool %count %c_i32_2\n"
9632 "OpBranchConditional %eq2 %continue %body\n"
9635 "%fcount = OpConvertSToF %f32 %count\n"
9636 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9637 "OpBranch %continue\n"
9639 "%continue = OpLabel\n"
9640 "%val = OpPhi %f32 %val2 %body %val1 %if\n"
9641 "%count__ = OpISub %i32 %count %c_i32_1\n"
9642 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9643 "OpBranchConditional %again %loop %exit\n"
// 4 + 3 + 1 = 8 was accumulated, so subtracting %c_f32_8 restores %val0.
9646 "%same = OpFSub %f32 %val %c_f32_8\n"
9647 "%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9648 "OpReturnValue %result\n"
9650 createTestsForAllStages("continue", defaultColors, defaultColors, fragments, testGroup.get());
9652 // A loop with break.
9653 fragments["testfun"] =
9654 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9655 "%param1 = OpFunctionParameter %v4f32\n"
9657 "%entry = OpLabel\n"
9658 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
// dot / 5 is below 1, so the float-to-unsigned conversion yields 0 at run time
// and %two evaluates to 2.
9659 "%dot = OpDot %f32 %param1 %param1\n"
9660 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9661 "%zero = OpConvertFToU %u32 %div\n"
9662 "%two = OpIAdd %i32 %zero %c_i32_2\n"
9663 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9666 ";adds 4 and 3 to %val0 (exits early)\n"
9668 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9669 "%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9670 "OpLoopMerge %exit %continue None\n"
9674 ";end loop if %count==%two\n"
9675 "%above2 = OpSGreaterThan %bool %count %two\n"
9676 "OpBranchConditional %above2 %body %exit\n"
9679 "%fcount = OpConvertSToF %f32 %count\n"
9680 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9681 "OpBranch %continue\n"
9683 "%continue = OpLabel\n"
9684 "%count__ = OpISub %i32 %count %c_i32_1\n"
9685 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9686 "OpBranchConditional %again %loop %exit\n"
// 4 + 3 = 7 was accumulated before the break, so subtracting %c_f32_7 restores
// %val0.
9689 "%val_post = OpPhi %f32 %val2 %continue %val1 %if\n"
9690 "%same = OpFSub %f32 %val_post %c_f32_7\n"
9691 "%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9692 "OpReturnValue %result\n"
9694 createTestsForAllStages("break", defaultColors, defaultColors, fragments, testGroup.get());
9696 // A loop with return.
9697 fragments["testfun"] =
9698 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9699 "%param1 = OpFunctionParameter %v4f32\n"
9701 "%entry = OpLabel\n"
9702 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9703 "%dot = OpDot %f32 %param1 %param1\n"
9704 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9705 "%zero = OpConvertFToU %u32 %div\n"
9706 "%two = OpIAdd %i32 %zero %c_i32_2\n"
9707 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9710 ";returns early without modifying %param1\n"
9712 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9713 "%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9714 "OpLoopMerge %exit %continue None\n"
9718 ";return if %count==%two\n"
9719 "%above2 = OpSGreaterThan %bool %count %two\n"
9720 "OpSelectionMerge %body DontFlatten\n"
9721 "OpBranchConditional %above2 %body %early_exit\n"
9723 "%early_exit = OpLabel\n"
9724 "OpReturnValue %param1\n"
9727 "%fcount = OpConvertSToF %f32 %count\n"
9728 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9729 "OpBranch %continue\n"
9731 "%continue = OpLabel\n"
9732 "%count__ = OpISub %i32 %count %c_i32_1\n"
9733 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9734 "OpBranchConditional %again %loop %exit\n"
9737 ";should never get here, so return an incorrect result\n"
9738 "%result = OpVectorInsertDynamic %v4f32 %param1 %val2 %c_i32_0\n"
9739 "OpReturnValue %result\n"
9741 createTestsForAllStages("return", defaultColors, defaultColors, fragments, testGroup.get());
9743 // Continue inside a switch block to break to enclosing loop's merge block.
9744 // Matches roughly the following GLSL code:
9745 // for (; keep_going; keep_going = false)
9747 // switch (int(param1.x))
9749 // case 0: continue;
9750 // case 1: continue;
9751 // default: continue;
9753 // dead code: modify return value to invalid result.
// pre_main is replaced here, so %c_f32_5 is no longer declared for this case.
9755 fragments["pre_main"] =
9756 "%fp_bool = OpTypePointer Function %bool\n"
9757 "%true = OpConstantTrue %bool\n"
9758 "%false = OpConstantFalse %bool\n";
9760 fragments["testfun"] =
9761 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9762 "%param1 = OpFunctionParameter %v4f32\n"
9764 "%entry = OpLabel\n"
9765 "%keep_going = OpVariable %fp_bool Function\n"
9766 "%val_ptr = OpVariable %fp_f32 Function\n"
9767 "%param1_x = OpCompositeExtract %f32 %param1 0\n"
9768 "OpStore %keep_going %true\n"
9769 "OpBranch %forloop_begin\n"
9771 "%forloop_begin = OpLabel\n"
9772 "OpLoopMerge %forloop_merge %forloop_continue None\n"
9773 "OpBranch %forloop\n"
9775 "%forloop = OpLabel\n"
9776 "%for_condition = OpLoad %bool %keep_going\n"
9777 "OpBranchConditional %for_condition %forloop_body %forloop_merge\n"
9779 "%forloop_body = OpLabel\n"
// The body stores param1.x, so the value loaded after the loop matches the input
// unless the dead %switch_merge block overwrites it.
9780 "OpStore %val_ptr %param1_x\n"
9781 "%param1_x_int = OpConvertFToS %i32 %param1_x\n"
9783 "OpSelectionMerge %switch_merge None\n"
9784 "OpSwitch %param1_x_int %default 0 %case_0 1 %case_1\n"
9785 "%case_0 = OpLabel\n"
9786 "OpBranch %forloop_continue\n"
9787 "%case_1 = OpLabel\n"
9788 "OpBranch %forloop_continue\n"
9789 "%default = OpLabel\n"
9790 "OpBranch %forloop_continue\n"
9791 "%switch_merge = OpLabel\n"
9792 ";should never get here, so change the return value to invalid result\n"
9793 "OpStore %val_ptr %c_f32_1\n"
9794 "OpBranch %forloop_continue\n"
9796 "%forloop_continue = OpLabel\n"
9797 "OpStore %keep_going %false\n"
9798 "OpBranch %forloop_begin\n"
9799 "%forloop_merge = OpLabel\n"
9801 "%val = OpLoad %f32 %val_ptr\n"
9802 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9803 "OpReturnValue %result\n"
9805 createTestsForAllStages("switch_continue", defaultColors, defaultColors, fragments, testGroup.get());
9807 return testGroup.release();
9810 // A collection of tests putting OpControlBarrier in places GLSL forbids but SPIR-V allows.
// Builds the "barrier" group. Each case installs a %test_code body containing
// OpControlBarrier into fragments["testfun"] and is registered with
// addTessCtrlTest. Cases: in_function, in_switch, in_if, after_divergent_if and
// in_loop.
9811 tcu::TestCaseGroup* createBarrierTests(tcu::TestContext& testCtx)
9813 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "barrier", "OpControlBarrier"));
9814 map<string, string> fragments;
9816 // A barrier inside a function body.
// The three constants are the OpControlBarrier operands (execution scope,
// memory scope, memory semantics), declared as i32 values 2, 4 and 0.
9817 fragments["pre_main"] =
9818 "%Workgroup = OpConstant %i32 2\n"
9819 "%Invocation = OpConstant %i32 4\n"
9820 "%MemorySemanticsNone = OpConstant %i32 0\n";
9821 fragments["testfun"] =
9822 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9823 "%param1 = OpFunctionParameter %v4f32\n"
9824 "%label_testfun = OpLabel\n"
9825 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9826 "OpReturnValue %param1\n"
9828 addTessCtrlTest(testGroup.get(), "in_function", fragments);
9830 // Common setup code for the following tests.
9831 fragments["pre_main"] =
9832 "%Workgroup = OpConstant %i32 2\n"
9833 "%Invocation = OpConstant %i32 4\n"
9834 "%MemorySemanticsNone = OpConstant %i32 0\n"
9835 "%c_f32_5 = OpConstant %f32 5.\n";
9836 const string setupPercentZero = // Begins %test_code function with code that sets %zero to 0u but cannot be optimized away.
9837 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9838 "%param1 = OpFunctionParameter %v4f32\n"
9839 "%entry = OpLabel\n"
9840 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9841 "%dot = OpDot %f32 %param1 %param1\n"
9842 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9843 "%zero = OpConvertFToU %u32 %div\n";
9845 // Barriers inside OpSwitch branches.
9846 fragments["testfun"] =
9848 "OpSelectionMerge %switch_exit None\n"
9849 "OpSwitch %zero %switch_default 0 %case0 1 %case1 ;should always go to %case0\n"
9851 "%case1 = OpLabel\n"
9852 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9853 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9854 "%wrong_branch_alert1 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9855 "OpBranch %switch_exit\n"
9857 "%switch_default = OpLabel\n"
9858 "%wrong_branch_alert2 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9859 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9860 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9861 "OpBranch %switch_exit\n"
9863 "%case0 = OpLabel\n"
9864 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9865 "OpBranch %switch_exit\n"
9867 "%switch_exit = OpLabel\n"
// Only the %case0 path forwards %param1 unchanged; the other two paths return a
// vector whose component 0 was replaced with %c_f32_0_5.
9868 "%ret = OpPhi %v4f32 %param1 %case0 %wrong_branch_alert1 %case1 %wrong_branch_alert2 %switch_default\n"
9869 "OpReturnValue %ret\n"
9871 addTessCtrlTest(testGroup.get(), "in_switch", fragments);
9873 // Barriers inside if-then-else.
9874 fragments["testfun"] =
9876 "%eq0 = OpIEqual %bool %zero %c_u32_0\n"
9877 "OpSelectionMerge %exit DontFlatten\n"
9878 "OpBranchConditional %eq0 %then %else\n"
9881 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9882 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9883 "%wrong_branch_alert = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9887 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9890 "%ret = OpPhi %v4f32 %param1 %then %wrong_branch_alert %else\n"
9891 "OpReturnValue %ret\n"
9893 addTessCtrlTest(testGroup.get(), "in_if", fragments);
9895 // A barrier after control-flow reconvergence, tempting the compiler to attempt something like this:
9896 // http://lists.llvm.org/pipermail/llvm-dev/2009-October/026317.html.
9897 fragments["testfun"] =
// The branch condition depends on the invocation id loaded from
// %BP_gl_InvocationID, so it can differ between invocations.
9899 "%thread_id = OpLoad %i32 %BP_gl_InvocationID\n"
9900 "%thread0 = OpIEqual %bool %thread_id %c_i32_0\n"
9901 "OpSelectionMerge %exit DontFlatten\n"
9902 "OpBranchConditional %thread0 %then %else\n"
9905 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9909 "%val1 = OpVectorExtractDynamic %f32 %param1 %zero\n"
9913 "%val = OpPhi %f32 %val0 %else %val1 %then\n"
9914 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9915 "%ret = OpVectorInsertDynamic %v4f32 %param1 %val %zero\n"
9916 "OpReturnValue %ret\n"
9918 addTessCtrlTest(testGroup.get(), "after_divergent_if", fragments);
9920 // A barrier inside a loop.
9921 fragments["pre_main"] =
9922 "%Workgroup = OpConstant %i32 2\n"
9923 "%Invocation = OpConstant %i32 4\n"
9924 "%MemorySemanticsNone = OpConstant %i32 0\n"
9925 "%c_f32_10 = OpConstant %f32 10.\n";
9926 fragments["testfun"] =
9927 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9928 "%param1 = OpFunctionParameter %v4f32\n"
9929 "%entry = OpLabel\n"
9930 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9933 ";adds 4, 3, 2, and 1 to %val0\n"
9935 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9936 "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9937 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9938 "%fcount = OpConvertSToF %f32 %count\n"
9939 "%val = OpFAdd %f32 %val1 %fcount\n"
9940 "%count__ = OpISub %i32 %count %c_i32_1\n"
9941 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9942 "OpLoopMerge %exit %loop None\n"
9943 "OpBranchConditional %again %loop %exit\n"
// 4 + 3 + 2 + 1 = 10 was accumulated, so subtracting %c_f32_10 restores %val0.
9946 "%same = OpFSub %f32 %val %c_f32_10\n"
9947 "%ret = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9948 "OpReturnValue %ret\n"
9950 addTessCtrlTest(testGroup.get(), "in_loop", fragments);
9952 return testGroup.release();
9955 // Test for the OpFRem instruction.
// Builds the "frem" group with a single "frem" case registered for all stages.
// The shader rescales the input color, applies OpFRem against (3, -3, 3, -3),
// offsets the result by 0.75 and forces the last two components to 0 and 1.
9956 tcu::TestCaseGroup* createFRemTests(tcu::TestContext& testCtx)
9958 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "frem", "OpFRem"));
9959 map<string, string> fragments;
9960 RGBA inputColors[4];
9961 RGBA outputColors[4];
9963 fragments["pre_main"] =
9964 "%c_f32_3 = OpConstant %f32 3.0\n"
9965 "%c_f32_n3 = OpConstant %f32 -3.0\n"
9966 "%c_f32_4 = OpConstant %f32 4.0\n"
9967 "%c_f32_p75 = OpConstant %f32 0.75\n"
9968 "%c_v4f32_p75_p75_p75_p75 = OpConstantComposite %v4f32 %c_f32_p75 %c_f32_p75 %c_f32_p75 %c_f32_p75 \n"
9969 "%c_v4f32_4_4_4_4 = OpConstantComposite %v4f32 %c_f32_4 %c_f32_4 %c_f32_4 %c_f32_4\n"
9970 "%c_v4f32_3_n3_3_n3 = OpConstantComposite %v4f32 %c_f32_3 %c_f32_n3 %c_f32_3 %c_f32_n3\n";
9972 // The test does the following.
9973 // vec4 result = (param1 * 8.0) - 4.0;
9974 // return (frem(result.x,3) + 0.75, frem(result.y, -3) + 0.75, 0, 1)
9975 fragments["testfun"] =
9976 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9977 "%param1 = OpFunctionParameter %v4f32\n"
9978 "%label_testfun = OpLabel\n"
9979 "%v_times_8 = OpVectorTimesScalar %v4f32 %param1 %c_f32_8\n"
9980 "%minus_4 = OpFSub %v4f32 %v_times_8 %c_v4f32_4_4_4_4\n"
9981 "%frem = OpFRem %v4f32 %minus_4 %c_v4f32_3_n3_3_n3\n"
9982 "%added = OpFAdd %v4f32 %frem %c_v4f32_p75_p75_p75_p75\n"
// Component 3 is overwritten with 1.0 first, then component 2 with 0.0.
9983 "%xyz_1 = OpVectorInsertDynamic %v4f32 %added %c_f32_1 %c_i32_3\n"
9984 "%xy_0_1 = OpVectorInsertDynamic %v4f32 %xyz_1 %c_f32_0 %c_i32_2\n"
9985 "OpReturnValue %xy_0_1\n"
// Inputs use only the channel values 16 and 232 in x and y, in all four
// combinations. 16/255 rescales to about -3.5 and 232/255 to about 3.28.
9989 inputColors[0] = RGBA(16, 16, 0, 255);
9990 inputColors[1] = RGBA(232, 232, 0, 255);
9991 inputColors[2] = RGBA(232, 16, 0, 255);
9992 inputColors[3] = RGBA(16, 232, 0, 255);
// Expected outputs: an input channel of 16 maps to 64 and 232 maps to 255.
9994 outputColors[0] = RGBA(64, 64, 0, 255);
9995 outputColors[1] = RGBA(255, 255, 0, 255);
9996 outputColors[2] = RGBA(255, 64, 0, 255);
9997 outputColors[3] = RGBA(64, 255, 0, 255);
9999 createTestsForAllStages("frem", inputColors, outputColors, fragments, testGroup.get());
10000 return testGroup.release();
10003 // Test for the OpSRem instruction.
// Builds the "srem" group. Each entry of cases[] becomes one set of all-stage
// tests. negFailResult is stored as the failResult of the negative-operand case
// and forwarded to createTestsForAllStages together with that case's failure
// message template.
10004 tcu::TestCaseGroup* createOpSRemGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
10006 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "srem", "OpSRem"));
10007 map<string, string> fragments;
10009 fragments["pre_main"] =
10010 "%c_f32_255 = OpConstant %f32 255.0\n"
10011 "%c_i32_128 = OpConstant %i32 128\n"
10012 "%c_i32_255 = OpConstant %i32 255\n"
10013 "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
10014 "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10015 "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10017 // The test does the following.
10018 // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10019 // ivec4 result = ivec4(srem(ints.x, ints.y), srem(ints.y, ints.z), srem(ints.z, ints.x), 255);
10020 // return float(result + 128) / 255.0;
10021 fragments["testfun"] =
10022 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10023 "%param1 = OpFunctionParameter %v4f32\n"
10024 "%label_testfun = OpLabel\n"
// Despite its name, %div255 holds param1 multiplied by 255 (OpFMul).
10025 "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10026 "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10027 "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10028 "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10029 "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10030 "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10031 "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10032 "%x_out = OpSRem %i32 %x_in %y_in\n"
10033 "%y_out = OpSRem %i32 %y_in %z_in\n"
10034 "%z_out = OpSRem %i32 %z_in %x_in\n"
10035 "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10036 "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10037 "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10038 "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10039 "OpReturnValue %float_out\n"
// Per-case parameters. The loop below also reads a name member that is not
// visible in this listing.
10042 const struct CaseParams
10045 const char* failMessageTemplate; // customized status message
10046 qpTestResult failResult; // override status on failure
10047 int operands[4][3]; // four (x, y, z) vectors of operands
10048 int results[4][3]; // four (x, y, z) vectors of results
10054 QP_TEST_RESULT_FAIL,
10055 { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } }, // operands
10056 { { 5, 12, 2 }, { 0, 5, 2 }, { 3, 8, 6 }, { 25, 60, 0 } }, // results
10060 "Inconsistent results, but within specification: ${reason}",
10061 negFailResult, // negative operands, not required by the spec
10062 { { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } }, // operands
10063 { { 5, 12, -2 }, { 0, -5, 2 }, { 3, 8, -6 }, { 25, -60, 0 } }, // results
10066 // If either operand is negative the result is undefined. Some implementations may still return correct values.
10068 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10070 const CaseParams& params = cases[caseNdx];
10071 RGBA inputColors[4];
10072 RGBA outputColors[4];
// Operands and results are biased by +128 when encoded as color channels; this
// mirrors the -128 / +128 offsets applied inside the shader.
10074 for (int i = 0; i < 4; ++i)
10076 inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10077 outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
10080 createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
10083 return testGroup.release();
10086 // Test for the OpSMod instruction.
10087 tcu::TestCaseGroup* createOpSModGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
10089 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "smod", "OpSMod"));
10090 map<string, string> fragments;
10092 fragments["pre_main"] =
10093 "%c_f32_255 = OpConstant %f32 255.0\n"
10094 "%c_i32_128 = OpConstant %i32 128\n"
10095 "%c_i32_255 = OpConstant %i32 255\n"
10096 "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
10097 "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10098 "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10100 // The test does the following.
10101 // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10102 // ivec4 result = ivec4(smod(ints.x, ints.y), smod(ints.y, ints.z), smod(ints.z, ints.x), 255);
10103 // return float(result + 128) / 255.0;
10104 fragments["testfun"] =
10105 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10106 "%param1 = OpFunctionParameter %v4f32\n"
10107 "%label_testfun = OpLabel\n"
10108 "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10109 "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10110 "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10111 "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10112 "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10113 "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10114 "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10115 "%x_out = OpSMod %i32 %x_in %y_in\n"
10116 "%y_out = OpSMod %i32 %y_in %z_in\n"
10117 "%z_out = OpSMod %i32 %z_in %x_in\n"
10118 "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10119 "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10120 "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10121 "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10122 "OpReturnValue %float_out\n"
10125 const struct CaseParams
10128 const char* failMessageTemplate; // customized status message
10129 qpTestResult failResult; // override status on failure
10130 int operands[4][3]; // four (x, y, z) vectors of operands
10131 int results[4][3]; // four (x, y, z) vectors of results
10137 QP_TEST_RESULT_FAIL,
10138 { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } }, // operands
10139 { { 5, 12, 2 }, { 0, 5, 2 }, { 3, 8, 6 }, { 25, 60, 0 } }, // results
10143 "Inconsistent results, but within specification: ${reason}",
10144 negFailResult, // negative operands, not required by the spec
10145 { { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } }, // operands
10146 { { 5, -5, 3 }, { 0, 2, -3 }, { 3, -73, 69 }, { -35, 40, 0 } }, // results
10149 // If either operand is negative the result is undefined. Some implementations may still return correct values.
10151 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10153 const CaseParams& params = cases[caseNdx];
10154 RGBA inputColors[4];
10155 RGBA outputColors[4];
10157 for (int i = 0; i < 4; ++i)
10159 inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10160 outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
10163 createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
10165 return testGroup.release();
// Element types that the conversion tests can use as the source or the
// destination of a convert instruction.
enum ConversionDataType
{
	// Signed integers.
	DATA_TYPE_SIGNED_8 = 0,
	DATA_TYPE_SIGNED_16,
	DATA_TYPE_SIGNED_32,
	DATA_TYPE_SIGNED_64,

	// Unsigned integers.
	DATA_TYPE_UNSIGNED_8,
	DATA_TYPE_UNSIGNED_16,
	DATA_TYPE_UNSIGNED_32,
	DATA_TYPE_UNSIGNED_64,

	// Floating point.
	DATA_TYPE_FLOAT_16,
	DATA_TYPE_FLOAT_32,
	DATA_TYPE_FLOAT_64,

	// Two-component signed integer vectors.
	DATA_TYPE_VEC2_SIGNED_16,
	DATA_TYPE_VEC2_SIGNED_32
};
10185 const string getBitWidthStr (ConversionDataType type)
10189 case DATA_TYPE_SIGNED_8:
10190 case DATA_TYPE_UNSIGNED_8:
10193 case DATA_TYPE_SIGNED_16:
10194 case DATA_TYPE_UNSIGNED_16:
10195 case DATA_TYPE_FLOAT_16:
10198 case DATA_TYPE_SIGNED_32:
10199 case DATA_TYPE_UNSIGNED_32:
10200 case DATA_TYPE_FLOAT_32:
10201 case DATA_TYPE_VEC2_SIGNED_16:
10204 case DATA_TYPE_SIGNED_64:
10205 case DATA_TYPE_UNSIGNED_64:
10206 case DATA_TYPE_FLOAT_64:
10207 case DATA_TYPE_VEC2_SIGNED_32:
10216 const string getByteWidthStr (ConversionDataType type)
10220 case DATA_TYPE_SIGNED_8:
10221 case DATA_TYPE_UNSIGNED_8:
10224 case DATA_TYPE_SIGNED_16:
10225 case DATA_TYPE_UNSIGNED_16:
10226 case DATA_TYPE_FLOAT_16:
10229 case DATA_TYPE_SIGNED_32:
10230 case DATA_TYPE_UNSIGNED_32:
10231 case DATA_TYPE_FLOAT_32:
10232 case DATA_TYPE_VEC2_SIGNED_16:
10235 case DATA_TYPE_SIGNED_64:
10236 case DATA_TYPE_UNSIGNED_64:
10237 case DATA_TYPE_FLOAT_64:
10238 case DATA_TYPE_VEC2_SIGNED_32:
10247 bool isSigned (ConversionDataType type)
10251 case DATA_TYPE_SIGNED_8:
10252 case DATA_TYPE_SIGNED_16:
10253 case DATA_TYPE_SIGNED_32:
10254 case DATA_TYPE_SIGNED_64:
10255 case DATA_TYPE_FLOAT_16:
10256 case DATA_TYPE_FLOAT_32:
10257 case DATA_TYPE_FLOAT_64:
10258 case DATA_TYPE_VEC2_SIGNED_16:
10259 case DATA_TYPE_VEC2_SIGNED_32:
10262 case DATA_TYPE_UNSIGNED_8:
10263 case DATA_TYPE_UNSIGNED_16:
10264 case DATA_TYPE_UNSIGNED_32:
10265 case DATA_TYPE_UNSIGNED_64:
10274 bool isInt (ConversionDataType type)
10278 case DATA_TYPE_SIGNED_8:
10279 case DATA_TYPE_SIGNED_16:
10280 case DATA_TYPE_SIGNED_32:
10281 case DATA_TYPE_SIGNED_64:
10282 case DATA_TYPE_UNSIGNED_8:
10283 case DATA_TYPE_UNSIGNED_16:
10284 case DATA_TYPE_UNSIGNED_32:
10285 case DATA_TYPE_UNSIGNED_64:
10288 case DATA_TYPE_FLOAT_16:
10289 case DATA_TYPE_FLOAT_32:
10290 case DATA_TYPE_FLOAT_64:
10291 case DATA_TYPE_VEC2_SIGNED_16:
10292 case DATA_TYPE_VEC2_SIGNED_32:
10301 bool isFloat (ConversionDataType type)
10305 case DATA_TYPE_SIGNED_8:
10306 case DATA_TYPE_SIGNED_16:
10307 case DATA_TYPE_SIGNED_32:
10308 case DATA_TYPE_SIGNED_64:
10309 case DATA_TYPE_UNSIGNED_8:
10310 case DATA_TYPE_UNSIGNED_16:
10311 case DATA_TYPE_UNSIGNED_32:
10312 case DATA_TYPE_UNSIGNED_64:
10313 case DATA_TYPE_VEC2_SIGNED_16:
10314 case DATA_TYPE_VEC2_SIGNED_32:
10317 case DATA_TYPE_FLOAT_16:
10318 case DATA_TYPE_FLOAT_32:
10319 case DATA_TYPE_FLOAT_64:
10328 const string getTypeName (ConversionDataType type)
10330 string prefix = isSigned(type) ? "" : "u";
10332 if (isInt(type)) return prefix + "int" + getBitWidthStr(type);
10333 else if (isFloat(type)) return prefix + "float" + getBitWidthStr(type);
10334 else if (type == DATA_TYPE_VEC2_SIGNED_16) return "i16vec2";
10335 else if (type == DATA_TYPE_VEC2_SIGNED_32) return "i32vec2";
10336 else DE_ASSERT(false);
10341 const string getTestName (ConversionDataType from, ConversionDataType to, const char* suffix)
10343 const string fullSuffix(suffix == DE_NULL ? "" : string("_") + string(suffix));
10345 return getTypeName(from) + "_to_" + getTypeName(to) + fullSuffix;
10348 const string getAsmTypeName (ConversionDataType type, deUint32 elements = 1)
10352 if (isInt(type)) prefix = isSigned(type) ? "i" : "u";
10353 else if (isFloat(type)) prefix = "f";
10354 else if (type == DATA_TYPE_VEC2_SIGNED_16) return "i16vec2";
10355 else if (type == DATA_TYPE_VEC2_SIGNED_32) return "v2i32";
10356 else DE_ASSERT(false);
10357 if ((isInt(type) || isFloat(type)) && elements == 2)
10359 prefix = "v2" + prefix;
10362 return prefix + getBitWidthStr(type);
10365 template<typename T>
10366 BufferSp getSpecializedBuffer (deInt64 number, deUint32 elements = 1)
10368 return BufferSp(new Buffer<T>(vector<T>(elements, (T)number)));
10371 BufferSp getBuffer (ConversionDataType type, deInt64 number, deUint32 elements = 1)
10375 case DATA_TYPE_SIGNED_8: return getSpecializedBuffer<deInt8>(number, elements);
10376 case DATA_TYPE_SIGNED_16: return getSpecializedBuffer<deInt16>(number, elements);
10377 case DATA_TYPE_SIGNED_32: return getSpecializedBuffer<deInt32>(number, elements);
10378 case DATA_TYPE_SIGNED_64: return getSpecializedBuffer<deInt64>(number, elements);
10379 case DATA_TYPE_UNSIGNED_8: return getSpecializedBuffer<deUint8>(number, elements);
10380 case DATA_TYPE_UNSIGNED_16: return getSpecializedBuffer<deUint16>(number, elements);
10381 case DATA_TYPE_UNSIGNED_32: return getSpecializedBuffer<deUint32>(number, elements);
10382 case DATA_TYPE_UNSIGNED_64: return getSpecializedBuffer<deUint64>(number, elements);
10383 case DATA_TYPE_FLOAT_16: return getSpecializedBuffer<deUint16>(number, elements);
10384 case DATA_TYPE_FLOAT_32: return getSpecializedBuffer<deUint32>(number, elements);
10385 case DATA_TYPE_FLOAT_64: return getSpecializedBuffer<deUint64>(number, elements);
10386 case DATA_TYPE_VEC2_SIGNED_16: return getSpecializedBuffer<deUint32>(number, elements);
10387 case DATA_TYPE_VEC2_SIGNED_32: return getSpecializedBuffer<deUint64>(number, elements);
10389 default: TCU_THROW(InternalError, "Unimplemented type passed");
10393 bool usesInt8 (ConversionDataType from, ConversionDataType to)
10395 return (from == DATA_TYPE_SIGNED_8 || to == DATA_TYPE_SIGNED_8 ||
10396 from == DATA_TYPE_UNSIGNED_8 || to == DATA_TYPE_UNSIGNED_8);
10399 bool usesInt16 (ConversionDataType from, ConversionDataType to)
10401 return (from == DATA_TYPE_SIGNED_16 || to == DATA_TYPE_SIGNED_16 ||
10402 from == DATA_TYPE_UNSIGNED_16 || to == DATA_TYPE_UNSIGNED_16 ||
10403 from == DATA_TYPE_VEC2_SIGNED_16 || to == DATA_TYPE_VEC2_SIGNED_16);
10406 bool usesInt32 (ConversionDataType from, ConversionDataType to)
10408 return (from == DATA_TYPE_SIGNED_32 || to == DATA_TYPE_SIGNED_32 ||
10409 from == DATA_TYPE_UNSIGNED_32 || to == DATA_TYPE_UNSIGNED_32 ||
10410 from == DATA_TYPE_VEC2_SIGNED_32|| to == DATA_TYPE_VEC2_SIGNED_32);
10413 bool usesInt64 (ConversionDataType from, ConversionDataType to)
10415 return (from == DATA_TYPE_SIGNED_64 || to == DATA_TYPE_SIGNED_64 ||
10416 from == DATA_TYPE_UNSIGNED_64 || to == DATA_TYPE_UNSIGNED_64);
10419 bool usesFloat16 (ConversionDataType from, ConversionDataType to)
10421 return (from == DATA_TYPE_FLOAT_16 || to == DATA_TYPE_FLOAT_16);
10424 bool usesFloat32 (ConversionDataType from, ConversionDataType to)
10426 return (from == DATA_TYPE_FLOAT_32 || to == DATA_TYPE_FLOAT_32);
10429 bool usesFloat64 (ConversionDataType from, ConversionDataType to)
10431 return (from == DATA_TYPE_FLOAT_64 || to == DATA_TYPE_FLOAT_64);
10434 void getVulkanFeaturesAndExtensions (ConversionDataType from, ConversionDataType to, bool useStorageExt, VulkanFeatures& vulkanFeatures, vector<string>& extensions)
10436 if (usesInt16(from, to) && !usesInt32(from, to))
10437 vulkanFeatures.coreFeatures.shaderInt16 = DE_TRUE;
10439 if (usesInt64(from, to))
10440 vulkanFeatures.coreFeatures.shaderInt64 = DE_TRUE;
10442 if (usesFloat64(from, to))
10443 vulkanFeatures.coreFeatures.shaderFloat64 = DE_TRUE;
10445 if ((usesInt16(from, to) || usesFloat16(from, to)) && useStorageExt)
10447 extensions.push_back("VK_KHR_16bit_storage");
10448 vulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
10451 if (usesFloat16(from, to) || usesInt8(from, to))
10453 extensions.push_back("VK_KHR_shader_float16_int8");
10455 if (usesFloat16(from, to))
10457 vulkanFeatures.extFloat16Int8.shaderFloat16 = true;
10460 if (usesInt8(from, to))
10462 vulkanFeatures.extFloat16Int8.shaderInt8 = true;
10464 extensions.push_back("VK_KHR_8bit_storage");
10465 vulkanFeatures.ext8BitStorage.storageBuffer8BitAccess = true;
10472 ConvertCase (const string& instruction, ConversionDataType from, ConversionDataType to, deInt64 number, bool separateOutput = false, deInt64 outputNumber = 0, const char* suffix = DE_NULL, bool useStorageExt = true)
10473 : m_fromType (from)
10476 , m_useStorageExt (useStorageExt)
10477 , m_name (getTestName(from, to, suffix))
10483 m_asmTypes["inStorageType"] = getAsmTypeName(from);
10484 m_asmTypes["outStorageType"] = getAsmTypeName(to);
10485 m_asmTypes["inCast"] = "OpCopyObject";
10486 m_asmTypes["outCast"] = "OpCopyObject";
10487 // If the storage extensions are being avoided, tests instead uses
10488 // vectors so that they are easily convertible to 32-bit integers.
10489 // |m_elements| indicates the size of the vector. It modifies how many
10490 // items added to the buffers and converted in the tests.
10492 // Currently only supports 1 (default) or 2 elements.
10493 if (!m_useStorageExt)
10495 bool in_change = false;
10496 bool out_change = false;
10497 if (usesFloat16(from, from) || usesInt16(from, from))
10499 m_asmTypes["inStorageType"] = "u32";
10500 m_asmTypes["inCast"] = "OpBitcast";
10504 if (usesFloat16(to, to) || usesInt16(to, to))
10506 m_asmTypes["outStorageType"] = "u32";
10507 m_asmTypes["outCast"] = "OpBitcast";
10511 if (in_change && !out_change)
10513 m_asmTypes["outStorageType"] = getAsmTypeName(to, m_elements);
10515 if (!in_change && out_change)
10517 m_asmTypes["inStorageType"] = getAsmTypeName(from, m_elements);
10521 // Safety check for implementation.
10522 if (m_elements < 1 || m_elements > 2)
10523 TCU_THROW(InternalError, "Unsupported number of elements");
10525 m_asmTypes["inputType"] = getAsmTypeName(from, m_elements);
10526 m_asmTypes["outputType"] = getAsmTypeName(to, m_elements);
10528 m_inputBuffer = getBuffer(from, number, m_elements);
10529 if (separateOutput)
10530 m_outputBuffer = getBuffer(to, outputNumber, m_elements);
10532 m_outputBuffer = getBuffer(to, number, m_elements);
10534 if (usesInt8(from, to))
10536 bool requiresInt8Capability = true;
10537 if (instruction == "OpUConvert" || instruction == "OpSConvert")
10539 // Conversions between 8 and 32 bit are provided by SPV_KHR_8bit_storage. The rest requires explicit Int8
10540 if (usesInt32(from, to))
10541 requiresInt8Capability = false;
10544 caps += "OpCapability StorageBuffer8BitAccess\n";
10545 if (requiresInt8Capability)
10546 caps += "OpCapability Int8\n";
10548 decl += "%i8 = OpTypeInt 8 1\n"
10549 "%u8 = OpTypeInt 8 0\n";
10551 if (m_elements == 2)
10553 decl += "%v2i8 = OpTypeVector %i8 2\n"
10554 "%v2u8 = OpTypeVector %u8 2\n";
10556 exts += "OpExtension \"SPV_KHR_8bit_storage\"\n";
10559 if (usesInt16(from, to))
10561 bool requiresInt16Capability = true;
10563 if (instruction == "OpUConvert" || instruction == "OpSConvert" || instruction == "OpFConvert")
10565 // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10566 if (usesInt32(from, to) || usesFloat32(from, to))
10567 requiresInt16Capability = false;
10570 decl += "%i16 = OpTypeInt 16 1\n"
10571 "%u16 = OpTypeInt 16 0\n";
10572 if (m_elements == 2)
10574 decl += "%v2i16 = OpTypeVector %i16 2\n"
10575 "%v2u16 = OpTypeVector %u16 2\n";
10579 decl += "%i16vec2 = OpTypeVector %i16 2\n";
10582 // Conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10583 if (requiresInt16Capability || !m_useStorageExt)
10584 caps += "OpCapability Int16\n";
10587 if (usesFloat16(from, to))
10589 decl += "%f16 = OpTypeFloat 16\n";
10590 if (m_elements == 2)
10592 decl += "%v2f16 = OpTypeVector %f16 2\n";
10595 // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Float16
10596 if (!usesFloat32(from, to) || !m_useStorageExt)
10597 caps += "OpCapability Float16\n";
10600 if ((usesInt16(from, to) || usesFloat16(from, to)) && m_useStorageExt)
10602 caps += "OpCapability StorageUniformBufferBlock16\n";
10603 exts += "OpExtension \"SPV_KHR_16bit_storage\"\n";
10606 if (usesInt64(from, to))
10608 caps += "OpCapability Int64\n";
10609 decl += "%i64 = OpTypeInt 64 1\n"
10610 "%u64 = OpTypeInt 64 0\n";
10611 if (m_elements == 2)
10613 decl += "%v2i64 = OpTypeVector %i64 2\n"
10614 "%v2u64 = OpTypeVector %u64 2\n";
10618 if (usesFloat64(from, to))
10620 caps += "OpCapability Float64\n";
10621 decl += "%f64 = OpTypeFloat 64\n";
10622 if (m_elements == 2)
10624 decl += "%v2f64 = OpTypeVector %f64 2\n";
10628 m_asmTypes["datatype_capabilities"] = caps;
10629 m_asmTypes["datatype_additional_decl"] = decl;
10630 m_asmTypes["datatype_extensions"] = exts;
10633 ConversionDataType m_fromType;
10634 ConversionDataType m_toType;
10635 deUint32 m_elements;
10636 bool m_useStorageExt;
10638 map<string, string> m_asmTypes;
10639 BufferSp m_inputBuffer;
10640 BufferSp m_outputBuffer;
10643 const string getConvertCaseShaderStr (const string& instruction, const ConvertCase& convertCase, bool addVectors = false)
10645 map<string, string> params = convertCase.m_asmTypes;
10647 params["instruction"] = instruction;
10648 params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
10649 params["outDecorator"] = getByteWidthStr(convertCase.m_toType);
10651 std::string shader (
10652 "OpCapability Shader\n"
10653 "${datatype_capabilities}"
10654 "${datatype_extensions:opt}"
10655 "OpMemoryModel Logical GLSL450\n"
10656 "OpEntryPoint GLCompute %main \"main\"\n"
10657 "OpExecutionMode %main LocalSize 1 1 1\n"
10658 "OpSource GLSL 430\n"
10659 "OpName %main \"main\"\n"
10661 "OpDecorate %indata DescriptorSet 0\n"
10662 "OpDecorate %indata Binding 0\n"
10663 "OpDecorate %outdata DescriptorSet 0\n"
10664 "OpDecorate %outdata Binding 1\n"
10665 "OpDecorate %in_buf BufferBlock\n"
10666 "OpDecorate %out_buf BufferBlock\n"
10667 "OpMemberDecorate %in_buf 0 Offset 0\n"
10668 "OpMemberDecorate %out_buf 0 Offset 0\n"
10670 "%void = OpTypeVoid\n"
10671 "%voidf = OpTypeFunction %void\n"
10672 "%u32 = OpTypeInt 32 0\n"
10673 "%i32 = OpTypeInt 32 1\n"
10674 "%f32 = OpTypeFloat 32\n"
10675 "%v2i32 = OpTypeVector %i32 2\n"
10676 "${datatype_additional_decl}"
10680 shader += "%v2u32 = OpTypeVector %u32 2\n"
10681 "%v2f32 = OpTypeVector %f32 2\n";
10684 "%uvec3 = OpTypeVector %u32 3\n"
10686 "%in_ptr = OpTypePointer Uniform %${inStorageType}\n"
10687 "%out_ptr = OpTypePointer Uniform %${outStorageType}\n"
10688 "%in_buf = OpTypeStruct %${inStorageType}\n"
10689 "%out_buf = OpTypeStruct %${outStorageType}\n"
10690 "%in_bufptr = OpTypePointer Uniform %in_buf\n"
10691 "%out_bufptr = OpTypePointer Uniform %out_buf\n"
10692 "%indata = OpVariable %in_bufptr Uniform\n"
10693 "%outdata = OpVariable %out_bufptr Uniform\n"
10695 "%zero = OpConstant %i32 0\n"
10697 "%main = OpFunction %void None %voidf\n"
10698 "%label = OpLabel\n"
10699 "%inloc = OpAccessChain %in_ptr %indata %zero\n"
10700 "%outloc = OpAccessChain %out_ptr %outdata %zero\n"
10701 "%inval = OpLoad %${inStorageType} %inloc\n"
10702 "%in_cast = ${inCast} %${inputType} %inval\n"
10703 "%conv = ${instruction} %${outputType} %in_cast\n"
10704 "%out_cast = ${outCast} %${outStorageType} %conv\n"
10705 " OpStore %outloc %out_cast\n"
10710 return StringTemplate(shader).specialize(params);
10713 void createConvertCases (vector<ConvertCase>& testCases, const string& instruction)
10715 if (instruction == "OpUConvert")
10717 // Convert unsigned int to unsigned int
10718 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_16, 42));
10719 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_32, 73));
10720 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_64, 121));
10722 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_8, 33));
10723 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_32, 60653));
10724 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_64, 17991));
10726 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_64, 904256275));
10727 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_16, 6275));
10728 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_8, 17));
10730 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_32, 701256243));
10731 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_16, 4741));
10732 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_8, 65));
10734 // Zero extension for int->uint
10735 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_16, 56));
10736 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_32, -47, true, 209));
10737 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_64, -5, true, 251));
10738 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_32, 14669));
10739 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_64, -3341, true, 62195));
10740 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_64, 973610259));
10742 // Truncate for int->uint
10743 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_8, -25711, true, 145));
10744 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_8, 103));
10745 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_8, -1067742499291926803ll, true, 237));
10746 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_16, 12382));
10747 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_32, -972812359, true, 3322154937u));
10748 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_16, -1067742499291926803ll, true, 61165));
10750 else if (instruction == "OpSConvert")
10752 // Sign extension int->int
10753 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_16, -30));
10754 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_32, 55));
10755 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_64, -3));
10756 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_32, 14669));
10757 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_64, -3341));
10758 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_64, 973610259));
10760 // Truncate for int->int
10761 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_8, 81));
10762 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_8, -93));
10763 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_8, 3182748172687672ll, true, 56));
10764 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_16, 12382));
10765 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_32, -972812359));
10766 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_16, -1067742499291926803ll, true, -4371));
10768 // Sign extension for int->uint
10769 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_16, 56));
10770 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_32, -47, true, 4294967249u));
10771 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_64, -5, true, 18446744073709551611ull));
10772 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_32, 14669));
10773 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_64, -3341, true, 18446744073709548275ull));
10774 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_64, 973610259));
10776 // Truncate for int->uint
10777 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_8, -25711, true, 145));
10778 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_8, 103));
10779 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_8, -1067742499291926803ll, true, 237));
10780 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_16, 12382));
10781 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_32, -972812359, true, 3322154937u));
10782 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_16, -1067742499291926803ll, true, 61165));
10784 // Sign extension for uint->int
10785 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_16, 71));
10786 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_32, 201, true, -55));
10787 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_64, 188, true, -68));
10788 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_32, 14669));
10789 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_64, 62195, true, -3341));
10790 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_64, 973610259));
10792 // Truncate for uint->int
10793 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_8, 67));
10794 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_8, 133, true, -123));
10795 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_8, 836927654193256494ull, true, 46));
10796 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_16, 12382));
10797 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_32, 18446744072736739257ull, true, -972812359));
10798 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_16, 17379001574417624813ull, true, -4371));
10800 // Convert i16vec2 to i32vec2 and vice versa
10801 // Unsigned values are used here to represent negative signed values and to allow defined shifting behaviour.
10802 // The actual signed value -32123 is used here as uint16 value 33413 and uint32 value 4294935173
10803 testCases.push_back(ConvertCase(instruction, DATA_TYPE_VEC2_SIGNED_16, DATA_TYPE_VEC2_SIGNED_32, (33413u << 16) | 27593, true, (4294935173ull << 32) | 27593));
10804 testCases.push_back(ConvertCase(instruction, DATA_TYPE_VEC2_SIGNED_32, DATA_TYPE_VEC2_SIGNED_16, (4294935173ull << 32) | 27593, true, (33413u << 16) | 27593));
10806 else if (instruction == "OpFConvert")
10808 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10809 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_64, 0x449a4000, true, 0x4093480000000000));
10810 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_32, 0x4093480000000000, true, 0x449a4000));
10812 // Conversion to/from 32-bit floats are supported by both 16-bit
10813 // storage and Float16. The tests are duplicated to exercise both
10815 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_16, 0x449a4000, true, 0x64D2));
10816 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_32, 0x64D2, true, 0x449a4000));
10817 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_16, 0x449a4000, true, 0x64D2, "no_storage", false));
10818 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_32, 0x64D2, true, 0x449a4000, "no_storage", false));
10820 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_64, 0x64D2, true, 0x4093480000000000));
10821 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_16, 0x4093480000000000, true, 0x64D2));
10822 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_64, 0x64D2, true, 0x4093480000000000, "no_storage", false));
10823 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_16, 0x4093480000000000, true, 0x64D2, "no_storage", false));
10826 else if (instruction == "OpConvertFToU")
10828 // Normal numbers from uint8 range
10829 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x5020, true, 33, "33", false));
10830 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x42280000, true, 42, "42"));
10831 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x4067800000000000ull, true, 188, "188"));
10833 // Maximum uint8 value
10834 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x5BF8, true, 255, "max", false));
10835 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x437F0000, true, 255, "max"));
10836 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x406FE00000000000ull, true, 255, "max"));
10839 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x0000, true, 0, "p0", false));
10840 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x00000000, true, 0, "p0"));
10841 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x0000000000000000ull, true, 0, "p0"));
10844 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x8000, true, 0, "m0", false));
10845 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x80000000, true, 0, "m0"));
10846 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x8000000000000000ull, true, 0, "m0"));
10848 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10849 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x64D2, true, 1234, "1234", false));
10850 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x64D2, true, 1234, "1234", false));
10851 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x64D2, true, 1234, "1234", false));
10853 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10854 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x7BFF, true, 65504, "max", false));
10855 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x7BFF, true, 65504, "max", false));
10856 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x7BFF, true, 65504, "max", false));
10859 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x0000, true, 0, "p0", false));
10860 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x0000, true, 0, "p0", false));
10861 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x0000, true, 0, "p0", false));
10864 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x8000, true, 0, "m0", false));
10865 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x8000, true, 0, "m0", false));
10866 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x8000, true, 0, "m0", false));
10868 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_16, 0x449a4000, true, 1234));
10869 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_32, 0x449a4000, true, 1234));
10870 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_64, 0x449a4000, true, 1234));
10871 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_16, 0x4093480000000000, true, 1234));
10872 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_32, 0x4093480000000000, true, 1234));
10873 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_64, 0x4093480000000000, true, 1234));
10875 else if (instruction == "OpConvertUToF")
10877 // Normal numbers from uint8 range
10878 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_16, 116, true, 0x5740, "116", false));
10879 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_32, 232, true, 0x43680000, "232"));
10880 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_64, 164, true, 0x4064800000000000ull, "164"));
10882 // Maximum uint8 value
10883 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_16, 255, true, 0x5BF8, "max", false));
10884 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_32, 255, true, 0x437F0000, "max"));
10885 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_64, 255, true, 0x406FE00000000000ull, "max"));
10887 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10888 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));
10889 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));
10890 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));
10892 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10893 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
10894 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
10895 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
10897 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 4294967296ll, true, 0x4f800000, "4294967296", false));
10898 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_64, 4294967296ll, true, 0x41f0000000000000, "4294967296", false));
10900 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 0xffffff0000000000, true, 0x5f7fffff, "max", false));
10902 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
10903 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
10904 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
10905 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
10906 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
10907 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
10909 else if (instruction == "OpConvertFToS")
10911 // Normal numbers from int8 range
10912 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xC980, true, -11, "m11", false));
10913 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC2140000, true, -37, "m37"));
10914 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC050800000000000ull, true, -66, "m66"));
10916 // Minimum int8 value
10917 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xD800, true, -128, "min", false));
10918 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC3000000, true, -128, "min"));
10919 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC060000000000000ull, true, -128, "min"));
10921 // Maximum int8 value
10922 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x57F0, true, 127, "max", false));
10923 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x42FE0000, true, 127, "max"));
10924 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x405FC00000000000ull, true, 127, "max"));
10927 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x0000, true, 0, "p0", false));
10928 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x00000000, true, 0, "p0"));
10929 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x0000000000000000ull, true, 0, "p0"));
10932 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x8000, true, 0, "m0", false));
10933 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x80000000, true, 0, "m0"));
10934 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x8000000000000000ull, true, 0, "m0"));
10936 // All hexadecimal values below represent -1234.0 as 16/32/64-bit IEEE 754 float
10937 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0xE4D2, true, -1234, "m1234", false));
10938 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0xE4D2, true, -1234, "m1234", false));
10939 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0xE4D2, true, -1234, "m1234", false));
10941 // 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
10942 // 0xFBFF = 1111 1011 1111 1111 = 1 11110 1111111111 = -65504
10943 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0xF800, true, -32768, "min", false));
10944 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0xFBFF, true, -65504, "min", false));
10945 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0xFBFF, true, -65504, "min", false));
10947 // 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
10948 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10949 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x77FF, true, 32752, "max", false));
10950 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x7BFF, true, 65504, "max", false));
10951 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x7BFF, true, 65504, "max", false));
10954 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x0000, true, 0, "p0", false));
10955 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x0000, true, 0, "p0", false));
10956 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x0000, true, 0, "p0", false));
10959 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x8000, true, 0, "m0", false));
10960 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x8000, true, 0, "m0", false));
10961 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x8000, true, 0, "m0", false));
10963 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc49a4000, true, -1234));
10964 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_32, 0xc49a4000, true, -1234));
10965 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_64, 0xc49a4000, true, -1234));
10966 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_16, 0xc093480000000000, true, -1234));
10967 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_32, 0xc093480000000000, true, -1234));
10968 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_64, 0xc093480000000000, true, -1234));
10969 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0x453b9000, true, 3001, "p3001"));
10970 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc53b9000, true, -3001, "m3001"));
10972 else if (instruction == "OpConvertSToF")
10974 // Normal numbers from int8 range
10975 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, -12, true, 0xCA00, "m21", false));
10976 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, -21, true, 0xC1A80000, "m21"));
10977 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, -99, true, 0xC058C00000000000ull, "m99"));
10979 // Minimum int8 value
10980 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, -128, true, 0xD800, "min", false));
10981 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, -128, true, 0xC3000000, "min"));
10982 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, -128, true, 0xC060000000000000ull, "min"));
10984 // Maximum int8 value
10985 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, 127, true, 0x57F0, "max", false));
10986 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, 127, true, 0x42FE0000, "max"));
10987 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, 127, true, 0x405FC00000000000ull, "max"));
10989 // All hexadecimal values below represent -1234.0 as 16/32/64-bit IEEE 754 float
10990 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));
10991 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));
10992 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));
10994 // 0x7800 = 0111 1000 0000 0000 = 0 11110 0000000000 = 32768
10995 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, 32768, true, 0x7800, "p32768", false));
10996 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, 32768, true, 0x7800, "p32768", false));
10998 // 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
10999 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "m32768", false));
11000 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "m32768", false));
11002 // 0xFBFF = 1111 1011 1111 1111 = 1 11110 1111111111 = -65504
11003 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "min", false));
11004 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -65504, true, 0xFBFF, "min", false));
11005 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -65504, true, 0xFBFF, "min", false));
11007 // 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
11008 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
11009 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, 32752, true, 0x77FF, "max", false));
11010 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
11011 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
11013 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, 4294967296ll, true, 0x4f800000, "p4294967296", false));
11014 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, 4294967296ll, true, 0x41f0000000000000, "p4294967296", false));
11015 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -4294967296ll, true, 0xcf800000, "m4294967296", false));
11016 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, -4294967296ll, true, 0xc1f0000000000000, "m4294967296", false));
11018 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, 0x7fffff8000000000, true, 0x5effffff, "max", false));
11019 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -0x7fffff8000000000, true, 0xdeffffff, "min", false));
11021 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
11022 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
11023 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
11024 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
11025 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
11026 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
11029 DE_FATAL("Unknown instruction");
// Builds the SPIR-V assembly fragments for one conversion test case.
//
// instruction - SPIR-V opcode name substituted for ${instruction} in the test function.
// convertCase - supplies the template parameters (m_asmTypes) and the source type (m_fromType).
//
// Fills a fragment map with the keys "capability", "extension", "decoration",
// "pre_main" and "testfun". The return statement is not visible in this extract;
// presumably `fragments` is what is returned.
11032 const map<string, string> getConvertCaseFragments (string instruction, const ConvertCase& convertCase)
// Start from a copy of the per-case template parameters so the additions below stay local.
11034 map<string, string> params = convertCase.m_asmTypes;
11035 map<string, string> fragments;
11037 params["instruction"] = instruction;
// NOTE(review): none of the templates visible in this function reference ${inDecorator};
// confirm whether this entry is still needed.
11038 params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
// Decorations: input block at set 0 / binding 0, output block at set 0 / binding 1,
// each a Block-decorated struct whose only member sits at offset 0.
11040 const StringTemplate decoration (
11041 " OpDecorate %SSBOi DescriptorSet 0\n"
11042 " OpDecorate %SSBOo DescriptorSet 0\n"
11043 " OpDecorate %SSBOi Binding 0\n"
11044 " OpDecorate %SSBOo Binding 1\n"
11045 " OpDecorate %s_SSBOi Block\n"
11046 " OpDecorate %s_SSBOo Block\n"
11047 "OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11048 "OpMemberDecorate %s_SSBOo 0 Offset 0\n");
// Type and variable declarations: single-member structs of the in/out storage types,
// StorageBuffer pointers to them, and the two buffer variables.
11050 const StringTemplate pre_main (
11051 "${datatype_additional_decl:opt}"
11052 " %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11053 " %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11054 " %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11055 " %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11056 " %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11057 " %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11058 " %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11059 " %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
// Test function: load the input, apply ${inCast}, then ${instruction}, then ${outCast},
// store the result to the output buffer and return the incoming %param unchanged.
11061 const StringTemplate testfun (
11062 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11063 "%param = OpFunctionParameter %v4f32\n"
11064 "%label = OpLabel\n"
11065 "%iLoc = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11066 "%oLoc = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11067 "%valIn = OpLoad %${inStorageType} %iLoc\n"
11068 "%valInCast = ${inCast} %${inputType} %valIn\n"
11069 "%conv = ${instruction} %${outputType} %valInCast\n"
11070 "%valOutCast = ${outCast} %${outStorageType} %conv\n"
11071 " OpStore %oLoc %valOutCast\n"
11072 " OpReturnValue %param\n"
11073 " OpFunctionEnd\n");
// The StorageBuffer storage class used above needs this SPIR-V extension, so it is
// appended to whatever extensions the data types already require.
11075 params["datatype_extensions"] =
11076 params["datatype_extensions"] +
11077 "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
// Capability and extension strings are taken verbatim; the three templates are specialized with params.
11079 fragments["capability"] = params["datatype_capabilities"];
11080 fragments["extension"] = params["datatype_extensions"];
11081 fragments["decoration"] = decoration.specialize(params);
11082 fragments["pre_main"] = pre_main.specialize(params);
11083 fragments["testfun"] = testfun.specialize(params);
// Builds the SPIR-V assembly fragments for one conversion test case; selected by
// createConvertGraphicsTests when the case's m_useStorageExt is false.
//
// instruction - SPIR-V opcode name substituted for ${instruction} in the test function.
// convertCase - supplies the template parameters (m_asmTypes) and the source type (m_fromType).
//
// Fills a fragment map with the keys "capability", "extension", "decoration",
// "pre_main" and "testfun". The return statement is not visible in this extract;
// presumably `fragments` is what is returned.
//
// NOTE(review): the visible body matches getConvertCaseFragments apart from the
// SPIR-V result-id names in the test function (%inval/%in_cast/%out_cast);
// confirm whether the two can share one implementation.
11088 const map<string, string> getConvertCaseFragmentsNoStorage(string instruction, const ConvertCase& convertCase)
// Start from a copy of the per-case template parameters so the additions below stay local.
11090 map<string, string> params = convertCase.m_asmTypes;
11091 map<string, string> fragments;
11093 params["instruction"] = instruction;
// NOTE(review): none of the templates visible in this function reference ${inDecorator};
// confirm whether this entry is still needed.
11094 params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
// Decorations: input block at set 0 / binding 0, output block at set 0 / binding 1,
// each a Block-decorated struct whose only member sits at offset 0.
11096 const StringTemplate decoration(
11097 " OpDecorate %SSBOi DescriptorSet 0\n"
11098 " OpDecorate %SSBOo DescriptorSet 0\n"
11099 " OpDecorate %SSBOi Binding 0\n"
11100 " OpDecorate %SSBOo Binding 1\n"
11101 " OpDecorate %s_SSBOi Block\n"
11102 " OpDecorate %s_SSBOo Block\n"
11103 "OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11104 "OpMemberDecorate %s_SSBOo 0 Offset 0\n");
// Type and variable declarations: single-member structs of the in/out storage types,
// StorageBuffer pointers to them, and the two buffer variables.
11106 const StringTemplate pre_main(
11107 "${datatype_additional_decl:opt}"
11108 " %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11109 " %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11110 " %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11111 " %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11112 " %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11113 " %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11114 " %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11115 " %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
// Test function: load the input, apply ${inCast}, then ${instruction}, then ${outCast},
// store the result to the output buffer and return the incoming %param unchanged.
11117 const StringTemplate testfun(
11118 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11119 "%param = OpFunctionParameter %v4f32\n"
11120 "%label = OpLabel\n"
11121 "%iLoc = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11122 "%oLoc = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11123 "%inval = OpLoad %${inStorageType} %iLoc\n"
11124 "%in_cast = ${inCast} %${inputType} %inval\n"
11125 "%conv = ${instruction} %${outputType} %in_cast\n"
11126 "%out_cast = ${outCast} %${outStorageType} %conv\n"
11127 " OpStore %oLoc %out_cast\n"
11128 " OpReturnValue %param\n"
11129 " OpFunctionEnd\n");
// The StorageBuffer storage class used above needs this SPIR-V extension, so it is
// appended to whatever extensions the data types already require.
11131 params["datatype_extensions"] =
11132 params["datatype_extensions"] +
11133 "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
// Capability and extension strings are taken verbatim; the three templates are specialized with params.
11135 fragments["capability"] = params["datatype_capabilities"];
11136 fragments["extension"] = params["datatype_extensions"];
11137 fragments["decoration"] = decoration.specialize(params);
11138 fragments["pre_main"] = pre_main.specialize(params);
11139 fragments["testfun"] = testfun.specialize(params);
11143 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in compute shaders
//
// testCtx     - test context handed to the group and to every case.
// instruction - SPIR-V conversion opcode name; selects the case list and is also passed
//               as the group's third constructor argument (presumably its description).
// name        - name of the created group.
//
// Returns the raw group pointer released from the MovePtr, with one
// SpvAsmComputeShaderCase child per ConvertCase.
11144 tcu::TestCaseGroup* createConvertComputeTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
11146 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
11147 vector<ConvertCase> testCases;
// Populate testCases with every conversion case defined for this instruction.
11148 createConvertCases(testCases, instruction);
11150 for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11152 ComputeShaderSpec spec;
11153 spec.assembly = getConvertCaseShaderStr(instruction, *test, true);
// A single workgroup is dispatched; each case carries one input and one output buffer.
11154 spec.numWorkGroups = IVec3(1, 1, 1);
11155 spec.inputs.push_back (test->m_inputBuffer);
11156 spec.outputs.push_back (test->m_outputBuffer);
// Derive the device features and extensions required by the source/destination types.
11158 getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, spec.requestedVulkanFeatures, spec.extensions);
11160 group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "", spec));
11162 return group.release();
11165 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in graphics shaders
//
// testCtx     - test context handed to the group.
// instruction - SPIR-V conversion opcode name; selects the case list and is also passed
//               as the group's third constructor argument (presumably its description).
// name        - name of the created group.
//
// Returns the raw group pointer released from the MovePtr; createTestsForAllStages
// is called once per ConvertCase to add that case's tests to the group.
11166 tcu::TestCaseGroup* createConvertGraphicsTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
11168 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
11169 vector<ConvertCase> testCases;
// Populate testCases with every conversion case defined for this instruction.
11170 createConvertCases(testCases, instruction);
11172 for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
// Pick the fragment builder according to the case's m_useStorageExt flag.
11174 map<string, string> fragments = (test->m_useStorageExt) ? getConvertCaseFragments(instruction, *test) : getConvertCaseFragmentsNoStorage(instruction,*test);
11175 VulkanFeatures vulkanFeatures;
11176 GraphicsResources resources;
11177 vector<string> extensions;
11178 SpecConstants noSpecConstants;
11179 PushConstants noPushConstants;
11180 GraphicsInterfaces noInterfaces;
11181 tcu::RGBA defaultColors[4];
// The same default colors are used for input and expected output below.
11183 getDefaultColors (defaultColors);
// Input and output data are both bound as storage buffers.
11184 resources.inputs.push_back (Resource(test->m_inputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11185 resources.outputs.push_back (Resource(test->m_outputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
// Requested unconditionally, for every case.
11186 extensions.push_back ("VK_KHR_storage_buffer_storage_class");
// Add the device features and extensions required by the source/destination types.
11188 getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, vulkanFeatures, extensions);
// Presumably required because the test function stores to a storage buffer from
// vertex-pipeline and fragment stages - TODO confirm.
11190 vulkanFeatures.coreFeatures.vertexPipelineStoresAndAtomics = true;
11191 vulkanFeatures.coreFeatures.fragmentStoresAndAtomics = true;
11193 createTestsForAllStages(
11194 test->m_name, defaultColors, defaultColors, fragments, noSpecConstants,
11195 noPushConstants, resources, noInterfaces, extensions, vulkanFeatures, group.get());
11197 return group.release();
11200 // Constant-Creation Instructions: OpConstant, OpConstantComposite
//
// Creates the "opconstant" group of graphics tests for 16-bit float constants.
// Each entry of tests[] supplies a block of constant declarations (used as the
// "pre_main" fragment) and a code snippet that must define %transformed_param as a
// %v4f16 value; the snippet is wrapped between functionStart and functionEnd.
// Every visible snippet adds 0.5 to the x, y and z components of the input.
// Returns the raw group pointer released from the MovePtr.
11201 tcu::TestCaseGroup* createOpConstantFloat16Tests(tcu::TestContext& testCtx)
11203 de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests (new tcu::TestCaseGroup(testCtx, "opconstant", "OpConstant and OpConstantComposite instruction"));
11204 RGBA inputColors[4];
11205 RGBA outputColors[4];
11206 vector<string> extensions;
11207 GraphicsResources resources;
11208 VulkanFeatures features;
// Common prologue of the test function: takes the v4f32 input as %param1.
11210 const char functionStart[] =
11211 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11212 "%param1 = OpFunctionParameter %v4f32\n"
11213 "%lbl = OpLabel\n";
// Common epilogue: converts the snippet's 16-bit result back to v4f32 and returns it.
11215 const char functionEnd[] =
11216 "%transformed_param_32 = OpFConvert %v4f32 %transformed_param\n"
11217 " OpReturnValue %transformed_param_32\n"
11218 " OpFunctionEnd\n";
// Per-test record; the loop at the end of this function reads its name, constants and code members.
11220 struct NameConstantsCode
// 16-bit float type, vector/array/pointer types and basic constants shared by all test cases.
11227 #define FLOAT_16_COMMON_TYPES_AND_CONSTS \
11228 "%f16 = OpTypeFloat 16\n" \
11229 "%c_f16_0 = OpConstant %f16 0.0\n" \
11230 "%c_f16_0_5 = OpConstant %f16 0.5\n" \
11231 "%c_f16_1 = OpConstant %f16 1.0\n" \
11232 "%v4f16 = OpTypeVector %f16 4\n" \
11233 "%fp_f16 = OpTypePointer Function %f16\n" \
11234 "%fp_v4f16 = OpTypePointer Function %v4f16\n" \
11235 "%c_v4f16_1_1_1_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n" \
11236 "%a4f16 = OpTypeArray %f16 %c_u32_4\n" \
11238 NameConstantsCode tests[] =
// Vector constant: adds (0.5, 0.5, 0.5, 0) to the input.
11243 FLOAT_16_COMMON_TYPES_AND_CONSTS
11244 "%cval = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_0\n",
11245 "%param1_16 = OpFConvert %v4f16 %param1\n"
11246 "%transformed_param = OpFAdd %v4f16 %param1_16 %cval\n"
// Struct constant holding a vector and a scalar; both members are read back through access chains.
11251 FLOAT_16_COMMON_TYPES_AND_CONSTS
11252 "%stype = OpTypeStruct %v4f16 %f16\n"
11253 "%fp_stype = OpTypePointer Function %stype\n"
11254 "%f16_n_1 = OpConstant %f16 -1.0\n"
11255 "%f16_1_5 = OpConstant %f16 !0x3e00\n" // +1.5
11256 "%cvec = OpConstantComposite %v4f16 %f16_1_5 %f16_1_5 %f16_1_5 %c_f16_1\n"
11257 "%cval = OpConstantComposite %stype %cvec %f16_n_1\n",
11259 "%v = OpVariable %fp_stype Function %cval\n"
11260 "%vec_ptr = OpAccessChain %fp_v4f16 %v %c_u32_0\n"
11261 "%f16_ptr = OpAccessChain %fp_f16 %v %c_u32_1\n"
11262 "%vec_val = OpLoad %v4f16 %vec_ptr\n"
11263 "%f16_val = OpLoad %f16 %f16_ptr\n"
11264 "%tmp1 = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_1 %f16_val\n" // vec4(-1)
11265 "%param1_16 = OpFConvert %v4f16 %param1\n"
11266 "%tmp2 = OpFAdd %v4f16 %tmp1 %param1_16\n" // param1 + vec4(-1)
11267 "%transformed_param = OpFAdd %v4f16 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
// Matrix constant: the 4x4 matrix is built from four v4f16 constants and multiplied with the input.
11270 // [1|0|0|0.5] [x] = x + 0.5
11271 // [0|1|0|0.5] [y] = y + 0.5
11272 // [0|0|1|0.5] [z] = z + 0.5
11273 // [0|0|0|1 ] [1] = 1
11276 FLOAT_16_COMMON_TYPES_AND_CONSTS
11277 "%mat4x4_f16 = OpTypeMatrix %v4f16 4\n"
11278 "%v4f16_1_0_0_0 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_0 %c_f16_0 %c_f16_0\n"
11279 "%v4f16_0_1_0_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_1 %c_f16_0 %c_f16_0\n"
11280 "%v4f16_0_0_1_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_1 %c_f16_0\n"
11281 "%v4f16_0_5_0_5_0_5_1 = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_1\n"
11282 "%cval = OpConstantComposite %mat4x4_f16 %v4f16_1_0_0_0 %v4f16_0_1_0_0 %v4f16_0_0_1_0 %v4f16_0_5_0_5_0_5_1\n",
11284 "%param1_16 = OpFConvert %v4f16 %param1\n"
11285 "%transformed_param = OpMatrixTimesVector %v4f16 %cval %param1_16\n"
// Array constant: the four elements (0, -1, 1.5, 0) are summed to 0.5 and applied to x, y, z.
11290 FLOAT_16_COMMON_TYPES_AND_CONSTS
11291 "%c_v4f16_1_1_1_0 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
11292 "%fp_a4f16 = OpTypePointer Function %a4f16\n"
11293 "%f16_n_1 = OpConstant %f16 -1.0\n"
11294 "%f16_1_5 = OpConstant %f16 !0x3e00\n" // +1.5
11295 "%carr = OpConstantComposite %a4f16 %c_f16_0 %f16_n_1 %f16_1_5 %c_f16_0\n",
11297 "%v = OpVariable %fp_a4f16 Function %carr\n"
11298 "%f = OpAccessChain %fp_f16 %v %c_u32_0\n"
11299 "%f1 = OpAccessChain %fp_f16 %v %c_u32_1\n"
11300 "%f2 = OpAccessChain %fp_f16 %v %c_u32_2\n"
11301 "%f3 = OpAccessChain %fp_f16 %v %c_u32_3\n"
11302 "%f_val = OpLoad %f16 %f\n"
11303 "%f1_val = OpLoad %f16 %f1\n"
11304 "%f2_val = OpLoad %f16 %f2\n"
11305 "%f3_val = OpLoad %f16 %f3\n"
11306 "%ftot1 = OpFAdd %f16 %f_val %f1_val\n"
11307 "%ftot2 = OpFAdd %f16 %ftot1 %f2_val\n"
11308 "%ftot3 = OpFAdd %f16 %ftot2 %f3_val\n" // 0 - 1 + 1.5 + 0
11309 "%add_vec = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %ftot3\n"
11310 "%param1_16 = OpFConvert %v4f16 %param1\n"
11311 "%transformed_param = OpFAdd %v4f16 %param1_16 %add_vec\n"
// Nested constant: array of three structs, each holding a scalar and a 4-element array.
// The access chain below selects element [1], member 1, array index 1, which is 0.5.
11318 // [ 1.0, 1.0, 1.0, 1.0]
11322 // [ 0.0, 0.5, 0.0, 0.0]
11326 // [ 1.0, 1.0, 1.0, 1.0]
11329 "array_of_struct_of_array",
11331 FLOAT_16_COMMON_TYPES_AND_CONSTS
11332 "%c_v4f16_1_1_1_0 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
11333 "%fp_a4f16 = OpTypePointer Function %a4f16\n"
11334 "%stype = OpTypeStruct %f16 %a4f16\n"
11335 "%a3stype = OpTypeArray %stype %c_u32_3\n"
11336 "%fp_a3stype = OpTypePointer Function %a3stype\n"
11337 "%ca4f16_0 = OpConstantComposite %a4f16 %c_f16_0 %c_f16_0_5 %c_f16_0 %c_f16_0\n"
11338 "%ca4f16_1 = OpConstantComposite %a4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
11339 "%cstype1 = OpConstantComposite %stype %c_f16_0 %ca4f16_1\n"
11340 "%cstype2 = OpConstantComposite %stype %c_f16_1 %ca4f16_0\n"
11341 "%carr = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
11343 "%v = OpVariable %fp_a3stype Function %carr\n"
11344 "%f = OpAccessChain %fp_f16 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
11345 "%f_l = OpLoad %f16 %f\n"
11346 "%add_vec = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %f_l\n"
11347 "%param1_16 = OpFConvert %v4f16 %param1\n"
11348 "%transformed_param = OpFAdd %v4f16 %param1_16 %add_vec\n"
// Inputs come from getHalfColorsFullAlpha(); the expected outputs are hard-coded
// (presumably the input colors with 0.5 added to each RGB channel - TODO confirm).
11352 getHalfColorsFullAlpha(inputColors);
11353 outputColors[0] = RGBA(255, 255, 255, 255);
11354 outputColors[1] = RGBA(255, 127, 127, 255);
11355 outputColors[2] = RGBA(127, 255, 127, 255);
11356 outputColors[3] = RGBA(127, 127, 255, 255);
// Every shader declares OpCapability Float16, so request the matching extension and feature.
11358 extensions.push_back("VK_KHR_shader_float16_int8");
11359 features.extFloat16Int8.shaderFloat16 = true;
// Register one set of per-stage tests for each entry of tests[].
11361 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
11363 map<string, string> fragments;
11365 fragments["capability"] = "OpCapability Float16\n";
11366 fragments["pre_main"] = tests[testNdx].constants;
11367 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
11369 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, resources, extensions, opConstantCompositeTests.get(), features);
11371 return opConstantCompositeTests.release();
// Declaration of the generic test-finalization helper, parameterized on the resource
// type T. Only the declaration is given here; the definitions that follow in this file
// take GraphicsResources and ComputeShaderSpec as specResource.
//
// specResource    - resource/spec object the tests are built from.
// fragments       - SPIR-V assembly fragments for the shader.
// testCtx         - test context.
// testGroup       - group that receives the created test(s).
// testName        - name given to the created test(s).
// vulkanFeatures  - device features the test requires.
// extensions      - device extensions the test requires.
// numWorkGroups   - workgroup count.
// splitRenderArea - defaults to false.
11374 template<typename T>
11375 void finalizeTestsCreation (T& specResource,
11376 const map<string, string>& fragments,
11377 tcu::TestContext& testCtx,
11378 tcu::TestCaseGroup& testGroup,
11379 const std::string& testName,
11380 const VulkanFeatures& vulkanFeatures,
11381 const vector<string>& extensions,
11382 const IVec3& numWorkGroups,
11383 const bool splitRenderArea = false);
// Graphics variant of finalizeTestsCreation: registers tests for the given fragments
// and resources in testGroup through createTestsForAllStages.
// The test context parameter is unnamed and therefore unused here.
// NOTE(review): a parameter line between `extensions` and `splitRenderArea` is not
// visible in this extract - confirm the full parameter list against the file.
11386 void finalizeTestsCreation (GraphicsResources& specResource,
11387 const map<string, string>& fragments,
11388 tcu::TestContext& ,
11389 tcu::TestCaseGroup& testGroup,
11390 const std::string& testName,
11391 const VulkanFeatures& vulkanFeatures,
11392 const vector<string>& extensions,
11394 const bool splitRenderArea)
// The same default colors serve as both the input and the expected output colors.
11396 RGBA defaultColors[4];
11397 getDefaultColors(defaultColors);
// QP_TEST_RESULT_FAIL and an empty string are passed for the two arguments that precede splitRenderArea.
11399 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, specResource, extensions, &testGroup, vulkanFeatures, QP_TEST_RESULT_FAIL, std::string(), splitRenderArea);
// Compute variant of finalizeTestsCreation: completes the given ComputeShaderSpec and
// adds a single SpvAsmComputeShaderCase named testName to testGroup.
// Note that specResource is modified in place: its numWorkGroups,
// requestedVulkanFeatures, extensions and assembly fields are overwritten.
// NOTE(review): the end of the parameter list is not visible in this extract -
// confirm it against the file.
11403 void finalizeTestsCreation (ComputeShaderSpec& specResource,
11404 const map<string, string>& fragments,
11405 tcu::TestContext& testCtx,
11406 tcu::TestCaseGroup& testGroup,
11407 const std::string& testName,
11408 const VulkanFeatures& vulkanFeatures,
11409 const vector<string>& extensions,
11410 const IVec3& numWorkGroups,
// Copy the caller-supplied dispatch size and device requirements into the spec.
11413 specResource.numWorkGroups = numWorkGroups;
11414 specResource.requestedVulkanFeatures = vulkanFeatures;
11415 specResource.extensions = extensions;
// Assemble the compute shader source from the fragments.
11417 specResource.assembly = makeComputeShaderAssembly(fragments);
11419 testGroup.addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", specResource));
11422 template<class SpecResource>
11423 tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bool nanSupported)
11425 const string nan = nanSupported ? "_nan" : "";
11426 const string groupName = "logical" + nan;
11427 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Float 16 logical tests"));
11429 de::Random rnd (deStringHash(testGroup->getName()));
11430 const string spvCapabilities = string("OpCapability Float16\n") + (nanSupported ? "OpCapability SignedZeroInfNanPreserve\n" : "");
11431 const string spvExtensions = (nanSupported ? "OpExtension \"SPV_KHR_float_controls\"\n" : "");
11432 const string spvExecutionMode = nanSupported ? "OpExecutionMode %BP_main SignedZeroInfNanPreserve 16\n" : "";
11433 const deUint32 numDataPointsScalar = 16;
11434 const deUint32 numDataPointsVector = 14;
11435 const vector<deFloat16> float16DataScalar = getFloat16s(rnd, numDataPointsScalar);
11436 const vector<deFloat16> float16DataVector = getFloat16s(rnd, numDataPointsVector);
11437 const vector<deFloat16> float16Data1 = squarize(float16DataScalar, 0); // Total Size: square(sizeof(float16DataScalar))
11438 const vector<deFloat16> float16Data2 = squarize(float16DataScalar, 1);
11439 const vector<deFloat16> float16DataVec1 = squarizeVector(float16DataVector, 0); // Total Size: 2 * (square(square(sizeof(float16DataVector))))
11440 const vector<deFloat16> float16DataVec2 = squarizeVector(float16DataVector, 1);
11441 const vector<deFloat16> float16OutUnused (float16Data1.size(), 0);
11442 const vector<deFloat16> float16OutVecUnused (float16DataVec1.size(), 0);
11446 const char* opCode;
11447 VerifyIOFunc verifyFuncNan;
11448 VerifyIOFunc verifyFuncNonNan;
11449 const deUint32 argCount;
11452 const TestOp testOps[] =
11454 { "OpIsNan" , compareFP16Logical<fp16isNan, true, false, true>, compareFP16Logical<fp16isNan, true, false, false>, 1 },
11455 { "OpIsInf" , compareFP16Logical<fp16isInf, true, false, true>, compareFP16Logical<fp16isInf, true, false, false>, 1 },
11456 { "OpFOrdEqual" , compareFP16Logical<fp16isEqual, false, true, true>, compareFP16Logical<fp16isEqual, false, true, false>, 2 },
11457 { "OpFUnordEqual" , compareFP16Logical<fp16isEqual, false, false, true>, compareFP16Logical<fp16isEqual, false, false, false>, 2 },
11458 { "OpFOrdNotEqual" , compareFP16Logical<fp16isUnequal, false, true, true>, compareFP16Logical<fp16isUnequal, false, true, false>, 2 },
11459 { "OpFUnordNotEqual" , compareFP16Logical<fp16isUnequal, false, false, true>, compareFP16Logical<fp16isUnequal, false, false, false>, 2 },
11460 { "OpFOrdLessThan" , compareFP16Logical<fp16isLess, false, true, true>, compareFP16Logical<fp16isLess, false, true, false>, 2 },
11461 { "OpFUnordLessThan" , compareFP16Logical<fp16isLess, false, false, true>, compareFP16Logical<fp16isLess, false, false, false>, 2 },
11462 { "OpFOrdGreaterThan" , compareFP16Logical<fp16isGreater, false, true, true>, compareFP16Logical<fp16isGreater, false, true, false>, 2 },
11463 { "OpFUnordGreaterThan" , compareFP16Logical<fp16isGreater, false, false, true>, compareFP16Logical<fp16isGreater, false, false, false>, 2 },
11464 { "OpFOrdLessThanEqual" , compareFP16Logical<fp16isLessOrEqual, false, true, true>, compareFP16Logical<fp16isLessOrEqual, false, true, false>, 2 },
11465 { "OpFUnordLessThanEqual" , compareFP16Logical<fp16isLessOrEqual, false, false, true>, compareFP16Logical<fp16isLessOrEqual, false, false, false>, 2 },
11466 { "OpFOrdGreaterThanEqual" , compareFP16Logical<fp16isGreaterOrEqual, false, true, true>, compareFP16Logical<fp16isGreaterOrEqual, false, true, false>, 2 },
11467 { "OpFUnordGreaterThanEqual" , compareFP16Logical<fp16isGreaterOrEqual, false, false, true>, compareFP16Logical<fp16isGreaterOrEqual, false, false, false>, 2 },
11471 const StringTemplate preMain
11473 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11474 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11475 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11476 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
11477 " %f16 = OpTypeFloat 16\n"
11478 " %v2f16 = OpTypeVector %f16 2\n"
11479 " %c_f16_0 = OpConstant %f16 0.0\n"
11480 " %c_f16_1 = OpConstant %f16 1.0\n"
11481 " %up_u32 = OpTypePointer Uniform %u32\n"
11482 " %ra_u32 = OpTypeArray %u32 %c_i32_hndp\n"
11483 " %SSBO16 = OpTypeStruct %ra_u32\n"
11484 " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11485 " %f16_i32_fn = OpTypeFunction %f16 %i32\n"
11486 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11487 " %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11488 " %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11489 " %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11492 const StringTemplate decoration
11494 "OpDecorate %ra_u32 ArrayStride 4\n"
11495 "OpMemberDecorate %SSBO16 0 Offset 0\n"
11496 "OpDecorate %SSBO16 BufferBlock\n"
11497 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
11498 "OpDecorate %ssbo_src0 Binding 0\n"
11499 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
11500 "OpDecorate %ssbo_src1 Binding 1\n"
11501 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11502 "OpDecorate %ssbo_dst Binding 2\n"
11505 const StringTemplate testFun
11507 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11508 " %param = OpFunctionParameter %v4f32\n"
11510 " %entry = OpLabel\n"
11511 " %i = OpVariable %fp_i32 Function\n"
11512 " OpStore %i %c_i32_0\n"
11513 " OpBranch %loop\n"
11515 " %loop = OpLabel\n"
11516 " %i_cmp = OpLoad %i32 %i\n"
11517 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11518 " OpLoopMerge %merge %next None\n"
11519 " OpBranchConditional %lt %write %merge\n"
11521 " %write = OpLabel\n"
11522 " %ndx = OpLoad %i32 %i\n"
11524 " %val_src0 = OpFunctionCall %f16 %ld_arg_ssbo_src0 %ndx\n"
11528 " %val_bdst = ${op_code} %bool %val_src0 ${op_arg1}\n"
11529 " %val_dst = OpSelect %f16 %val_bdst %c_f16_1 %c_f16_0\n"
11530 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11531 " OpBranch %next\n"
11533 " %next = OpLabel\n"
11534 " %i_cur = OpLoad %i32 %i\n"
11535 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11536 " OpStore %i %i_new\n"
11537 " OpBranch %loop\n"
11539 " %merge = OpLabel\n"
11540 " OpReturnValue %param\n"
11545 const StringTemplate arg1Calc
11547 " %val_src1 = OpFunctionCall %f16 %ld_arg_ssbo_src1 %ndx\n"
11550 for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11552 const size_t iterations = float16Data1.size();
11553 const TestOp& testOp = testOps[testOpsIdx];
11554 const string testName = de::toLower(string(testOp.opCode)) + "_scalar";
11555 SpecResource specResource;
11556 map<string, string> specs;
11557 VulkanFeatures features;
11558 map<string, string> fragments;
11559 vector<string> extensions;
11561 specs["num_data_points"] = de::toString(iterations);
11562 specs["op_code"] = testOp.opCode;
11563 specs["op_arg1"] = (testOp.argCount == 1) ? "" : "%val_src1";
11564 specs["op_arg1_calc"] = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11566 fragments["extension"] = spvExtensions;
11567 fragments["capability"] = spvCapabilities;
11568 fragments["execution_mode"] = spvExecutionMode;
11569 fragments["decoration"] = decoration.specialize(specs);
11570 fragments["pre_main"] = preMain.specialize(specs);
11571 fragments["testfun"] = testFun.specialize(specs);
11572 fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src0"}});
11573 if (testOp.argCount > 1)
11575 fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src1"}});
11577 fragments["testfun"] += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
11579 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11580 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11581 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11582 specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11584 extensions.push_back("VK_KHR_shader_float16_int8");
11588 extensions.push_back("VK_KHR_shader_float_controls");
11590 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11593 features.extFloat16Int8.shaderFloat16 = true;
11595 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11599 const StringTemplate preMain
11601 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11602 " %v2bool = OpTypeVector %bool 2\n"
11603 " %f16 = OpTypeFloat 16\n"
11604 " %c_f16_0 = OpConstant %f16 0.0\n"
11605 " %c_f16_1 = OpConstant %f16 1.0\n"
11606 " %v2f16 = OpTypeVector %f16 2\n"
11607 " %c_v2f16_0_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11608 " %c_v2f16_1_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
11609 " %up_u32 = OpTypePointer Uniform %u32\n"
11610 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
11611 " %SSBO16 = OpTypeStruct %ra_u32\n"
11612 " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11613 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11614 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
11615 " %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11616 " %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11617 " %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11620 const StringTemplate decoration
11622 "OpDecorate %ra_u32 ArrayStride 4\n"
11623 "OpMemberDecorate %SSBO16 0 Offset 0\n"
11624 "OpDecorate %SSBO16 BufferBlock\n"
11625 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
11626 "OpDecorate %ssbo_src0 Binding 0\n"
11627 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
11628 "OpDecorate %ssbo_src1 Binding 1\n"
11629 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11630 "OpDecorate %ssbo_dst Binding 2\n"
11633 const StringTemplate testFun
11635 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11636 " %param = OpFunctionParameter %v4f32\n"
11638 " %entry = OpLabel\n"
11639 " %i = OpVariable %fp_i32 Function\n"
11640 " OpStore %i %c_i32_0\n"
11641 " OpBranch %loop\n"
11643 " %loop = OpLabel\n"
11644 " %i_cmp = OpLoad %i32 %i\n"
11645 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11646 " OpLoopMerge %merge %next None\n"
11647 " OpBranchConditional %lt %write %merge\n"
11649 " %write = OpLabel\n"
11650 " %ndx = OpLoad %i32 %i\n"
11652 " %val_src0 = OpFunctionCall %v2f16 %ld_arg_ssbo_src0 %ndx\n"
11656 " %val_bdst = ${op_code} %v2bool %val_src0 ${op_arg1}\n"
11657 " %val_dst = OpSelect %v2f16 %val_bdst %c_v2f16_1_1 %c_v2f16_0_0\n"
11658 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11659 " OpBranch %next\n"
11661 " %next = OpLabel\n"
11662 " %i_cur = OpLoad %i32 %i\n"
11663 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11664 " OpStore %i %i_new\n"
11665 " OpBranch %loop\n"
11667 " %merge = OpLabel\n"
11668 " OpReturnValue %param\n"
11673 const StringTemplate arg1Calc
11675 " %val_src1 = OpFunctionCall %v2f16 %ld_arg_ssbo_src1 %ndx\n"
11678 for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11680 const deUint32 itemsPerVec = 2;
11681 const size_t iterations = float16DataVec1.size() / itemsPerVec;
11682 const TestOp& testOp = testOps[testOpsIdx];
11683 const string testName = de::toLower(string(testOp.opCode)) + "_vector";
11684 SpecResource specResource;
11685 map<string, string> specs;
11686 vector<string> extensions;
11687 VulkanFeatures features;
11688 map<string, string> fragments;
11690 specs["num_data_points"] = de::toString(iterations);
11691 specs["op_code"] = testOp.opCode;
11692 specs["op_arg1"] = (testOp.argCount == 1) ? "" : "%val_src1";
11693 specs["op_arg1_calc"] = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11695 fragments["extension"] = spvExtensions;
11696 fragments["capability"] = spvCapabilities;
11697 fragments["execution_mode"] = spvExecutionMode;
11698 fragments["decoration"] = decoration.specialize(specs);
11699 fragments["pre_main"] = preMain.specialize(specs);
11700 fragments["testfun"] = testFun.specialize(specs);
11701 fragments["testfun"] += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src0"}});
11702 if (testOp.argCount > 1)
11704 fragments["testfun"] += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src1"}});
11706 fragments["testfun"] += StringTemplate(storeV2F16AsUint).specialize({{"var", "ssbo_dst"}});
11708 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11709 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11710 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutVecUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11711 specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11713 extensions.push_back("VK_KHR_shader_float16_int8");
11717 extensions.push_back("VK_KHR_shader_float_controls");
11719 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11722 features.extFloat16Int8.shaderFloat16 = true;
11724 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1), true);
11728 return testGroup.release();
11731 bool compareFP16FunctionSetFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11733 if (inputs.size() != 1 || outputAllocs.size() != 1)
11736 vector<deUint8> input1Bytes;
11738 inputs[0].getBytes(input1Bytes);
11740 const deUint16* const input1AsFP16 = (const deUint16*)&input1Bytes[0];
11741 const deUint16* const outputAsFP16 = (const deUint16*)outputAllocs[0]->getHostPtr();
11744 for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deUint16); ++idx)
11746 if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
11748 log << TestLog::Message << error << TestLog::EndMessage;
// Creates the "function" test group ("Float 16 function call related tests").
//
// One test case is registered per entry of testTypes, named after the entry's
// typeName. In each case the shader loops over ${num_data_points} items, loads
// a value with %ld_arg_ssbo_src, passes it through %pass_fun (which adds the
// zero constant %c_${tt}_0 with OpFAdd) and stores the result with
// %st_fn_ssbo_dst. The output is checked with compareFP16FunctionSetFunc.
//
// Returns the raw group pointer released from the local MovePtr.
11757 template<class SpecResource>
11758 tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
11760 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "function", "Float 16 function call related tests"));
// The random generator is seeded from a hash of the group name.
11762 de::Random rnd (deStringHash(testGroup->getName()));
11763 const StringTemplate capabilities ("OpCapability Float16\n");
11764 const deUint32 numDataPoints = 256;
11765 const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
// Zero-filled buffer with the same element count as the input; used as the output resource.
11766 const vector<deFloat16> float16OutputUnused (float16InputData.size(), 0);
// Declared once outside the per-type loop; the loop assigns the keys it uses on every iteration.
11767 map<string, string> fragments;
// Per-type description: component count, SPIR-V type name, extra type
// declarations, storage array suffix (appended to "%ra_" in preMain) and the
// load/store helper function templates.
11771 const deUint32 typeComponents;
11772 const char* typeName;
11773 const char* typeDecls;
11774 const char* typeStorage;
11775 const string loadFunc;
11776 const string storeFunc;
11779 const TestType testTypes[] =
// Entry using the scalar load/store helpers.
11784 " %v2f16 = OpTypeVector %f16 2\n"
11785 "%f16_i32_fn = OpTypeFunction %f16 %i32\n"
11786 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11787 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11788 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
11790 loadScalarF16FromUint,
11791 storeScalarF16AsUint
// Entry declaring the %v2f16 zero constant and function types.
11796 " %v2f16 = OpTypeVector %f16 2\n"
11797 " %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11798 "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11799 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
// Entry declaring the %v4f16 type, zero constant and function types.
11807 " %v2f16 = OpTypeVector %f16 2\n"
11808 " %v4f16 = OpTypeVector %f16 4\n"
11809 " %c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
11810 "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
11811 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
11813 loadV4F16FromUints,
// Shared SPIR-V declarations. ${num_data_points}, ${tt} and ${ts} are
// substituted from the specs map built in the loop below. %c_i32_hndp is
// %c_i32_ndp divided by two (SDiv by %c_i32_2).
11818 const StringTemplate preMain
11820 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11821 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11822 " %v2bool = OpTypeVector %bool 2\n"
11823 " %f16 = OpTypeFloat 16\n"
11824 " %c_f16_0 = OpConstant %f16 0.0\n"
11828 " %${tt}_fun = OpTypeFunction %${tt} %${tt}\n"
11829 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
11830 "%ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
11831 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
11832 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
11833 " %up_u32 = OpTypePointer Uniform %u32\n"
// The buffer struct wraps whichever array type the entry's typeStorage selects.
11834 " %SSBO16 = OpTypeStruct %ra_${ts}\n"
11835 " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11836 " %ssbo_src = OpVariable %up_SSBO16 Uniform\n"
11837 " %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
// Array strides for every candidate storage array, plus descriptor bindings:
// source buffer at binding 0, destination buffer at binding 1 (both in set 0).
11840 const StringTemplate decoration
11842 "OpDecorate %ra_u32_2 ArrayStride 4\n"
11843 "OpDecorate %ra_u32_hndp ArrayStride 4\n"
11844 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
11845 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
11846 "OpMemberDecorate %SSBO16 0 Offset 0\n"
11847 "OpDecorate %SSBO16 BufferBlock\n"
11848 "OpDecorate %ssbo_src DescriptorSet 0\n"
11849 "OpDecorate %ssbo_src Binding 0\n"
11850 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11851 "OpDecorate %ssbo_dst Binding 1\n"
// Test body: a counted loop from 0 up to (not including) %c_i32_ndp, followed
// by the %pass_fun definition. %test_code returns its parameter unchanged.
11854 const StringTemplate testFun
11856 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11857 " %param = OpFunctionParameter %v4f32\n"
11858 " %entry = OpLabel\n"
11860 " %i = OpVariable %fp_i32 Function\n"
11861 " OpStore %i %c_i32_0\n"
11862 " OpBranch %loop\n"
11864 " %loop = OpLabel\n"
11865 " %i_cmp = OpLoad %i32 %i\n"
11866 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11867 " OpLoopMerge %merge %next None\n"
11868 " OpBranchConditional %lt %write %merge\n"
11870 " %write = OpLabel\n"
11871 " %ndx = OpLoad %i32 %i\n"
// Load item %ndx, route it through the function call under test, store it back.
11873 " %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
11874 " %val_dst = OpFunctionCall %${tt} %pass_fun %val_src\n"
11875 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11876 " OpBranch %next\n"
11878 " %next = OpLabel\n"
11879 " %i_cur = OpLoad %i32 %i\n"
11880 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11881 " OpStore %i %i_new\n"
11882 " OpBranch %loop\n"
11884 " %merge = OpLabel\n"
11885 " OpReturnValue %param\n"
// Callee: returns its argument plus the type's zero constant.
11889 " %pass_fun = OpFunction %${tt} None %${tt}_fun\n"
11890 " %param0 = OpFunctionParameter %${tt}\n"
11891 " %entry_pf = OpLabel\n"
11892 " %res0 = OpFAdd %${tt} %param0 %c_${tt}_0\n"
11893 " OpReturnValue %res0\n"
// Register one test case per type entry.
11897 for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
11899 const TestType& testType = testTypes[testTypeIdx];
11900 const string testName = testType.typeName;
11901 const deUint32 itemsPerType = testType.typeComponents;
// Number of loop iterations: input element count divided by components per item.
11902 const size_t iterations = float16InputData.size() / itemsPerType;
11903 const size_t typeStride = itemsPerType * sizeof(deFloat16);
11904 SpecResource specResource;
11905 map<string, string> specs;
11906 VulkanFeatures features;
11907 vector<string> extensions;
// Template substitution values for this type.
11909 specs["num_data_points"] = de::toString(iterations);
11910 specs["tt"] = testType.typeName;
11911 specs["ts"] = testType.typeStorage;
11912 specs["tt_stride"] = de::toString(typeStride);
11913 specs["type_decls"] = testType.typeDecls;
// Assemble the shader fragments; the type's load/store helper templates are
// specialized for the source/destination variables and appended to "testfun".
11915 fragments["capability"] = capabilities.specialize(specs);
11916 fragments["decoration"] = decoration.specialize(specs);
11917 fragments["pre_main"] = preMain.specialize(specs);
11918 fragments["testfun"] = testFun.specialize(specs);
11919 fragments["testfun"] += StringTemplate(testType.loadFunc).specialize({{"var", "ssbo_src"}});
11920 fragments["testfun"] += StringTemplate(testType.storeFunc).specialize({{"var", "ssbo_dst"}});
// One storage-buffer input, one storage-buffer output, verified by compareFP16FunctionSetFunc.
11922 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11923 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11924 specResource.verifyIO = compareFP16FunctionSetFunc;
// Required device extension and feature for 16-bit float arithmetic in shaders.
11926 extensions.push_back("VK_KHR_shader_float16_int8");
11928 features.extFloat16Int8.shaderFloat16 = true;
11930 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11933 return testGroup.release();
11936 bool compareFP16VectorExtractFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11938 if (inputs.size() != 2 || outputAllocs.size() != 1)
11941 vector<deUint8> input1Bytes;
11942 vector<deUint8> input2Bytes;
11944 inputs[0].getBytes(input1Bytes);
11945 inputs[1].getBytes(input2Bytes);
11947 DE_ASSERT(input1Bytes.size() > 0);
11948 DE_ASSERT(input2Bytes.size() > 0);
11949 DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
11951 const size_t iterations = input2Bytes.size() / sizeof(deUint32);
11952 const size_t components = input1Bytes.size() / (sizeof(deFloat16) * iterations);
11953 const deFloat16* const input1AsFP16 = (const deFloat16*)&input1Bytes[0];
11954 const deUint32* const inputIndices = (const deUint32*)&input2Bytes[0];
11955 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
11958 DE_ASSERT(components == 2 || components == 4);
11959 DE_ASSERT(input1Bytes.size() == iterations * components * sizeof(deFloat16));
11961 for (size_t idx = 0; idx < iterations; ++idx)
11963 const deUint32 componentNdx = inputIndices[idx];
11965 DE_ASSERT(componentNdx < components);
11967 const deFloat16 expected = input1AsFP16[components * idx + componentNdx];
11969 if (!compare16BitFloat(expected, outputAsFP16[idx], error))
11971 log << TestLog::Message << "At " << idx << error << TestLog::EndMessage;
// Creates the "opvectorextractdynamic" test group.
//
// One test case is registered per entry of testTypes, named after the entry's
// typeName. In each case the shader loads a vector with %ld_arg_ssbo_src, reads
// a component index from %ssbo_idx, extracts that component with
// OpVectorExtractDynamic and stores the %f16 result with %st_fn_ssbo_dst. The
// output is checked with compareFP16VectorExtractFunc.
//
// Returns the raw group pointer released from the local MovePtr.
11980 template<class SpecResource>
11981 tcu::TestCaseGroup* createFloat16VectorExtractSet (tcu::TestContext& testCtx)
11983 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opvectorextractdynamic", "OpVectorExtractDynamic tests"));
// The random generator is seeded from a hash of the group name; it produces
// both the float data and the per-item component indices below.
11985 de::Random rnd (deStringHash(testGroup->getName()));
11986 const deUint32 numDataPoints = 256;
11987 const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
// Zero-filled buffer with the same element count as the input; used as the output resource.
11988 const vector<deFloat16> float16OutputUnused (float16InputData.size(), 0);
// Per-type description: component count, byte stride of one item in the source
// buffer, SPIR-V type name, extra type declarations, storage array suffix
// (appended to "%ra_" in preMain) and the load/store helper function templates.
11992 const deUint32 typeComponents;
11993 const size_t typeStride;
11994 const char* typeName;
11995 const char* typeDecls;
11996 const char* typeStorage;
11997 const string loadFunction;
11998 const string storeFunction;
12001 const TestType testTypes[] =
// Entry with a stride of two halfs; declares %v2f16.
12005 2 * sizeof(deFloat16),
12007 " %v2f16 = OpTypeVector %f16 2\n"
12008 "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12009 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12010 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12011 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12014 storeScalarF16AsUint
// Entry declaring %v3f16; its stride is four halfs, the same as the %v4f16 entry below.
12018 4 * sizeof(deFloat16),
12020 " %v2f16 = OpTypeVector %f16 2\n"
12021 " %v3f16 = OpTypeVector %f16 3\n"
12022 "%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12023 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12024 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12025 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12027 loadV3F16FromUints,
12028 storeScalarF16AsUint
// Entry declaring %v4f16 with a stride of four halfs.
12032 4 * sizeof(deFloat16),
12034 " %v2f16 = OpTypeVector %f16 2\n"
12035 " %v4f16 = OpTypeVector %f16 4\n"
12036 "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12037 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12038 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12039 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12041 loadV4F16FromUints,
12042 storeScalarF16AsUint
// Shared SPIR-V declarations. ${num_data_points} and ${ts} are substituted from
// the specs map built in the loop below. %c_i32_hndp is %c_i32_ndp divided by
// two (SDiv by %c_i32_2).
12046 const StringTemplate preMain
12048 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12049 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
12050 " %f16 = OpTypeFloat 16\n"
// Index buffer: one %u32 per data point.
12054 " %up_u32 = OpTypePointer Uniform %u32\n"
12055 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12056 " %SSBO_IDX = OpTypeStruct %ra_u32\n"
12057 "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
// Source buffer: wraps whichever array type the entry's typeStorage selects.
12059 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12060 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12061 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12062 " %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12063 "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
// Destination buffer: %u32 array of half the data point count.
12065 " %ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
12066 " %SSBO_DST = OpTypeStruct %ra_u32_hndp\n"
12067 "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12069 " %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12070 " %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12071 " %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
// Array strides and descriptor bindings: source at binding 0, indices at
// binding 1, destination at binding 2 (all in set 0).
// NOTE(review): %ra_u32_ndp is declared in preMain but receives no ArrayStride
// decoration here - confirm that no typeStorage value selects it.
12074 const StringTemplate decoration
12076 "OpDecorate %ra_u32_2 ArrayStride 4\n"
12077 "OpDecorate %ra_u32_hndp ArrayStride 4\n"
12078 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12079 "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12080 "OpDecorate %SSBO_SRC BufferBlock\n"
12081 "OpDecorate %ssbo_src DescriptorSet 0\n"
12082 "OpDecorate %ssbo_src Binding 0\n"
12084 "OpDecorate %ra_u32 ArrayStride 4\n"
12085 "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12086 "OpDecorate %SSBO_IDX BufferBlock\n"
12087 "OpDecorate %ssbo_idx DescriptorSet 0\n"
12088 "OpDecorate %ssbo_idx Binding 1\n"
12090 "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12091 "OpDecorate %SSBO_DST BufferBlock\n"
12092 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12093 "OpDecorate %ssbo_dst Binding 2\n"
// Test body. The loop is entered only when the %isUniqueIdZero call returns
// true; otherwise control branches straight to %end_if. %test_code returns its
// parameter unchanged.
12096 const StringTemplate testFun
12098 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12099 " %param = OpFunctionParameter %v4f32\n"
12100 " %entry = OpLabel\n"
12102 " %i = OpVariable %fp_i32 Function\n"
12103 " OpStore %i %c_i32_0\n"
12105 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12106 " OpSelectionMerge %end_if None\n"
12107 " OpBranchConditional %will_run %run_test %end_if\n"
12109 " %run_test = OpLabel\n"
12110 " OpBranch %loop\n"
12112 " %loop = OpLabel\n"
12113 " %i_cmp = OpLoad %i32 %i\n"
12114 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12115 " OpLoopMerge %merge %next None\n"
12116 " OpBranchConditional %lt %write %merge\n"
12118 " %write = OpLabel\n"
12119 " %ndx = OpLoad %i32 %i\n"
// Load vector %ndx and the component index stored at the same position.
12121 " %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12123 " %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12124 " %val_idx = OpLoad %u32 %src_idx\n"
// Instruction under test: dynamic component extraction, then store of the scalar result.
12126 " %val_dst = OpVectorExtractDynamic %f16 %val_src %val_idx\n"
12127 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12129 " OpBranch %next\n"
12131 " %next = OpLabel\n"
12132 " %i_cur = OpLoad %i32 %i\n"
12133 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12134 " OpStore %i %i_new\n"
12135 " OpBranch %loop\n"
12137 " %merge = OpLabel\n"
12138 " OpBranch %end_if\n"
12139 " %end_if = OpLabel\n"
12140 " OpReturnValue %param\n"
// Register one test case per type entry.
12145 for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12147 const TestType& testType = testTypes[testTypeIdx];
12148 const string testName = testType.typeName;
// Items are counted by stride, not by component count, so padded types consume
// typeStride / sizeof(deFloat16) input elements each.
12149 const size_t itemsPerType = testType.typeStride / sizeof(deFloat16);
12150 const size_t iterations = float16InputData.size() / itemsPerType;
12151 SpecResource specResource;
12152 map<string, string> specs;
12153 VulkanFeatures features;
12154 vector<deUint32> inputDataNdx;
12155 map<string, string> fragments;
12156 vector<string> extensions;
// One random component index per item, in the range [0, typeComponents).
12158 for (deUint32 ndx = 0; ndx < iterations; ++ndx)
12159 inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
// Template substitution values for this type.
12161 specs["num_data_points"] = de::toString(iterations);
12162 specs["tt"] = testType.typeName;
12163 specs["ts"] = testType.typeStorage;
12164 specs["tt_stride"] = de::toString(testType.typeStride);
// NOTE(review): the key is "type_decl" (singular) - confirm it matches the
// placeholder name used by the preMain template.
12165 specs["type_decl"] = testType.typeDecls;
// Assemble the shader fragments; the type's load/store helper templates are
// specialized for the source/destination variables and appended to "testfun".
12167 fragments["capability"] = "OpCapability Float16\n";
12168 fragments["decoration"] = decoration.specialize(specs);
12169 fragments["pre_main"] = preMain.specialize(specs);
12170 fragments["testfun"] = testFun.specialize(specs);
12171 fragments["testfun"] += StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12172 fragments["testfun"] += StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
// Inputs: float data, then component indices. Output: float buffer.
// Verified by compareFP16VectorExtractFunc.
12174 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12175 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12176 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12177 specResource.verifyIO = compareFP16VectorExtractFunc;
// Required device extension and feature for 16-bit float arithmetic in shaders.
12179 extensions.push_back("VK_KHR_shader_float16_int8");
12181 features.extFloat16Int8.shaderFloat16 = true;
12183 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12186 return testGroup.release();
12189 template<deUint32 COMPONENTS_COUNT, deUint32 REPLACEMENT>
12190 bool compareFP16VectorInsertFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12192 if (inputs.size() != 2 || outputAllocs.size() != 1)
12195 vector<deUint8> input1Bytes;
12196 vector<deUint8> input2Bytes;
12198 inputs[0].getBytes(input1Bytes);
12199 inputs[1].getBytes(input2Bytes);
12201 DE_ASSERT(input1Bytes.size() > 0);
12202 DE_ASSERT(input2Bytes.size() > 0);
12203 DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
12205 const size_t iterations = input2Bytes.size() / sizeof(deUint32);
12206 const size_t componentsStride = input1Bytes.size() / (sizeof(deFloat16) * iterations);
12207 const deFloat16* const input1AsFP16 = (const deFloat16*)&input1Bytes[0];
12208 const deUint32* const inputIndices = (const deUint32*)&input2Bytes[0];
12209 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
12210 const deFloat16 magic = tcu::Float16(float(REPLACEMENT)).bits();
12213 DE_ASSERT(componentsStride == 2 || componentsStride == 4);
12214 DE_ASSERT(input1Bytes.size() == iterations * componentsStride * sizeof(deFloat16));
12216 for (size_t idx = 0; idx < iterations; ++idx)
12218 const deFloat16* inputVec = &input1AsFP16[componentsStride * idx];
12219 const deFloat16* outputVec = &outputAsFP16[componentsStride * idx];
12220 const deUint32 replacedCompNdx = inputIndices[idx];
12222 DE_ASSERT(replacedCompNdx < COMPONENTS_COUNT);
12224 for (size_t compNdx = 0; compNdx < COMPONENTS_COUNT; ++compNdx)
12226 const deFloat16 expected = (compNdx == replacedCompNdx) ? magic : inputVec[compNdx];
12228 if (!compare16BitFloat(expected, outputVec[compNdx], error))
12230 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12240 template<class SpecResource>
12241 tcu::TestCaseGroup* createFloat16VectorInsertSet (tcu::TestContext& testCtx)
12243 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opvectorinsertdynamic", "OpVectorInsertDynamic tests"));
12245 de::Random rnd (deStringHash(testGroup->getName()));
12246 const deUint32 replacement = 42;
12247 const deUint32 numDataPoints = 256;
12248 const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
12249 const vector<deFloat16> float16OutputUnused (float16InputData.size(), 0);
12253 const deUint32 typeComponents;
12254 const size_t typeStride;
12255 const char* typeName;
12256 const char* typeDecls;
12257 VerifyIOFunc verifyIOFunc;
12258 const char* typeStorage;
12259 const string loadFunction;
12260 const string storeFunction;
12263 const TestType testTypes[] =
12267 2 * sizeof(deFloat16),
12269 " %v2f16 = OpTypeVector %f16 2\n"
12270 "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12271 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
12272 compareFP16VectorInsertFunc<2, replacement>,
12279 4 * sizeof(deFloat16),
12281 " %v2f16 = OpTypeVector %f16 2\n"
12282 " %v3f16 = OpTypeVector %f16 3\n"
12283 "%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12284 "%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n",
12285 compareFP16VectorInsertFunc<3, replacement>,
12287 loadV3F16FromUints,
12292 4 * sizeof(deFloat16),
12294 " %v2f16 = OpTypeVector %f16 2\n"
12295 " %v4f16 = OpTypeVector %f16 4\n"
12296 "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12297 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
12298 compareFP16VectorInsertFunc<4, replacement>,
12300 loadV4F16FromUints,
12305 const StringTemplate preMain
12307 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12308 " %f16 = OpTypeFloat 16\n"
12309 " %c_f16_ins = OpConstant %f16 ${replacement}\n"
12313 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12314 " %up_u32 = OpTypePointer Uniform %u32\n"
12315 " %SSBO_IDX = OpTypeStruct %ra_u32\n"
12316 "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12318 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12319 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12320 " %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12321 "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12323 " %SSBO_DST = OpTypeStruct %ra_${ts}\n"
12324 "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12326 " %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12327 " %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12328 " %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
12331 const StringTemplate decoration
12333 "OpDecorate %ra_u32_2 ArrayStride 4\n"
12334 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12335 "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12336 "OpDecorate %SSBO_SRC BufferBlock\n"
12337 "OpDecorate %ssbo_src DescriptorSet 0\n"
12338 "OpDecorate %ssbo_src Binding 0\n"
12340 "OpDecorate %ra_u32 ArrayStride 4\n"
12341 "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12342 "OpDecorate %SSBO_IDX BufferBlock\n"
12343 "OpDecorate %ssbo_idx DescriptorSet 0\n"
12344 "OpDecorate %ssbo_idx Binding 1\n"
12346 "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12347 "OpDecorate %SSBO_DST BufferBlock\n"
12348 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12349 "OpDecorate %ssbo_dst Binding 2\n"
12352 const StringTemplate testFun
12354 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12355 " %param = OpFunctionParameter %v4f32\n"
12356 " %entry = OpLabel\n"
12358 " %i = OpVariable %fp_i32 Function\n"
12359 " OpStore %i %c_i32_0\n"
12361 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12362 " OpSelectionMerge %end_if None\n"
12363 " OpBranchConditional %will_run %run_test %end_if\n"
12365 " %run_test = OpLabel\n"
12366 " OpBranch %loop\n"
12368 " %loop = OpLabel\n"
12369 " %i_cmp = OpLoad %i32 %i\n"
12370 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12371 " OpLoopMerge %merge %next None\n"
12372 " OpBranchConditional %lt %write %merge\n"
12374 " %write = OpLabel\n"
12375 " %ndx = OpLoad %i32 %i\n"
12377 " %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12379 " %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12380 " %val_idx = OpLoad %u32 %src_idx\n"
12382 " %val_dst = OpVectorInsertDynamic %${tt} %val_src %c_f16_ins %val_idx\n"
12383 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12385 " OpBranch %next\n"
12387 " %next = OpLabel\n"
12388 " %i_cur = OpLoad %i32 %i\n"
12389 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12390 " OpStore %i %i_new\n"
12391 " OpBranch %loop\n"
12393 " %merge = OpLabel\n"
12394 " OpBranch %end_if\n"
12395 " %end_if = OpLabel\n"
12396 " OpReturnValue %param\n"
12401 for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12403 const TestType& testType = testTypes[testTypeIdx];
12404 const string testName = testType.typeName;
12405 const size_t itemsPerType = testType.typeStride / sizeof(deFloat16);
12406 const size_t iterations = float16InputData.size() / itemsPerType;
12407 SpecResource specResource;
12408 map<string, string> specs;
12409 VulkanFeatures features;
12410 vector<deUint32> inputDataNdx;
12411 map<string, string> fragments;
12412 vector<string> extensions;
12414 for (deUint32 ndx = 0; ndx < iterations; ++ndx)
12415 inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12417 specs["num_data_points"] = de::toString(iterations);
12418 specs["tt"] = testType.typeName;
12419 specs["ts"] = testType.typeStorage;
12420 specs["tt_stride"] = de::toString(testType.typeStride);
12421 specs["type_decl"] = testType.typeDecls;
12422 specs["replacement"] = de::toString(replacement);
12424 fragments["capability"] = "OpCapability Float16\n";
12425 fragments["decoration"] = decoration.specialize(specs);
12426 fragments["pre_main"] = preMain.specialize(specs);
12427 fragments["testfun"] = testFun.specialize(specs);
12428 fragments["testfun"] += StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12429 fragments["testfun"] += StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12431 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12432 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12433 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12434 specResource.verifyIO = testType.verifyIOFunc;
12436 extensions.push_back("VK_KHR_shader_float16_int8");
12438 features.extFloat16Int8.shaderFloat16 = true;
12440 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12443 return testGroup.release();
// Reference helper for the OpVectorShuffle tests: selects the value expected in one component of the shuffled result.
//   iteration     data point index; reduced modulo compNdxCount * compNdxCount to pick the shuffle case
//   componentNdx  destination component (0..3); any other value throws InternalError
//   input1Vec     first source vector, vec1Len components are addressed
//   input2Vec     second source vector, vec2Len components are addressed
//   validate      output flag; NOTE(review): presumably tells the caller whether the returned value should be compared - confirm
12446 inline deFloat16 getShuffledComponent (const size_t iteration, const size_t componentNdx, const deFloat16* input1Vec, const deFloat16* input2Vec, size_t vec1Len, size_t vec2Len, bool& validate)
// Number of selectable indices: every component of both sources plus one extra index past the end.
12448 const size_t compNdxCount = (vec1Len + vec2Len + 1);
12449 const size_t compNdxLimited = iteration % (compNdxCount * compNdxCount);
// Destination components 0 and 1 are driven by the case number (quotient and remainder);
// destination components 2 and 3 always use source indices 0 and 1.
12452 switch (componentNdx)
12454 case 0: comp = compNdxLimited / compNdxCount; break;
12455 case 1: comp = compNdxLimited % compNdxCount; break;
12456 case 2: comp = 0; break;
12457 case 3: comp = 1; break;
12458 default: TCU_THROW(InternalError, "Impossible");
// comp lies past the last component of the second source, i.e. it is the extra index.
12461 if (comp >= vec1Len + vec2Len)
// Indices below vec1Len address the first source; the remaining ones address the second source.
12469 return (comp < vec1Len) ? input1Vec[comp] : input2Vec[comp - vec1Len];
// Verification callback for the OpVectorShuffle tests, instantiated once per combination of
// destination / source 0 / source 1 vector widths (each must be 2, 3 or 4).
// Reads both input buffers, walks the output allocation and checks every destination component
// against the value produced by getShuffledComponent(); a failing component is reported to the log.
//   inputs        the two source buffers
//   outputAllocs  the single allocation holding the shader output
//   log           receives one message per mismatching component
12473 template<deUint32 DST_COMPONENTS_COUNT, deUint32 SRC0_COMPONENTS_COUNT, deUint32 SRC1_COMPONENTS_COUNT>
12474 bool compareFP16VectorShuffleFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12476 DE_STATIC_ASSERT(DST_COMPONENTS_COUNT == 2 || DST_COMPONENTS_COUNT == 3 || DST_COMPONENTS_COUNT == 4);
12477 DE_STATIC_ASSERT(SRC0_COMPONENTS_COUNT == 2 || SRC0_COMPONENTS_COUNT == 3 || SRC0_COMPONENTS_COUNT == 4);
12478 DE_STATIC_ASSERT(SRC1_COMPONENTS_COUNT == 2 || SRC1_COMPONENTS_COUNT == 3 || SRC1_COMPONENTS_COUNT == 4);
// Exactly two input buffers and one output allocation are expected.
12480 if (inputs.size() != 2 || outputAllocs.size() != 1)
12483 vector<deUint8> input1Bytes;
12484 vector<deUint8> input2Bytes;
12486 inputs[0].getBytes(input1Bytes);
12487 inputs[1].getBytes(input2Bytes);
12489 DE_ASSERT(input1Bytes.size() > 0);
12490 DE_ASSERT(input2Bytes.size() > 0);
12491 DE_ASSERT(input2Bytes.size() % sizeof(deFloat16) == 0);
// Three-component vectors are indexed with a stride of four half-floats; other widths use their own size.
12493 const size_t componentsStrideDst = (DST_COMPONENTS_COUNT == 3) ? 4 : DST_COMPONENTS_COUNT;
12494 const size_t componentsStrideSrc0 = (SRC0_COMPONENTS_COUNT == 3) ? 4 : SRC0_COMPONENTS_COUNT;
12495 const size_t componentsStrideSrc1 = (SRC1_COMPONENTS_COUNT == 3) ? 4 : SRC1_COMPONENTS_COUNT;
// The iteration count is derived from the size of the first input only.
12496 const size_t iterations = input1Bytes.size() / (componentsStrideSrc0 * sizeof(deFloat16));
12497 const deFloat16* const input1AsFP16 = (const deFloat16*)&input1Bytes[0];
12498 const deFloat16* const input2AsFP16 = (const deFloat16*)&input2Bytes[0];
12499 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
// Both inputs must hold exactly one (strided) vector per iteration.
12502 DE_ASSERT(input1Bytes.size() == iterations * componentsStrideSrc0 * sizeof(deFloat16));
12503 DE_ASSERT(input2Bytes.size() == iterations * componentsStrideSrc1 * sizeof(deFloat16));
12505 for (size_t idx = 0; idx < iterations; ++idx)
12507 const deFloat16* input1Vec = &input1AsFP16[componentsStrideSrc0 * idx];
12508 const deFloat16* input2Vec = &input2AsFP16[componentsStrideSrc1 * idx];
12509 const deFloat16* outputVec = &outputAsFP16[componentsStrideDst * idx];
// Only the real components are checked; the padding slot of a three-component vector is never read.
12511 for (size_t compNdx = 0; compNdx < DST_COMPONENTS_COUNT; ++compNdx)
12513 bool validate = true;
12514 deFloat16 expected = getShuffledComponent(idx, compNdx, input1Vec, input2Vec, SRC0_COMPONENTS_COUNT, SRC1_COMPONENTS_COUNT, validate);
// The comparison is skipped whenever getShuffledComponent() left validate false.
12516 if (validate && !compare16BitFloat(expected, outputVec[compNdx], error))
12518 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
// Maps run-time component counts to the matching compareFP16VectorShuffleFunc instantiation.
// The three counts are packed into a decimal code (dst * 100 + src0 * 10 + src1), so for
// example 234 selects compareFP16VectorShuffleFunc<2, 3, 4>.
// Throws InternalError for any code that is not one of the 27 combinations of 2, 3 and 4.
12528 VerifyIOFunc getFloat16VectorShuffleVerifyIOFunc (deUint32 dstComponentsCount, deUint32 src0ComponentsCount, deUint32 src1ComponentsCount)
// Only the upper bound is asserted; a count below 2 yields a code with no matching case.
12530 DE_ASSERT(dstComponentsCount <= 4);
12531 DE_ASSERT(src0ComponentsCount <= 4);
12532 DE_ASSERT(src1ComponentsCount <= 4);
12533 deUint32 funcCode = 100 * dstComponentsCount + 10 * src0ComponentsCount + src1ComponentsCount;
// Case labels read as <dst><src0><src1>.
12537 case 222:return compareFP16VectorShuffleFunc<2, 2, 2>;
12538 case 223:return compareFP16VectorShuffleFunc<2, 2, 3>;
12539 case 224:return compareFP16VectorShuffleFunc<2, 2, 4>;
12540 case 232:return compareFP16VectorShuffleFunc<2, 3, 2>;
12541 case 233:return compareFP16VectorShuffleFunc<2, 3, 3>;
12542 case 234:return compareFP16VectorShuffleFunc<2, 3, 4>;
12543 case 242:return compareFP16VectorShuffleFunc<2, 4, 2>;
12544 case 243:return compareFP16VectorShuffleFunc<2, 4, 3>;
12545 case 244:return compareFP16VectorShuffleFunc<2, 4, 4>;
12546 case 322:return compareFP16VectorShuffleFunc<3, 2, 2>;
12547 case 323:return compareFP16VectorShuffleFunc<3, 2, 3>;
12548 case 324:return compareFP16VectorShuffleFunc<3, 2, 4>;
12549 case 332:return compareFP16VectorShuffleFunc<3, 3, 2>;
12550 case 333:return compareFP16VectorShuffleFunc<3, 3, 3>;
12551 case 334:return compareFP16VectorShuffleFunc<3, 3, 4>;
12552 case 342:return compareFP16VectorShuffleFunc<3, 4, 2>;
12553 case 343:return compareFP16VectorShuffleFunc<3, 4, 3>;
12554 case 344:return compareFP16VectorShuffleFunc<3, 4, 4>;
12555 case 422:return compareFP16VectorShuffleFunc<4, 2, 2>;
12556 case 423:return compareFP16VectorShuffleFunc<4, 2, 3>;
12557 case 424:return compareFP16VectorShuffleFunc<4, 2, 4>;
12558 case 432:return compareFP16VectorShuffleFunc<4, 3, 2>;
12559 case 433:return compareFP16VectorShuffleFunc<4, 3, 3>;
12560 case 434:return compareFP16VectorShuffleFunc<4, 3, 4>;
12561 case 442:return compareFP16VectorShuffleFunc<4, 4, 2>;
12562 case 443:return compareFP16VectorShuffleFunc<4, 4, 3>;
12563 case 444:return compareFP16VectorShuffleFunc<4, 4, 4>;
12564 default: TCU_THROW(InternalError, "Invalid number of components specified.");
// Builds the "opvectorshuffle" test group: one test per (destination, source 0, source 1)
// combination of the entries in testTypes, named by concatenating the three component counts.
// Each generated shader loops over numDataPoints items, loads both source vectors, calls %sw_fun
// (which switches on the item index modulo the case count to run one OpVectorShuffle variant)
// and stores the result. The output is checked by the callback returned from
// getFloat16VectorShuffleVerifyIOFunc().
// SpecResource is the resource specification type handed to finalizeTestsCreation().
// Returns the released test group pointer.
12568 template<class SpecResource>
12569 tcu::TestCaseGroup* createFloat16VectorShuffleSet (tcu::TestContext& testCtx)
12571 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opvectorshuffle", "OpVectorShuffle tests"));
// The random seed combines the command-line base seed with a hash of the group name.
12572 const int testSpecificSeed = deStringHash(testGroup->getName());
12573 const int seed = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
12574 de::Random rnd (seed);
12575 const deUint32 numDataPoints = 128;
12576 map<string, string> fragments;
// Per-type description: component count, SPIR-V type name and the load/store helper code for that type.
12580 const deUint32 typeComponents;
12581 const char* typeName;
12582 const string loadFunction;
12583 const string storeFunction;
12586 const TestType testTypes[] =
12597 loadV3F16FromUints,
12603 loadV4F16FromUints,
// Type, constant and variable declarations; ${tt_src0}, ${tt_src1} and ${tt_dst} select the SSBO struct used by each variable.
12608 const StringTemplate preMain
12610 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12611 " %c_i32_cc = OpConstant %i32 ${case_count}\n"
12612 " %f16 = OpTypeFloat 16\n"
12613 " %v2f16 = OpTypeVector %f16 2\n"
12614 " %v3f16 = OpTypeVector %f16 3\n"
12615 " %v4f16 = OpTypeVector %f16 4\n"
12617 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12618 " %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12619 " %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12620 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
12621 "%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
12622 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
12624 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12625 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12626 " %ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12627 " %up_u32 = OpTypePointer Uniform %u32\n"
12628 " %SSBO_v2f16 = OpTypeStruct %ra_u32_ndp\n"
12629 " %SSBO_v3f16 = OpTypeStruct %ra_ra_u32_2\n"
12630 " %SSBO_v4f16 = OpTypeStruct %ra_ra_u32_2\n"
12632 "%up_SSBO_v2f16 = OpTypePointer Uniform %SSBO_v2f16\n"
12633 "%up_SSBO_v3f16 = OpTypePointer Uniform %SSBO_v3f16\n"
12634 "%up_SSBO_v4f16 = OpTypePointer Uniform %SSBO_v4f16\n"
12636 " %fun_t = OpTypeFunction %${tt_dst} %${tt_src0} %${tt_src1} %i32\n"
12638 " %ssbo_src0 = OpVariable %up_SSBO_${tt_src0} Uniform\n"
12639 " %ssbo_src1 = OpVariable %up_SSBO_${tt_src1} Uniform\n"
12640 " %ssbo_dst = OpVariable %up_SSBO_${tt_dst} Uniform\n"
// Layout decorations and bindings: source 0, source 1 and destination use bindings 0, 1 and 2 of descriptor set 0.
12643 const StringTemplate decoration
12645 "OpDecorate %ra_u32_2 ArrayStride 4\n"
12646 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
12647 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12649 "OpMemberDecorate %SSBO_v2f16 0 Offset 0\n"
12650 "OpDecorate %SSBO_v2f16 BufferBlock\n"
12652 "OpMemberDecorate %SSBO_v3f16 0 Offset 0\n"
12653 "OpDecorate %SSBO_v3f16 BufferBlock\n"
12655 "OpMemberDecorate %SSBO_v4f16 0 Offset 0\n"
12656 "OpDecorate %SSBO_v4f16 BufferBlock\n"
12658 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
12659 "OpDecorate %ssbo_src0 Binding 0\n"
12660 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
12661 "OpDecorate %ssbo_src1 Binding 1\n"
12662 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12663 "OpDecorate %ssbo_dst Binding 2\n"
// Main test function: the loop body only runs when %isUniqueIdZero returns true, and iterates %i from 0 while it is below %c_i32_ndp.
12666 const StringTemplate testFun
12668 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12669 " %param = OpFunctionParameter %v4f32\n"
12670 " %entry = OpLabel\n"
12672 " %i = OpVariable %fp_i32 Function\n"
12673 " OpStore %i %c_i32_0\n"
12675 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12676 " OpSelectionMerge %end_if None\n"
12677 " OpBranchConditional %will_run %run_test %end_if\n"
12679 " %run_test = OpLabel\n"
12680 " OpBranch %loop\n"
12682 " %loop = OpLabel\n"
12683 " %i_cmp = OpLoad %i32 %i\n"
12684 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12685 " OpLoopMerge %merge %next None\n"
12686 " OpBranchConditional %lt %write %merge\n"
12688 " %write = OpLabel\n"
12689 " %ndx = OpLoad %i32 %i\n"
12690 " %val_src0 = OpFunctionCall %${tt_src0} %ld_arg_ssbo_src0 %ndx\n"
12691 " %val_src1 = OpFunctionCall %${tt_src1} %ld_arg_ssbo_src1 %ndx\n"
12692 " %val_dst = OpFunctionCall %${tt_dst} %sw_fun %val_src0 %val_src1 %ndx\n"
12693 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12694 " OpBranch %next\n"
12696 " %next = OpLabel\n"
12697 " %i_cur = OpLoad %i32 %i\n"
12698 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12699 " OpStore %i %i_new\n"
12700 " OpBranch %loop\n"
12702 " %merge = OpLabel\n"
12703 " OpBranch %end_if\n"
12704 " %end_if = OpLabel\n"
12705 " OpReturnValue %param\n"
// Switch function: selects a case block by (item index mod %c_i32_cc); every generated case block returns its own shuffled value.
12709 " %sw_fun = OpFunction %${tt_dst} None %fun_t\n"
12710 "%sw_param0 = OpFunctionParameter %${tt_src0}\n"
12711 "%sw_param1 = OpFunctionParameter %${tt_src1}\n"
12712 "%sw_paramn = OpFunctionParameter %i32\n"
12713 " %sw_entry = OpLabel\n"
12714 " %modulo = OpSMod %i32 %sw_paramn %c_i32_cc\n"
12715 " OpSelectionMerge %switch_e None\n"
12716 " OpSwitch %modulo %default ${case_list}\n"
12718 "%default = OpLabel\n"
12719 " OpUnreachable\n" // Unreachable default case for switch statement
12720 "%switch_e = OpLabel\n"
12721 " OpUnreachable\n" // Unreachable merge block for switch statement
// Template for one switch case: a single OpVectorShuffle with the component literals in ${shuffle}.
12725 const StringTemplate testCaseBody
12727 "%case_${case_ndx} = OpLabel\n"
12728 "%val_dst_${case_ndx} = OpVectorShuffle %${tt_dst} %sw_param0 %sw_param1 ${shuffle}\n"
12729 " OpReturnValue %val_dst_${case_ndx}\n"
// Enumerate every (destination, source 0, source 1) type combination.
12732 for (deUint32 dstTypeIdx = 0; dstTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++dstTypeIdx)
12734 const TestType& dstType = testTypes[dstTypeIdx];
12736 for (deUint32 comp0Idx = 0; comp0Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp0Idx)
12738 const TestType& src0Type = testTypes[comp0Idx];
12740 for (deUint32 comp1Idx = 0; comp1Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp1Idx)
12742 const TestType& src1Type = testTypes[comp1Idx];
// Three-component vectors take four half-float slots per item in the data buffers.
12743 const deUint32 input0Stride = (src0Type.typeComponents == 3) ? 4 : src0Type.typeComponents;
12744 const deUint32 input1Stride = (src1Type.typeComponents == 3) ? 4 : src1Type.typeComponents;
12745 const deUint32 outputStride = (dstType.typeComponents == 3) ? 4 : dstType.typeComponents;
12746 const vector<deFloat16> float16Input0Data = getFloat16s(rnd, input0Stride * numDataPoints);
12747 const vector<deFloat16> float16Input1Data = getFloat16s(rnd, input1Stride * numDataPoints);
12748 const vector<deFloat16> float16OutputUnused (outputStride * numDataPoints, 0);
12749 const string testName = de::toString(dstType.typeComponents) + de::toString(src0Type.typeComponents) + de::toString(src1Type.typeComponents);
12750 deUint32 caseCount = 0;
12751 SpecResource specResource;
12752 map<string, string> specs;
12753 vector<string> extensions;
12754 VulkanFeatures features;
12760 vector<string> componentList;
12762 // Generate the possible component indices for OpVectorShuffle for components 0 and 1 of the output vector
12764 deUint32 caseNo = 0;
// Indices are numbered consecutively across both sources: source 0 components first, then source 1.
12766 for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < src0Type.typeComponents; ++comp0IdxLocal)
12767 componentList.push_back(de::toString(caseNo++));
12768 for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < src1Type.typeComponents; ++comp1IdxLocal)
12769 componentList.push_back(de::toString(caseNo++));
// 0xFFFFFFFF is the OpVectorShuffle literal for "no source" (the result component is undefined).
12770 componentList.push_back("0xFFFFFFFF");
// One switch case per ordered pair of list entries, used for destination components 0 and 1.
12773 for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < componentList.size(); ++comp0IdxLocal)
12775 for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < componentList.size(); ++comp1IdxLocal)
12777 map<string, string> specCase;
12778 string shuffle = componentList[comp0IdxLocal] + " " + componentList[comp1IdxLocal];
// Any further destination components (2 and 3) always select source indices 0 and 1.
12780 for (deUint32 compIdx = 2; compIdx < dstType.typeComponents; ++compIdx)
12781 shuffle += " " + de::toString(compIdx - 2);
12783 specCase["case_ndx"] = de::toString(caseCount);
12784 specCase["shuffle"] = shuffle;
12785 specCase["tt_dst"] = dstType.typeName;
12787 caseBodies += testCaseBody.specialize(specCase);
12788 caseList += de::toString(caseCount) + " %case_" + de::toString(caseCount) + " ";
// Template parameters shared by the decoration, pre_main and testfun fragments.
12795 specs["num_data_points"] = de::toString(numDataPoints);
12796 specs["tt_dst"] = dstType.typeName;
12797 specs["tt_src0"] = src0Type.typeName;
12798 specs["tt_src1"] = src1Type.typeName;
12799 specs["case_bodies"] = caseBodies;
12800 specs["case_list"] = caseList;
12801 specs["case_count"] = de::toString(caseCount);
12803 fragments["capability"] = "OpCapability Float16\n";
12804 fragments["decoration"] = decoration.specialize(specs);
12805 fragments["pre_main"] = preMain.specialize(specs);
12806 fragments["testfun"] = testFun.specialize(specs);
// Append the load helpers for both sources and the store helper for the destination, bound to their SSBO variables.
12807 fragments["testfun"] += StringTemplate(src0Type.loadFunction).specialize({{"var", "ssbo_src0"}});
12808 fragments["testfun"] += StringTemplate(src1Type.loadFunction).specialize({{"var", "ssbo_src1"}});
12809 fragments["testfun"] += StringTemplate(dstType.storeFunction).specialize({{"var", "ssbo_dst"}});
// Two storage-buffer inputs and one storage-buffer output, in the same order as the bindings above.
12811 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input0Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12812 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input1Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12813 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12814 specResource.verifyIO = getFloat16VectorShuffleVerifyIOFunc(dstType.typeComponents, src0Type.typeComponents, src1Type.typeComponents);
12816 extensions.push_back("VK_KHR_shader_float16_int8");
12818 features.extFloat16Int8.shaderFloat16 = true;
12820 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12825 return testGroup.release();
// Verification callback for the OpCompositeConstruct tests: walks the single input buffer and the
// single output allocation as arrays of half-floats and compares them element by element with
// compare16BitFloat(), logging the index and error text of a mismatch.
//   inputs        the one reference buffer
//   outputAllocs  the one allocation holding the shader output
//   log           receives a message for a mismatching element
12828 bool compareFP16CompositeFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
// Exactly one input buffer and one output allocation are expected.
12830 if (inputs.size() != 1 || outputAllocs.size() != 1)
12833 vector<deUint8> input1Bytes;
12835 inputs[0].getBytes(input1Bytes);
12837 DE_ASSERT(input1Bytes.size() > 0);
12838 DE_ASSERT(input1Bytes.size() % sizeof(deFloat16) == 0);
// One iteration per half-float in the input buffer.
12840 const size_t iterations = input1Bytes.size() / sizeof(deFloat16);
12841 const deFloat16* const input1AsFP16 = (const deFloat16*)&input1Bytes[0];
12842 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
// Bit pattern of -1.0, used as a marker value in the reference data.
12843 const deFloat16 exceptionValue = tcu::Float16(-1.0).bits();
12846 for (size_t idx = 0; idx < iterations; ++idx)
// NOTE(review): reference elements equal to the marker are presumably excluded from comparison - confirm.
12848 if (input1AsFP16[idx] == exceptionValue)
12851 if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
12853 log << TestLog::Message << "At " << idx << ":" << error << TestLog::EndMessage;
12862 template<class SpecResource>
12863 tcu::TestCaseGroup* createFloat16CompositeConstructSet (tcu::TestContext& testCtx)
12865 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opcompositeconstruct", "OpCompositeConstruct tests"));
12866 const deUint32 numElements = 8;
12867 const string testName = "struct";
12868 const deUint32 structItemsCount = 88;
12869 const deUint32 exceptionIndices[] = { 1, 7, 15, 17, 25, 33, 51, 55, 59, 63, 67, 71, 84, 85, 86, 87 };
12870 const deFloat16 exceptionValue = tcu::Float16(-1.0).bits();
12871 const deUint32 fieldModifier = 2;
12872 const deUint32 fieldModifiedMulIndex = 60;
12873 const deUint32 fieldModifiedAddIndex = 66;
12875 const StringTemplate preMain
12877 " %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
12878 " %f16 = OpTypeFloat 16\n"
12879 " %v2f16 = OpTypeVector %f16 2\n"
12880 " %v3f16 = OpTypeVector %f16 3\n"
12881 " %v4f16 = OpTypeVector %f16 4\n"
12882 " %c_f16_mod = OpConstant %f16 ${field_modifier}\n"
12886 " %c_f16_n1 = OpConstant %f16 -1.0\n"
12887 " %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_n1 %c_f16_n1\n"
12888 " %c_u32_5 = OpConstant %u32 5\n"
12889 " %c_u32_6 = OpConstant %u32 6\n"
12890 " %c_u32_7 = OpConstant %u32 7\n"
12891 " %c_u32_8 = OpConstant %u32 8\n"
12892 " %c_u32_9 = OpConstant %u32 9\n"
12893 " %c_u32_10 = OpConstant %u32 10\n"
12894 " %c_u32_11 = OpConstant %u32 11\n"
12895 " %c_u32_12 = OpConstant %u32 12\n"
12896 " %c_u32_13 = OpConstant %u32 13\n"
12897 " %c_u32_14 = OpConstant %u32 14\n"
12898 " %c_u32_15 = OpConstant %u32 15\n"
12899 " %c_u32_16 = OpConstant %u32 16\n"
12900 " %c_u32_17 = OpConstant %u32 17\n"
12901 " %c_u32_18 = OpConstant %u32 18\n"
12902 " %c_u32_19 = OpConstant %u32 19\n"
12903 " %c_u32_20 = OpConstant %u32 20\n"
12904 " %c_u32_21 = OpConstant %u32 21\n"
12905 " %c_u32_22 = OpConstant %u32 22\n"
12906 " %c_u32_23 = OpConstant %u32 23\n"
12907 " %c_u32_24 = OpConstant %u32 24\n"
12908 " %c_u32_25 = OpConstant %u32 25\n"
12909 " %c_u32_26 = OpConstant %u32 26\n"
12910 " %c_u32_27 = OpConstant %u32 27\n"
12911 " %c_u32_28 = OpConstant %u32 28\n"
12912 " %c_u32_29 = OpConstant %u32 29\n"
12913 " %c_u32_30 = OpConstant %u32 30\n"
12914 " %c_u32_31 = OpConstant %u32 31\n"
12915 " %c_u32_33 = OpConstant %u32 33\n"
12916 " %c_u32_34 = OpConstant %u32 34\n"
12917 " %c_u32_35 = OpConstant %u32 35\n"
12918 " %c_u32_36 = OpConstant %u32 36\n"
12919 " %c_u32_37 = OpConstant %u32 37\n"
12920 " %c_u32_38 = OpConstant %u32 38\n"
12921 " %c_u32_39 = OpConstant %u32 39\n"
12922 " %c_u32_40 = OpConstant %u32 40\n"
12923 " %c_u32_41 = OpConstant %u32 41\n"
12924 " %c_u32_44 = OpConstant %u32 44\n"
12926 " %f16arr3 = OpTypeArray %f16 %c_u32_3\n"
12927 " %v2f16arr3 = OpTypeArray %v2f16 %c_u32_3\n"
12928 " %v2f16arr5 = OpTypeArray %v2f16 %c_u32_5\n"
12929 " %v3f16arr5 = OpTypeArray %v3f16 %c_u32_5\n"
12930 " %v4f16arr3 = OpTypeArray %v4f16 %c_u32_3\n"
12931 " %struct16 = OpTypeStruct %f16 %v2f16arr3\n"
12932 " %struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
12933 " %st_test = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr3 %v2f16arr5 %f16 %v3f16arr5 %v4f16arr3\n"
12935 " %up_u32 = OpTypePointer Uniform %u32\n"
12936 " %ra_u32_44 = OpTypeArray %u32 %c_u32_44\n"
12937 " %ra_ra_u32 = OpTypeArray %ra_u32_44 %c_i32_ndp\n"
12938 " %SSBO_st = OpTypeStruct %ra_ra_u32\n"
12939 " %up_SSBO_st = OpTypePointer Uniform %SSBO_st\n"
12941 " %ssbo_dst = OpVariable %up_SSBO_st Uniform\n"
12944 const StringTemplate decoration
12946 "OpDecorate %SSBO_st BufferBlock\n"
12947 "OpDecorate %ra_u32_44 ArrayStride 4\n"
12948 "OpDecorate %ra_ra_u32 ArrayStride ${struct_item_size}\n"
12949 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12950 "OpDecorate %ssbo_dst Binding 1\n"
12952 "OpMemberDecorate %SSBO_st 0 Offset 0\n"
12954 "OpDecorate %v2f16arr3 ArrayStride 4\n"
12955 "OpMemberDecorate %struct16 0 Offset 0\n"
12956 "OpMemberDecorate %struct16 1 Offset 4\n"
12957 "OpDecorate %struct16arr3 ArrayStride 16\n"
12958 "OpDecorate %f16arr3 ArrayStride 2\n"
12959 "OpDecorate %v2f16arr5 ArrayStride 4\n"
12960 "OpDecorate %v3f16arr5 ArrayStride 8\n"
12961 "OpDecorate %v4f16arr3 ArrayStride 8\n"
12963 "OpMemberDecorate %st_test 0 Offset 0\n"
12964 "OpMemberDecorate %st_test 1 Offset 4\n"
12965 "OpMemberDecorate %st_test 2 Offset 8\n"
12966 "OpMemberDecorate %st_test 3 Offset 16\n"
12967 "OpMemberDecorate %st_test 4 Offset 24\n"
12968 "OpMemberDecorate %st_test 5 Offset 32\n"
12969 "OpMemberDecorate %st_test 6 Offset 80\n"
12970 "OpMemberDecorate %st_test 7 Offset 100\n"
12971 "OpMemberDecorate %st_test 8 Offset 104\n"
12972 "OpMemberDecorate %st_test 9 Offset 144\n"
12975 const StringTemplate testFun
12977 " %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12978 " %param = OpFunctionParameter %v4f32\n"
12979 " %entry = OpLabel\n"
12981 " %i = OpVariable %fp_i32 Function\n"
12982 " OpStore %i %c_i32_0\n"
12984 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12985 " OpSelectionMerge %end_if None\n"
12986 " OpBranchConditional %will_run %run_test %end_if\n"
12988 " %run_test = OpLabel\n"
12989 " OpBranch %loop\n"
12991 " %loop = OpLabel\n"
12992 " %i_cmp = OpLoad %i32 %i\n"
12993 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12994 " OpLoopMerge %merge %next None\n"
12995 " OpBranchConditional %lt %write %merge\n"
12997 " %write = OpLabel\n"
12998 " %ndx = OpLoad %i32 %i\n"
13000 " %fld1 = OpCompositeConstruct %v2f16 %c_f16_2 %c_f16_3\n"
13001 " %fld2 = OpCompositeConstruct %v3f16 %c_f16_4 %c_f16_5 %c_f16_6\n"
13002 " %fld3 = OpCompositeConstruct %v4f16 %c_f16_8 %c_f16_9 %c_f16_10 %c_f16_11\n"
13004 " %fld4 = OpCompositeConstruct %f16arr3 %c_f16_12 %c_f16_13 %c_f16_14\n"
13006 "%fld5_0_1_0 = OpCompositeConstruct %v2f16 %c_f16_18 %c_f16_19\n"
13007 "%fld5_0_1_1 = OpCompositeConstruct %v2f16 %c_f16_20 %c_f16_21\n"
13008 "%fld5_0_1_2 = OpCompositeConstruct %v2f16 %c_f16_22 %c_f16_23\n"
13009 " %fld5_0_1 = OpCompositeConstruct %v2f16arr3 %fld5_0_1_0 %fld5_0_1_1 %fld5_0_1_2\n"
13010 " %fld5_0 = OpCompositeConstruct %struct16 %c_f16_16 %fld5_0_1\n"
13012 "%fld5_1_1_0 = OpCompositeConstruct %v2f16 %c_f16_26 %c_f16_27\n"
13013 "%fld5_1_1_1 = OpCompositeConstruct %v2f16 %c_f16_28 %c_f16_29\n"
13014 "%fld5_1_1_2 = OpCompositeConstruct %v2f16 %c_f16_30 %c_f16_31\n"
13015 " %fld5_1_1 = OpCompositeConstruct %v2f16arr3 %fld5_1_1_0 %fld5_1_1_1 %fld5_1_1_2\n"
13016 " %fld5_1 = OpCompositeConstruct %struct16 %c_f16_24 %fld5_1_1\n"
13018 "%fld5_2_1_0 = OpCompositeConstruct %v2f16 %c_f16_34 %c_f16_35\n"
13019 "%fld5_2_1_1 = OpCompositeConstruct %v2f16 %c_f16_36 %c_f16_37\n"
13020 "%fld5_2_1_2 = OpCompositeConstruct %v2f16 %c_f16_38 %c_f16_39\n"
13021 " %fld5_2_1 = OpCompositeConstruct %v2f16arr3 %fld5_2_1_0 %fld5_2_1_1 %fld5_2_1_2\n"
13022 " %fld5_2 = OpCompositeConstruct %struct16 %c_f16_32 %fld5_2_1\n"
13024 " %fld5 = OpCompositeConstruct %struct16arr3 %fld5_0 %fld5_1 %fld5_2\n"
13026 " %fld6_0 = OpCompositeConstruct %v2f16 %c_f16_40 %c_f16_41\n"
13027 " %fld6_1 = OpCompositeConstruct %v2f16 %c_f16_42 %c_f16_43\n"
13028 " %fld6_2 = OpCompositeConstruct %v2f16 %c_f16_44 %c_f16_45\n"
13029 " %fld6_3 = OpCompositeConstruct %v2f16 %c_f16_46 %c_f16_47\n"
13030 " %fld6_4 = OpCompositeConstruct %v2f16 %c_f16_48 %c_f16_49\n"
13031 " %fld6 = OpCompositeConstruct %v2f16arr5 %fld6_0 %fld6_1 %fld6_2 %fld6_3 %fld6_4\n"
13033 " %fndx = OpConvertSToF %f16 %ndx\n"
13034 " %fld8_2a0 = OpFMul %f16 %fndx %c_f16_mod\n"
13035 " %fld8_3b1 = OpFAdd %f16 %fndx %c_f16_mod\n"
13037 " %fld8_2a = OpCompositeConstruct %v2f16 %fld8_2a0 %c_f16_61\n"
13038 " %fld8_3b = OpCompositeConstruct %v2f16 %c_f16_65 %fld8_3b1\n"
13039 " %fld8_0 = OpCompositeConstruct %v3f16 %c_f16_52 %c_f16_53 %c_f16_54\n"
13040 " %fld8_1 = OpCompositeConstruct %v3f16 %c_f16_56 %c_f16_57 %c_f16_58\n"
13041 " %fld8_2 = OpCompositeConstruct %v3f16 %fld8_2a %c_f16_62\n"
13042 " %fld8_3 = OpCompositeConstruct %v3f16 %c_f16_64 %fld8_3b\n"
13043 " %fld8_4 = OpCompositeConstruct %v3f16 %c_f16_68 %c_f16_69 %c_f16_70\n"
13044 " %fld8 = OpCompositeConstruct %v3f16arr5 %fld8_0 %fld8_1 %fld8_2 %fld8_3 %fld8_4\n"
13046 " %fld9_0 = OpCompositeConstruct %v4f16 %c_f16_72 %c_f16_73 %c_f16_74 %c_f16_75\n"
13047 " %fld9_1 = OpCompositeConstruct %v4f16 %c_f16_76 %c_f16_77 %c_f16_78 %c_f16_79\n"
13048 " %fld9_2 = OpCompositeConstruct %v4f16 %c_f16_80 %c_f16_81 %c_f16_82 %c_f16_83\n"
13049 " %fld9 = OpCompositeConstruct %v4f16arr3 %fld9_0 %fld9_1 %fld9_2\n"
13051 " %st_val = OpCompositeConstruct %st_test %c_f16_0 %fld1 %fld2 %fld3 %fld4 %fld5 %fld6 %c_f16_50 %fld8 %fld9\n"
13053 // Storage section: all elements that are not directly accessed should
13054 // have the value of -1.0. This means for f16 and v3f16 stores the v2f16
13055 // is constructed with one element from a constant -1.0.
13057 " %ex_0 = OpCompositeExtract %f16 %st_val 0\n"
13058 " %vec_0 = OpCompositeConstruct %v2f16 %ex_0 %c_f16_n1\n"
13059 " %bc_0 = OpBitcast %u32 %vec_0\n"
13060 " %gep_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_0\n"
13061 " OpStore %gep_0 %bc_0\n"
13063 // <2 x half> offset 4
13064 " %ex_1 = OpCompositeExtract %v2f16 %st_val 1\n"
13065 " %bc_1 = OpBitcast %u32 %ex_1\n"
13066 " %gep_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_1\n"
13067 " OpStore %gep_1 %bc_1\n"
13069 // <3 x half> offset 8
13070 " %ex_2 = OpCompositeExtract %v3f16 %st_val 2\n"
13071 " %ex_2_0 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 0 1\n"
13072 " %ex_2_1 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 2 3\n"
13073 " %bc_2_0 = OpBitcast %u32 %ex_2_0\n"
13074 " %bc_2_1 = OpBitcast %u32 %ex_2_1\n"
13075 " %gep_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_2\n"
13076 " %gep_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_3\n"
13077 " OpStore %gep_2_0 %bc_2_0\n"
13078 " OpStore %gep_2_1 %bc_2_1\n"
13080 // <4 x half> offset 16
13081 " %ex_3 = OpCompositeExtract %v4f16 %st_val 3\n"
13082 " %ex_3_0 = OpVectorShuffle %v2f16 %ex_3 %ex_3 0 1\n"
13083 " %ex_3_1 = OpVectorShuffle %v2f16 %ex_3 %ex_3 2 3\n"
13084 " %bc_3_0 = OpBitcast %u32 %ex_3_0\n"
13085 " %bc_3_1 = OpBitcast %u32 %ex_3_1\n"
13086 " %gep_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_4\n"
13087 " %gep_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_5\n"
13088 " OpStore %gep_3_0 %bc_3_0\n"
13089 " OpStore %gep_3_1 %bc_3_1\n"
13091 // [3 x half] offset 24
13092 " %ex_4_0 = OpCompositeExtract %f16 %st_val 4 0\n"
13093 " %ex_4_1 = OpCompositeExtract %f16 %st_val 4 1\n"
13094 " %ex_4_2 = OpCompositeExtract %f16 %st_val 4 2\n"
13095 " %vec_4_0 = OpCompositeConstruct %v2f16 %ex_4_0 %ex_4_1\n"
13096 " %vec_4_1 = OpCompositeConstruct %v2f16 %ex_4_2 %c_f16_n1\n"
13097 " %bc_4_0 = OpBitcast %u32 %vec_4_0\n"
13098 " %bc_4_1 = OpBitcast %u32 %vec_4_1\n"
13099 " %gep_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_6\n"
13100 " %gep_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_7\n"
13101 " OpStore %gep_4_0 %bc_4_0\n"
13102 " OpStore %gep_4_1 %bc_4_1\n"
13104 // [3 x {half, [3 x <2 x half>]}] offset 32
13105 " %ex_5_0 = OpCompositeExtract %struct16 %st_val 5 0\n"
13106 " %ex_5_1 = OpCompositeExtract %struct16 %st_val 5 1\n"
13107 " %ex_5_2 = OpCompositeExtract %struct16 %st_val 5 2\n"
13108 " %ex_5_0_0 = OpCompositeExtract %f16 %ex_5_0 0\n"
13109 " %ex_5_1_0 = OpCompositeExtract %f16 %ex_5_1 0\n"
13110 " %ex_5_2_0 = OpCompositeExtract %f16 %ex_5_2 0\n"
13111 "%ex_5_0_1_0 = OpCompositeExtract %v2f16 %ex_5_0 1 0\n"
13112 "%ex_5_0_1_1 = OpCompositeExtract %v2f16 %ex_5_0 1 1\n"
13113 "%ex_5_0_1_2 = OpCompositeExtract %v2f16 %ex_5_0 1 2\n"
13114 "%ex_5_1_1_0 = OpCompositeExtract %v2f16 %ex_5_1 1 0\n"
13115 "%ex_5_1_1_1 = OpCompositeExtract %v2f16 %ex_5_1 1 1\n"
13116 "%ex_5_1_1_2 = OpCompositeExtract %v2f16 %ex_5_1 1 2\n"
13117 "%ex_5_2_1_0 = OpCompositeExtract %v2f16 %ex_5_2 1 0\n"
13118 "%ex_5_2_1_1 = OpCompositeExtract %v2f16 %ex_5_2 1 1\n"
13119 "%ex_5_2_1_2 = OpCompositeExtract %v2f16 %ex_5_2 1 2\n"
13120 " %vec_5_0_0 = OpCompositeConstruct %v2f16 %ex_5_0_0 %c_f16_n1\n"
13121 " %vec_5_1_0 = OpCompositeConstruct %v2f16 %ex_5_1_0 %c_f16_n1\n"
13122 " %vec_5_2_0 = OpCompositeConstruct %v2f16 %ex_5_2_0 %c_f16_n1\n"
13123 " %bc_5_0_0 = OpBitcast %u32 %vec_5_0_0\n"
13124 " %bc_5_1_0 = OpBitcast %u32 %vec_5_1_0\n"
13125 " %bc_5_2_0 = OpBitcast %u32 %vec_5_2_0\n"
13126 "%bc_5_0_1_0 = OpBitcast %u32 %ex_5_0_1_0\n"
13127 "%bc_5_0_1_1 = OpBitcast %u32 %ex_5_0_1_1\n"
13128 "%bc_5_0_1_2 = OpBitcast %u32 %ex_5_0_1_2\n"
13129 "%bc_5_1_1_0 = OpBitcast %u32 %ex_5_1_1_0\n"
13130 "%bc_5_1_1_1 = OpBitcast %u32 %ex_5_1_1_1\n"
13131 "%bc_5_1_1_2 = OpBitcast %u32 %ex_5_1_1_2\n"
13132 "%bc_5_2_1_0 = OpBitcast %u32 %ex_5_2_1_0\n"
13133 "%bc_5_2_1_1 = OpBitcast %u32 %ex_5_2_1_1\n"
13134 "%bc_5_2_1_2 = OpBitcast %u32 %ex_5_2_1_2\n"
13135 " %gep_5_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_8\n"
13136 "%gep_5_0_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_9\n"
13137 "%gep_5_0_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_10\n"
13138 "%gep_5_0_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_11\n"
13139 " %gep_5_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_12\n"
13140 "%gep_5_1_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_13\n"
13141 "%gep_5_1_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_14\n"
13142 "%gep_5_1_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_15\n"
13143 " %gep_5_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_16\n"
13144 "%gep_5_2_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_17\n"
13145 "%gep_5_2_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_18\n"
13146 "%gep_5_2_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_19\n"
13147 " OpStore %gep_5_0_0 %bc_5_0_0\n"
13148 " OpStore %gep_5_0_1_0 %bc_5_0_1_0\n"
13149 " OpStore %gep_5_0_1_1 %bc_5_0_1_1\n"
13150 " OpStore %gep_5_0_1_2 %bc_5_0_1_2\n"
13151 " OpStore %gep_5_1_0 %bc_5_1_0\n"
13152 " OpStore %gep_5_1_1_0 %bc_5_1_1_0\n"
13153 " OpStore %gep_5_1_1_1 %bc_5_1_1_1\n"
13154 " OpStore %gep_5_1_1_2 %bc_5_1_1_2\n"
13155 " OpStore %gep_5_2_0 %bc_5_2_0\n"
13156 " OpStore %gep_5_2_1_0 %bc_5_2_1_0\n"
13157 " OpStore %gep_5_2_1_1 %bc_5_2_1_1\n"
13158 " OpStore %gep_5_2_1_2 %bc_5_2_1_2\n"
13160 // [5 x <2 x half>] offset 80
13161 " %ex_6_0 = OpCompositeExtract %v2f16 %st_val 6 0\n"
13162 " %ex_6_1 = OpCompositeExtract %v2f16 %st_val 6 1\n"
13163 " %ex_6_2 = OpCompositeExtract %v2f16 %st_val 6 2\n"
13164 " %ex_6_3 = OpCompositeExtract %v2f16 %st_val 6 3\n"
13165 " %ex_6_4 = OpCompositeExtract %v2f16 %st_val 6 4\n"
13166 " %bc_6_0 = OpBitcast %u32 %ex_6_0\n"
13167 " %bc_6_1 = OpBitcast %u32 %ex_6_1\n"
13168 " %bc_6_2 = OpBitcast %u32 %ex_6_2\n"
13169 " %bc_6_3 = OpBitcast %u32 %ex_6_3\n"
13170 " %bc_6_4 = OpBitcast %u32 %ex_6_4\n"
13171 " %gep_6_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_20\n"
13172 " %gep_6_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_21\n"
13173 " %gep_6_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_22\n"
13174 " %gep_6_3 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_23\n"
13175 " %gep_6_4 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_24\n"
13176 " OpStore %gep_6_0 %bc_6_0\n"
13177 " OpStore %gep_6_1 %bc_6_1\n"
13178 " OpStore %gep_6_2 %bc_6_2\n"
13179 " OpStore %gep_6_3 %bc_6_3\n"
13180 " OpStore %gep_6_4 %bc_6_4\n"
13183 " %ex_7 = OpCompositeExtract %f16 %st_val 7\n"
13184 " %vec_7 = OpCompositeConstruct %v2f16 %ex_7 %c_f16_n1\n"
13185 " %bc_7 = OpBitcast %u32 %vec_7\n"
13186 " %gep_7 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_25\n"
13187 " OpStore %gep_7 %bc_7\n"
13189 // [5 x <3 x half>] offset 104
13190 " %ex_8_0 = OpCompositeExtract %v3f16 %st_val 8 0\n"
13191 " %ex_8_1 = OpCompositeExtract %v3f16 %st_val 8 1\n"
13192 " %ex_8_2 = OpCompositeExtract %v3f16 %st_val 8 2\n"
13193 " %ex_8_3 = OpCompositeExtract %v3f16 %st_val 8 3\n"
13194 " %ex_8_4 = OpCompositeExtract %v3f16 %st_val 8 4\n"
13195 " %vec_8_0_0 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 0 1\n"
13196 " %vec_8_0_1 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 2 3\n"
13197 " %vec_8_1_0 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 0 1\n"
13198 " %vec_8_1_1 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 2 3\n"
13199 " %vec_8_2_0 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 0 1\n"
13200 " %vec_8_2_1 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 2 3\n"
13201 " %vec_8_3_0 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 0 1\n"
13202 " %vec_8_3_1 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 2 3\n"
13203 " %vec_8_4_0 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 0 1\n"
13204 " %vec_8_4_1 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 2 3\n"
13205 " %bc_8_0_0 = OpBitcast %u32 %vec_8_0_0\n"
13206 " %bc_8_0_1 = OpBitcast %u32 %vec_8_0_1\n"
13207 " %bc_8_1_0 = OpBitcast %u32 %vec_8_1_0\n"
13208 " %bc_8_1_1 = OpBitcast %u32 %vec_8_1_1\n"
13209 " %bc_8_2_0 = OpBitcast %u32 %vec_8_2_0\n"
13210 " %bc_8_2_1 = OpBitcast %u32 %vec_8_2_1\n"
13211 " %bc_8_3_0 = OpBitcast %u32 %vec_8_3_0\n"
13212 " %bc_8_3_1 = OpBitcast %u32 %vec_8_3_1\n"
13213 " %bc_8_4_0 = OpBitcast %u32 %vec_8_4_0\n"
13214 " %bc_8_4_1 = OpBitcast %u32 %vec_8_4_1\n"
13215 " %gep_8_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_26\n"
13216 " %gep_8_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_27\n"
13217 " %gep_8_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_28\n"
13218 " %gep_8_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_29\n"
13219 " %gep_8_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_30\n"
13220 " %gep_8_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_31\n"
13221 " %gep_8_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_32\n"
13222 " %gep_8_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_33\n"
13223 " %gep_8_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_34\n"
13224 " %gep_8_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_35\n"
13225 " OpStore %gep_8_0_0 %bc_8_0_0\n"
13226 " OpStore %gep_8_0_1 %bc_8_0_1\n"
13227 " OpStore %gep_8_1_0 %bc_8_1_0\n"
13228 " OpStore %gep_8_1_1 %bc_8_1_1\n"
13229 " OpStore %gep_8_2_0 %bc_8_2_0\n"
13230 " OpStore %gep_8_2_1 %bc_8_2_1\n"
13231 " OpStore %gep_8_3_0 %bc_8_3_0\n"
13232 " OpStore %gep_8_3_1 %bc_8_3_1\n"
13233 " OpStore %gep_8_4_0 %bc_8_4_0\n"
13234 " OpStore %gep_8_4_1 %bc_8_4_1\n"
13236 // [3 x <4 x half>] offset 144
13237 " %ex_9_0 = OpCompositeExtract %v4f16 %st_val 9 0\n"
13238 " %ex_9_1 = OpCompositeExtract %v4f16 %st_val 9 1\n"
13239 " %ex_9_2 = OpCompositeExtract %v4f16 %st_val 9 2\n"
13240 " %vec_9_0_0 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 0 1\n"
13241 " %vec_9_0_1 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 2 3\n"
13242 " %vec_9_1_0 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 0 1\n"
13243 " %vec_9_1_1 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 2 3\n"
13244 " %vec_9_2_0 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 0 1\n"
13245 " %vec_9_2_1 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 2 3\n"
13246 " %bc_9_0_0 = OpBitcast %u32 %vec_9_0_0\n"
13247 " %bc_9_0_1 = OpBitcast %u32 %vec_9_0_1\n"
13248 " %bc_9_1_0 = OpBitcast %u32 %vec_9_1_0\n"
13249 " %bc_9_1_1 = OpBitcast %u32 %vec_9_1_1\n"
13250 " %bc_9_2_0 = OpBitcast %u32 %vec_9_2_0\n"
13251 " %bc_9_2_1 = OpBitcast %u32 %vec_9_2_1\n"
13252 " %gep_9_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_36\n"
13253 " %gep_9_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_37\n"
13254 " %gep_9_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_38\n"
13255 " %gep_9_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_39\n"
13256 " %gep_9_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_40\n"
13257 " %gep_9_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_41\n"
13258 " OpStore %gep_9_0_0 %bc_9_0_0\n"
13259 " OpStore %gep_9_0_1 %bc_9_0_1\n"
13260 " OpStore %gep_9_1_0 %bc_9_1_0\n"
13261 " OpStore %gep_9_1_1 %bc_9_1_1\n"
13262 " OpStore %gep_9_2_0 %bc_9_2_0\n"
13263 " OpStore %gep_9_2_1 %bc_9_2_1\n"
13265 " OpBranch %next\n"
13267 " %next = OpLabel\n"
13268 " %i_cur = OpLoad %i32 %i\n"
13269 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13270 " OpStore %i %i_new\n"
13271 " OpBranch %loop\n"
13273 " %merge = OpLabel\n"
13274 " OpBranch %end_if\n"
13275 " %end_if = OpLabel\n"
13276 " OpReturnValue %param\n"
13281 SpecResource specResource;
13282 map<string, string> specs;
13283 VulkanFeatures features;
13284 map<string, string> fragments;
13285 vector<string> extensions;
13286 vector<deFloat16> expectedOutput;
13289 for (deUint32 elementNdx = 0; elementNdx < numElements; ++elementNdx)
13291 vector<deFloat16> expectedIterationOutput;
13293 for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
13294 expectedIterationOutput.push_back(tcu::Float16(float(structItemNdx)).bits());
13296 for (deUint32 structItemNdx = 0; structItemNdx < DE_LENGTH_OF_ARRAY(exceptionIndices); ++structItemNdx)
13297 expectedIterationOutput[exceptionIndices[structItemNdx]] = exceptionValue;
13299 expectedIterationOutput[fieldModifiedMulIndex] = tcu::Float16(float(elementNdx * fieldModifier)).bits();
13300 expectedIterationOutput[fieldModifiedAddIndex] = tcu::Float16(float(elementNdx + fieldModifier)).bits();
13302 expectedOutput.insert(expectedOutput.end(), expectedIterationOutput.begin(), expectedIterationOutput.end());
13305 for (deUint32 i = 0; i < structItemsCount; ++i)
13306 consts += " %c_f16_" + de::toString(i) + " = OpConstant %f16 " + de::toString(i) + "\n";
13308 specs["num_elements"] = de::toString(numElements);
13309 specs["struct_item_size"] = de::toString(structItemsCount * sizeof(deFloat16));
13310 specs["field_modifier"] = de::toString(fieldModifier);
13311 specs["consts"] = consts;
13313 fragments["capability"] = "OpCapability Float16\n";
13314 fragments["decoration"] = decoration.specialize(specs);
13315 fragments["pre_main"] = preMain.specialize(specs);
13316 fragments["testfun"] = testFun.specialize(specs);
13318 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13319 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13320 specResource.verifyIO = compareFP16CompositeFunc;
13322 extensions.push_back("VK_KHR_shader_float16_int8");
13324 features.extFloat16Int8.shaderFloat16 = true;
13326 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
13329 return testGroup.release();
13332 template<class SpecResource>
13333 tcu::TestCaseGroup* createFloat16CompositeInsertExtractSet (tcu::TestContext& testCtx, const char* op)
13335 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, de::toLower(op).c_str(), op));
13336 const deFloat16 exceptionValue = tcu::Float16(-1.0).bits();
13337 const string opName (op);
13338 const deUint32 opIndex = (opName == "OpCompositeInsert") ? 0
13339 : (opName == "OpCompositeExtract") ? 1
13340 : std::numeric_limits<deUint32>::max();
13342 const StringTemplate preMain
13344 " %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
13345 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
13346 " %c_i32_size = OpConstant %i32 ${struct_u32s}\n"
13347 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
13348 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
13349 " %f16 = OpTypeFloat 16\n"
13350 " %v2f16 = OpTypeVector %f16 2\n"
13351 " %v3f16 = OpTypeVector %f16 3\n"
13352 " %v4f16 = OpTypeVector %f16 4\n"
13353 " %c_f16_na = OpConstant %f16 -1.0\n"
13354 " %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_na %c_f16_na\n"
13355 " %c_u32_5 = OpConstant %u32 5\n"
13356 " %c_i32_5 = OpConstant %i32 5\n"
13357 " %c_i32_6 = OpConstant %i32 6\n"
13358 " %c_i32_7 = OpConstant %i32 7\n"
13359 " %c_i32_8 = OpConstant %i32 8\n"
13360 " %c_i32_9 = OpConstant %i32 9\n"
13361 " %c_i32_10 = OpConstant %i32 10\n"
13362 " %c_i32_11 = OpConstant %i32 11\n"
13364 "%f16arr3 = OpTypeArray %f16 %c_u32_3\n"
13365 "%v2f16arr3 = OpTypeArray %v2f16 %c_u32_3\n"
13366 "%v2f16arr5 = OpTypeArray %v2f16 %c_u32_5\n"
13367 "%v3f16arr5 = OpTypeArray %v3f16 %c_u32_5\n"
13368 "%v4f16arr3 = OpTypeArray %v4f16 %c_u32_3\n"
13369 "%struct16 = OpTypeStruct %f16 %v2f16arr3\n"
13370 "%struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
13371 "%st_test = OpTypeStruct %${field_type}\n"
13373 " %ra_f16 = OpTypeArray %u32 %c_i32_hndp\n"
13374 " %ra_st = OpTypeArray %u32 %c_i32_size\n"
13375 " %up_u32 = OpTypePointer Uniform %u32\n"
13376 " %st_test_i32_fn = OpTypeFunction %st_test %i32\n"
13377 "%void_st_test_i32_fn = OpTypeFunction %void %st_test %i32\n"
13378 " %f16_i32_fn = OpTypeFunction %f16 %i32\n"
13379 " %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
13380 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
13381 " %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
13383 "${op_premain_decls}"
13385 " %up_SSBO_src = OpTypePointer Uniform %SSBO_src\n"
13386 " %up_SSBO_dst = OpTypePointer Uniform %SSBO_dst\n"
13388 " %ssbo_src = OpVariable %up_SSBO_src Uniform\n"
13389 " %ssbo_dst = OpVariable %up_SSBO_dst Uniform\n"
13392 const StringTemplate decoration
13394 "OpDecorate %SSBO_src BufferBlock\n"
13395 "OpDecorate %SSBO_dst BufferBlock\n"
13396 "OpDecorate %ra_f16 ArrayStride 4\n"
13397 "OpDecorate %ra_st ArrayStride 4\n"
13398 "OpDecorate %ssbo_src DescriptorSet 0\n"
13399 "OpDecorate %ssbo_src Binding 0\n"
13400 "OpDecorate %ssbo_dst DescriptorSet 0\n"
13401 "OpDecorate %ssbo_dst Binding 1\n"
13403 "OpMemberDecorate %SSBO_src 0 Offset 0\n"
13404 "OpMemberDecorate %SSBO_dst 0 Offset 0\n"
13406 "OpDecorate %v2f16arr3 ArrayStride 4\n"
13407 "OpMemberDecorate %struct16 0 Offset 0\n"
13408 "OpMemberDecorate %struct16 1 Offset 4\n"
13409 "OpDecorate %struct16arr3 ArrayStride 16\n"
13410 "OpDecorate %f16arr3 ArrayStride 2\n"
13411 "OpDecorate %v2f16arr5 ArrayStride 4\n"
13412 "OpDecorate %v3f16arr5 ArrayStride 8\n"
13413 "OpDecorate %v4f16arr3 ArrayStride 8\n"
13415 "OpMemberDecorate %st_test 0 Offset 0\n"
13418 const StringTemplate testFun
13420 " %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
13421 " %param = OpFunctionParameter %v4f32\n"
13422 " %entry = OpLabel\n"
13424 " %i = OpVariable %fp_i32 Function\n"
13425 " OpStore %i %c_i32_0\n"
13427 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
13428 " OpSelectionMerge %end_if None\n"
13429 " OpBranchConditional %will_run %run_test %end_if\n"
13431 " %run_test = OpLabel\n"
13432 " OpBranch %loop\n"
13434 " %loop = OpLabel\n"
13435 " %i_cmp = OpLoad %i32 %i\n"
13436 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
13437 " OpLoopMerge %merge %next None\n"
13438 " OpBranchConditional %lt %write %merge\n"
13440 " %write = OpLabel\n"
13441 " %ndx = OpLoad %i32 %i\n"
13443 "${op_sw_fun_call}"
13445 " %dst_st = OpFunctionCall %void %${st_call} %val_dst %${st_ndx}\n"
13446 " OpBranch %next\n"
13448 " %next = OpLabel\n"
13449 " %i_cur = OpLoad %i32 %i\n"
13450 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13451 " OpStore %i %i_new\n"
13452 " OpBranch %loop\n"
13454 " %merge = OpLabel\n"
13455 " OpBranch %end_if\n"
13456 " %end_if = OpLabel\n"
13457 " OpReturnValue %param\n"
13460 "${op_sw_fun_header}"
13461 " %sw_param = OpFunctionParameter %st_test\n"
13462 "%sw_paramn = OpFunctionParameter %i32\n"
13463 " %sw_entry = OpLabel\n"
13464 " OpSelectionMerge %switch_e None\n"
13465 " OpSwitch %sw_paramn %default ${case_list}\n"
13469 "%default = OpLabel\n"
13470 " OpReturnValue ${op_case_default_value}\n"
13471 "%switch_e = OpLabel\n"
13472 " OpUnreachable\n" // Unreachable merge block for switch statement
13476 const StringTemplate testCaseBody
13478 "%case_${case_ndx} = OpLabel\n"
13479 "%val_ret_${case_ndx} = ${op_name} ${op_args_part} ${access_path}\n"
13480 " OpReturnValue %val_ret_${case_ndx}\n"
13483 const string loadF16
13485 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13486 " %ld_${var}_param = OpFunctionParameter %i32\n"
13487 " %ld_${var}_entry = OpLabel\n"
13488 " %ld_${var}_call = OpFunctionCall %f16 %ld_arg_${var} %ld_${var}_param\n"
13489 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13490 " OpReturnValue %ld_${var}_st_test\n"
13491 " OpFunctionEnd\n" +
13492 loadScalarF16FromUint
13495 const string loadV2F16
13497 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13498 " %ld_${var}_param = OpFunctionParameter %i32\n"
13499 " %ld_${var}_entry = OpLabel\n"
13500 " %ld_${var}_call = OpFunctionCall %v2f16 %ld_arg_${var} %ld_${var}_param\n"
13501 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13502 " OpReturnValue %ld_${var}_st_test\n"
13503 " OpFunctionEnd\n" +
13507 const string loadV3F16
13509 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13510 " %ld_${var}_param = OpFunctionParameter %i32\n"
13511 " %ld_${var}_entry = OpLabel\n"
13512 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13513 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13514 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13515 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13516 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13517 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13518 " %ld_${var}_vec = OpVectorShuffle %v3f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2\n"
13519 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13520 " OpReturnValue %ld_${var}_st_test\n"
13524 const string loadV4F16
13526 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13527 " %ld_${var}_param = OpFunctionParameter %i32\n"
13528 " %ld_${var}_entry = OpLabel\n"
13529 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13530 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13531 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13532 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13533 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13534 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13535 " %ld_${var}_vec = OpVectorShuffle %v4f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2 3\n"
13536 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13537 " OpReturnValue %ld_${var}_st_test\n"
13541 const string loadF16Arr3
13543 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13544 " %ld_${var}_param = OpFunctionParameter %i32\n"
13545 " %ld_${var}_entry = OpLabel\n"
13546 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_0\n"
13547 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_1\n"
13548 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13549 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13550 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13551 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13552 " %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0 0\n"
13553 " %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_0 1\n"
13554 " %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_1 0\n"
13555 " %ld_${var}_cons = OpCompositeConstruct %f16arr3 %ld_${var}_ex_0 %ld_${var}_ex_1 %ld_${var}_ex_2\n"
13556 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13557 " OpReturnValue %ld_${var}_st_test\n"
13561 const string loadV2F16Arr5
13563 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13564 " %ld_${var}_param = OpFunctionParameter %i32\n"
13565 " %ld_${var}_label = OpLabel\n"
13566 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13567 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13568 " %ld_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13569 " %ld_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13570 " %ld_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13571 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13572 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13573 " %ld_${var}_ld_2 = OpLoad %u32 %ld_${var}_gep_2\n"
13574 " %ld_${var}_ld_3 = OpLoad %u32 %ld_${var}_gep_3\n"
13575 " %ld_${var}_ld_4 = OpLoad %u32 %ld_${var}_gep_4\n"
13576 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13577 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13578 " %ld_${var}_bc_2 = OpBitcast %v2f16 %ld_${var}_ld_2\n"
13579 " %ld_${var}_bc_3 = OpBitcast %v2f16 %ld_${var}_ld_3\n"
13580 " %ld_${var}_bc_4 = OpBitcast %v2f16 %ld_${var}_ld_4\n"
13581 " %ld_${var}_cons = OpCompositeConstruct %v2f16arr5 %ld_${var}_bc_0 %ld_${var}_bc_1 %ld_${var}_bc_2 %ld_${var}_bc_3 %ld_${var}_bc_4\n"
13582 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13583 " OpReturnValue %ld_${var}_st_test\n"
13587 const string loadV3F16Arr5
13589 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13590 " %ld_${var}_param = OpFunctionParameter %i32\n"
13591 " %ld_${var}_entry = OpLabel\n"
13592 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13593 "%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13594 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13595 "%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13596 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13597 "%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13598 "%ld_${var}_gep_3_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13599 "%ld_${var}_gep_3_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13600 "%ld_${var}_gep_4_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13601 "%ld_${var}_gep_4_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13602 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13603 " %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13604 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13605 " %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13606 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13607 " %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13608 " %ld_${var}_ld_3_0 = OpLoad %u32 %ld_${var}_gep_3_0\n"
13609 " %ld_${var}_ld_3_1 = OpLoad %u32 %ld_${var}_gep_3_1\n"
13610 " %ld_${var}_ld_4_0 = OpLoad %u32 %ld_${var}_gep_4_0\n"
13611 " %ld_${var}_ld_4_1 = OpLoad %u32 %ld_${var}_gep_4_1\n"
13612 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13613 " %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13614 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13615 " %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
13616 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13617 " %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
13618 " %ld_${var}_bc_3_0 = OpBitcast %v2f16 %ld_${var}_ld_3_0\n"
13619 " %ld_${var}_bc_3_1 = OpBitcast %v2f16 %ld_${var}_ld_3_1\n"
13620 " %ld_${var}_bc_4_0 = OpBitcast %v2f16 %ld_${var}_ld_4_0\n"
13621 " %ld_${var}_bc_4_1 = OpBitcast %v2f16 %ld_${var}_ld_4_1\n"
13622 " %ld_${var}_vec_0 = OpVectorShuffle %v3f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2\n"
13623 " %ld_${var}_vec_1 = OpVectorShuffle %v3f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2\n"
13624 " %ld_${var}_vec_2 = OpVectorShuffle %v3f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2\n"
13625 " %ld_${var}_vec_3 = OpVectorShuffle %v3f16 %ld_${var}_bc_3_0 %ld_${var}_bc_3_1 0 1 2\n"
13626 " %ld_${var}_vec_4 = OpVectorShuffle %v3f16 %ld_${var}_bc_4_0 %ld_${var}_bc_4_1 0 1 2\n"
13627 " %ld_${var}_cons = OpCompositeConstruct %v3f16arr5 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2 %ld_${var}_vec_3 %ld_${var}_vec_4\n"
13628 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13629 " OpReturnValue %ld_${var}_st_test\n"
13633 const string loadV4F16Arr3
13635 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13636 " %ld_${var}_param = OpFunctionParameter %i32\n"
13637 " %ld_${var}_entry = OpLabel\n"
13638 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13639 "%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13640 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13641 "%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13642 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13643 "%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13644 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13645 " %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13646 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13647 " %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13648 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13649 " %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13650 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13651 " %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13652 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13653 " %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
13654 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13655 " %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
13656 " %ld_${var}_vec_0 = OpVectorShuffle %v4f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2 3\n"
13657 " %ld_${var}_vec_1 = OpVectorShuffle %v4f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2 3\n"
13658 " %ld_${var}_vec_2 = OpVectorShuffle %v4f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2 3\n"
13659 " %ld_${var}_cons = OpCompositeConstruct %v4f16arr3 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2\n"
13660 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13661 " OpReturnValue %ld_${var}_st_test\n"
13665 const string loadStruct16Arr3
13667 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13668 " %ld_${var}_param = OpFunctionParameter %i32\n"
13669 " %ld_${var}_entry = OpLabel\n"
13670 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13671 "%ld_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13672 "%ld_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13673 "%ld_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13674 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13675 "%ld_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13676 "%ld_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13677 "%ld_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13678 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13679 "%ld_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13680 "%ld_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
13681 "%ld_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
13682 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13683 " %ld_${var}_ld_0_1_0 = OpLoad %u32 %ld_${var}_gep_0_1_0\n"
13684 " %ld_${var}_ld_0_1_1 = OpLoad %u32 %ld_${var}_gep_0_1_1\n"
13685 " %ld_${var}_ld_0_1_2 = OpLoad %u32 %ld_${var}_gep_0_1_2\n"
13686 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13687 " %ld_${var}_ld_1_1_0 = OpLoad %u32 %ld_${var}_gep_1_1_0\n"
13688 " %ld_${var}_ld_1_1_1 = OpLoad %u32 %ld_${var}_gep_1_1_1\n"
13689 " %ld_${var}_ld_1_1_2 = OpLoad %u32 %ld_${var}_gep_1_1_2\n"
13690 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13691 " %ld_${var}_ld_2_1_0 = OpLoad %u32 %ld_${var}_gep_2_1_0\n"
13692 " %ld_${var}_ld_2_1_1 = OpLoad %u32 %ld_${var}_gep_2_1_1\n"
13693 " %ld_${var}_ld_2_1_2 = OpLoad %u32 %ld_${var}_gep_2_1_2\n"
13694 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13695 " %ld_${var}_bc_0_1_0 = OpBitcast %v2f16 %ld_${var}_ld_0_1_0\n"
13696 " %ld_${var}_bc_0_1_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1_1\n"
13697 " %ld_${var}_bc_0_1_2 = OpBitcast %v2f16 %ld_${var}_ld_0_1_2\n"
13698 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13699 " %ld_${var}_bc_1_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_1_0\n"
13700 " %ld_${var}_bc_1_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1_1\n"
13701 " %ld_${var}_bc_1_1_2 = OpBitcast %v2f16 %ld_${var}_ld_1_1_2\n"
13702 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13703 " %ld_${var}_bc_2_1_0 = OpBitcast %v2f16 %ld_${var}_ld_2_1_0\n"
13704 " %ld_${var}_bc_2_1_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1_1\n"
13705 " %ld_${var}_bc_2_1_2 = OpBitcast %v2f16 %ld_${var}_ld_2_1_2\n"
13706 " %ld_${var}_arr_0 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_0_1_0 %ld_${var}_bc_0_1_1 %ld_${var}_bc_0_1_2\n"
13707 " %ld_${var}_arr_1 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_1_1_0 %ld_${var}_bc_1_1_1 %ld_${var}_bc_1_1_2\n"
13708 " %ld_${var}_arr_2 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_2_1_0 %ld_${var}_bc_2_1_1 %ld_${var}_bc_2_1_2\n"
13709 " %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0_0 0\n"
13710 " %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_1_0 0\n"
13711 " %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_2_0 0\n"
13712 " %ld_${var}_st_0 = OpCompositeConstruct %struct16 %ld_${var}_ex_0 %ld_${var}_arr_0\n"
13713 " %ld_${var}_st_1 = OpCompositeConstruct %struct16 %ld_${var}_ex_1 %ld_${var}_arr_1\n"
13714 " %ld_${var}_st_2 = OpCompositeConstruct %struct16 %ld_${var}_ex_2 %ld_${var}_arr_2\n"
13715 " %ld_${var}_cons = OpCompositeConstruct %struct16arr3 %ld_${var}_st_0 %ld_${var}_st_1 %ld_${var}_st_2\n"
13716 " %ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13717 " OpReturnValue %ld_${var}_st_test\n"
13721 const string storeF16
13723 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13724 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13725 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13726 " %st_${var}_entry = OpLabel\n"
13727 " %st_${var}_ex = OpCompositeExtract %f16 %st_${var}_param1 0\n"
13728 " %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
13730 " OpFunctionEnd\n" +
13731 storeScalarF16AsUint
13734 const string storeV2F16
13736 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13737 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13738 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13739 " %st_${var}_entry = OpLabel\n"
13740 " %st_${var}_ex = OpCompositeExtract %v2f16 %st_${var}_param1 0\n"
13741 " %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
13743 " OpFunctionEnd\n" +
13747 const string storeV3F16
13749 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13750 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13751 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13752 " %st_${var}_entry = OpLabel\n"
13753 " %st_${var}_ex = OpCompositeExtract %v3f16 %st_${var}_param1 0\n"
13754 " %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
13755 " %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
13756 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13757 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13758 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13759 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13760 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13761 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13766 const string storeV4F16
13768 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13769 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13770 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13771 " %st_${var}_entry = OpLabel\n"
13772 " %st_${var}_ex = OpCompositeExtract %v4f16 %st_${var}_param1 0\n"
13773 " %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
13774 " %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
13775 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13776 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13777 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13778 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13779 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13780 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13785 const string storeF16Arr3
13787 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13788 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13789 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13790 " %st_${var}_entry = OpLabel\n"
13791 " %st_${var}_ex_0 = OpCompositeExtract %f16 %st_${var}_param1 0 0\n"
13792 " %st_${var}_ex_1 = OpCompositeExtract %f16 %st_${var}_param1 0 1\n"
13793 " %st_${var}_ex_2 = OpCompositeExtract %f16 %st_${var}_param1 0 2\n"
13794 " %st_${var}_vec_0 = OpCompositeConstruct %v2f16 %st_${var}_ex_0 %st_${var}_ex_1\n"
13795 " %st_${var}_vec_1 = OpCompositeConstruct %v2f16 %st_${var}_ex_2 %c_f16_na\n"
13796 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13797 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13798 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13799 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13800 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13801 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
// SPIR-V assembly template for the store helper %st_${var} that is paired with the
// v2f16arr5 entry of typeTestParameters. The five %v2f16 array elements (paths 0 0 to 0 4)
// of the %st_test parameter are each bitcast to %u32 and written to words 0..4 of %${var}.
// ${var} is a StringTemplate placeholder that is filled in when the template is specialized.
13806 const string storeV2F16Arr5
13808 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13809 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13810 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13811 " %st_${var}_entry = OpLabel\n"
// One extract per array element.
13812 " %st_${var}_ex_0 = OpCompositeExtract %v2f16 %st_${var}_param1 0 0\n"
13813 " %st_${var}_ex_1 = OpCompositeExtract %v2f16 %st_${var}_param1 0 1\n"
13814 " %st_${var}_ex_2 = OpCompositeExtract %v2f16 %st_${var}_param1 0 2\n"
13815 " %st_${var}_ex_3 = OpCompositeExtract %v2f16 %st_${var}_param1 0 3\n"
13816 " %st_${var}_ex_4 = OpCompositeExtract %v2f16 %st_${var}_param1 0 4\n"
// A %v2f16 is bitcast directly to one %u32 word.
13817 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_ex_0\n"
13818 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_ex_1\n"
13819 " %st_${var}_bc_2 = OpBitcast %u32 %st_${var}_ex_2\n"
13820 " %st_${var}_bc_3 = OpBitcast %u32 %st_${var}_ex_3\n"
13821 " %st_${var}_bc_4 = OpBitcast %u32 %st_${var}_ex_4\n"
// Destination words 0..4 of the first member of %${var}.
13822 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13823 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13824 " %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13825 " %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13826 " %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13827 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13828 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13829 " OpStore %st_${var}_gep_2 %st_${var}_bc_2\n"
13830 " OpStore %st_${var}_gep_3 %st_${var}_bc_3\n"
13831 " OpStore %st_${var}_gep_4 %st_${var}_bc_4\n"
// SPIR-V assembly template for the store helper %st_${var} that is paired with the
// v3f16arr5 entry of typeTestParameters. Each of the five %v3f16 array elements
// (paths 0 0 to 0 4) is split into two %v2f16 halves, each half is bitcast to %u32,
// and the resulting ten words are written to words 0..9 of %${var}.
// ${var} is a StringTemplate placeholder that is filled in when the template is specialized.
13836 const string storeV3F16Arr5
13838 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13839 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13840 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13841 " %st_${var}_entry = OpLabel\n"
13842 " %st_${var}_ex_0 = OpCompositeExtract %v3f16 %st_${var}_param1 0 0\n"
13843 " %st_${var}_ex_1 = OpCompositeExtract %v3f16 %st_${var}_param1 0 1\n"
13844 " %st_${var}_ex_2 = OpCompositeExtract %v3f16 %st_${var}_param1 0 2\n"
13845 " %st_${var}_ex_3 = OpCompositeExtract %v3f16 %st_${var}_param1 0 3\n"
13846 " %st_${var}_ex_4 = OpCompositeExtract %v3f16 %st_${var}_param1 0 4\n"
// Shuffle selectors 0 1 take the first two components of the element. Selectors 2 3 take
// its third component plus component 3, which (shuffle components being numbered across
// both operands) is the first component of the second operand %c_v2f16_n1.
13847 "%st_${var}_v2_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 0 1\n"
13848 "%st_${var}_v2_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 2 3\n"
13849 "%st_${var}_v2_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 0 1\n"
13850 "%st_${var}_v2_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 2 3\n"
13851 "%st_${var}_v2_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 0 1\n"
13852 "%st_${var}_v2_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 2 3\n"
13853 "%st_${var}_v2_3_0 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 0 1\n"
13854 "%st_${var}_v2_3_1 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 2 3\n"
13855 "%st_${var}_v2_4_0 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 0 1\n"
13856 "%st_${var}_v2_4_1 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 2 3\n"
// Each %v2f16 half becomes one %u32 word.
13857 "%st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
13858 "%st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
13859 "%st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
13860 "%st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
13861 "%st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
13862 "%st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
13863 "%st_${var}_bc_3_0 = OpBitcast %u32 %st_${var}_v2_3_0\n"
13864 "%st_${var}_bc_3_1 = OpBitcast %u32 %st_${var}_v2_3_1\n"
13865 "%st_${var}_bc_4_0 = OpBitcast %u32 %st_${var}_v2_4_0\n"
13866 "%st_${var}_bc_4_1 = OpBitcast %u32 %st_${var}_v2_4_1\n"
// Destination words 0..9 of the first member of %${var}; two consecutive words per element.
13867 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13868 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13869 " %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13870 " %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13871 " %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13872 " %st_${var}_gep_5 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13873 " %st_${var}_gep_6 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13874 " %st_${var}_gep_7 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13875 " %st_${var}_gep_8 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13876 " %st_${var}_gep_9 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13877 " OpStore %st_${var}_gep_0 %st_${var}_bc_0_0\n"
13878 " OpStore %st_${var}_gep_1 %st_${var}_bc_0_1\n"
13879 " OpStore %st_${var}_gep_2 %st_${var}_bc_1_0\n"
13880 " OpStore %st_${var}_gep_3 %st_${var}_bc_1_1\n"
13881 " OpStore %st_${var}_gep_4 %st_${var}_bc_2_0\n"
13882 " OpStore %st_${var}_gep_5 %st_${var}_bc_2_1\n"
13883 " OpStore %st_${var}_gep_6 %st_${var}_bc_3_0\n"
13884 " OpStore %st_${var}_gep_7 %st_${var}_bc_3_1\n"
13885 " OpStore %st_${var}_gep_8 %st_${var}_bc_4_0\n"
13886 " OpStore %st_${var}_gep_9 %st_${var}_bc_4_1\n"
// SPIR-V assembly template for the store helper %st_${var} that is paired with the
// v4f16arr3 entry of typeTestParameters. Each of the three %v4f16 array elements
// (paths 0 0 to 0 2) is split into two %v2f16 halves, each half is bitcast to %u32,
// and the resulting six words are written to words 0..5 of %${var}.
// ${var} is a StringTemplate placeholder that is filled in when the template is specialized.
13891 const string storeV4F16Arr3
13893 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13894 " %st_${var}_param1 = OpFunctionParameter %st_test\n"
13895 " %st_${var}_param2 = OpFunctionParameter %i32\n"
13896 " %st_${var}_entry = OpLabel\n"
13897 " %st_${var}_ex_0 = OpCompositeExtract %v4f16 %st_${var}_param1 0 0\n"
13898 " %st_${var}_ex_1 = OpCompositeExtract %v4f16 %st_${var}_param1 0 1\n"
13899 " %st_${var}_ex_2 = OpCompositeExtract %v4f16 %st_${var}_param1 0 2\n"
// The element is passed as both shuffle operands; selectors 0 1 and 2 3 all fall inside
// the first operand, giving the low and high component pairs of the element.
13900 "%st_${var}_vec_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 0 1\n"
13901 "%st_${var}_vec_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 2 3\n"
13902 "%st_${var}_vec_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 0 1\n"
13903 "%st_${var}_vec_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 2 3\n"
13904 "%st_${var}_vec_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 0 1\n"
13905 "%st_${var}_vec_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 2 3\n"
// Each %v2f16 half becomes one %u32 word.
13906 " %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_vec_0_0\n"
13907 " %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_vec_0_1\n"
13908 " %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_vec_1_0\n"
13909 " %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_vec_1_1\n"
13910 " %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_vec_2_0\n"
13911 " %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_vec_2_1\n"
// Destination words 0..5 of the first member of %${var}; two consecutive words per element.
13912 "%st_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13913 "%st_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13914 "%st_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13915 "%st_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13916 "%st_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13917 "%st_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13918 " OpStore %st_${var}_gep_0_0 %st_${var}_bc_0_0\n"
13919 " OpStore %st_${var}_gep_0_1 %st_${var}_bc_0_1\n"
13920 " OpStore %st_${var}_gep_1_0 %st_${var}_bc_1_0\n"
13921 " OpStore %st_${var}_gep_1_1 %st_${var}_bc_1_1\n"
13922 " OpStore %st_${var}_gep_2_0 %st_${var}_bc_2_0\n"
13923 " OpStore %st_${var}_gep_2_1 %st_${var}_bc_2_1\n"
// SPIR-V assembly template for the store helper %st_${var} that is paired with the
// struct16arr3 entry of typeTestParameters. For each of the three %struct16 array
// elements (paths 0 0 to 0 2) it extracts member 0 as an %f16 and elements 0..2 of
// member 1 as %v2f16 values. The scalar is padded with %c_f16_na to form a %v2f16, all
// four values are bitcast to %u32, and the twelve words are written to words 0..11 of
// %${var}: per struct, first the scalar word and then the three vector words.
// ${var} is a StringTemplate placeholder that is filled in when the template is specialized.
13928 const string storeStruct16Arr3
13930 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13931 " %st_${var}_param1 = OpFunctionParameter %st_test\n"
13932 " %st_${var}_param2 = OpFunctionParameter %i32\n"
13933 " %st_${var}_entry = OpLabel\n"
// The three structs of the array.
13934 " %st_${var}_st_0 = OpCompositeExtract %struct16 %st_${var}_param1 0 0\n"
13935 " %st_${var}_st_1 = OpCompositeExtract %struct16 %st_${var}_param1 0 1\n"
13936 " %st_${var}_st_2 = OpCompositeExtract %struct16 %st_${var}_param1 0 2\n"
// Per struct: the %f16 at member 0 and the three %v2f16 elements of member 1.
13937 " %st_${var}_el_0 = OpCompositeExtract %f16 %st_${var}_st_0 0\n"
13938 " %st_${var}_v2_0_0 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 0\n"
13939 " %st_${var}_v2_0_1 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 1\n"
13940 " %st_${var}_v2_0_2 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 2\n"
13941 " %st_${var}_el_1 = OpCompositeExtract %f16 %st_${var}_st_1 0\n"
13942 " %st_${var}_v2_1_0 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 0\n"
13943 " %st_${var}_v2_1_1 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 1\n"
13944 " %st_${var}_v2_1_2 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 2\n"
13945 " %st_${var}_el_2 = OpCompositeExtract %f16 %st_${var}_st_2 0\n"
13946 " %st_${var}_v2_2_0 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 0\n"
13947 " %st_${var}_v2_2_1 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 1\n"
13948 " %st_${var}_v2_2_2 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 2\n"
// Pad each scalar with %c_f16_na so that it fills a whole %u32 word.
13949 " %st_${var}_v2_0 = OpCompositeConstruct %v2f16 %st_${var}_el_0 %c_f16_na\n"
13950 " %st_${var}_v2_1 = OpCompositeConstruct %v2f16 %st_${var}_el_1 %c_f16_na\n"
13951 " %st_${var}_v2_2 = OpCompositeConstruct %v2f16 %st_${var}_el_2 %c_f16_na\n"
// Bitcast every %v2f16 (padded scalar and vector elements) to %u32.
13952 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_v2_0\n"
13953 " %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
13954 " %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
13955 " %st_${var}_bc_0_2 = OpBitcast %u32 %st_${var}_v2_0_2\n"
13956 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_v2_1\n"
13957 " %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
13958 " %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
13959 " %st_${var}_bc_1_2 = OpBitcast %u32 %st_${var}_v2_1_2\n"
13960 " %st_${var}_bc_2 = OpBitcast %u32 %st_${var}_v2_2\n"
13961 " %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
13962 " %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
13963 " %st_${var}_bc_2_2 = OpBitcast %u32 %st_${var}_v2_2_2\n"
// Destination words 0..11 of the first member of %${var}; four consecutive words per struct.
13964 "%st_${var}_gep_0_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13965 "%st_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13966 "%st_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13967 "%st_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13968 "%st_${var}_gep_1_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13969 "%st_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13970 "%st_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13971 "%st_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13972 "%st_${var}_gep_2_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13973 "%st_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13974 "%st_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
13975 "%st_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
13976 " OpStore %st_${var}_gep_0_0_0 %st_${var}_bc_0\n"
13977 " OpStore %st_${var}_gep_0_1_0 %st_${var}_bc_0_0\n"
13978 " OpStore %st_${var}_gep_0_1_1 %st_${var}_bc_0_1\n"
13979 " OpStore %st_${var}_gep_0_1_2 %st_${var}_bc_0_2\n"
13980 " OpStore %st_${var}_gep_1_0_0 %st_${var}_bc_1\n"
13981 " OpStore %st_${var}_gep_1_1_0 %st_${var}_bc_1_0\n"
13982 " OpStore %st_${var}_gep_1_1_1 %st_${var}_bc_1_1\n"
13983 " OpStore %st_${var}_gep_1_1_2 %st_${var}_bc_1_2\n"
13984 " OpStore %st_${var}_gep_2_0_0 %st_${var}_bc_2\n"
13985 " OpStore %st_${var}_gep_2_1_0 %st_${var}_bc_2_0\n"
13986 " OpStore %st_${var}_gep_2_1_1 %st_${var}_bc_2_1\n"
13987 " OpStore %st_${var}_gep_2_1_2 %st_${var}_bc_2_2\n"
// Per-opcode shader text fragments; these are the fields read from an OpParts value
// when the specialization map is filled in further below.
// Type, function-type and SSBO struct declarations; becomes the op_premain_decls value.
13994 const char* premainDecls;
// Instructions that load the operands and call %sw_fun; becomes the op_sw_fun_call value.
13995 const char* swFunCall;
// Opening lines of the %sw_fun function; becomes the op_sw_fun_header value.
13996 const char* swFunHeader;
// Becomes the op_case_default_value value.
13997 const char* caseDefaultValue;
// Operand text passed to every switch case body as op_args_part.
13998 const char* argsPartial;
// Fragment sets for the two tested opcodes; the entry to use is selected with opIndex.
14001 OpParts opPartsArray[] =
14003 // OpCompositeInsert
// Insert: the source SSBO wraps %ra_f16 and the destination SSBO wraps %ra_st.
// %sw_fun takes the new %f16 value, the old %st_test value and an %i32, and returns %st_test.
14005 " %fun_t = OpTypeFunction %st_test %f16 %st_test %i32\n"
14006 " %SSBO_src = OpTypeStruct %ra_f16\n"
14007 " %SSBO_dst = OpTypeStruct %ra_st\n",
// Load the new scalar at %ndx and the current destination struct, then call %sw_fun.
14009 " %val_new = OpFunctionCall %f16 %ld_arg_ssbo_src %ndx\n"
14010 " %val_old = OpFunctionCall %st_test %ld_ssbo_dst %c_i32_0\n"
14011 " %val_dst = OpFunctionCall %st_test %sw_fun %val_new %val_old %ndx\n",
14013 " %sw_fun = OpFunction %st_test None %fun_t\n"
14014 "%sw_paramv = OpFunctionParameter %f16\n",
14018 "%st_test %sw_paramv %sw_param",
14020 // OpCompositeExtract
// Extract: the source SSBO wraps %ra_st and the destination SSBO wraps %ra_f16.
// %sw_fun takes the %st_test value and an %i32, and returns %f16.
14022 " %fun_t = OpTypeFunction %f16 %st_test %i32\n"
14023 " %SSBO_src = OpTypeStruct %ra_st\n"
14024 " %SSBO_dst = OpTypeStruct %ra_f16\n",
// Load the source struct, then call %sw_fun with it and %ndx.
14026 " %val_src = OpFunctionCall %st_test %ld_ssbo_src %c_i32_0\n"
14027 " %val_dst = OpFunctionCall %f16 %sw_fun %val_src %ndx\n",
14029 " %sw_fun = OpFunction %f16 None %fun_t\n",
// opIndex is used to index opPartsArray, so it has to be within the array bounds.
14037 DE_ASSERT(opIndex < DE_LENGTH_OF_ARRAY(opPartsArray));
// Access path tables, one per tested type. Every entry is either a string of literal
// composite indices (used as the access_path value of a switch case) or DE_NULL; DE_NULL
// entries get no switch case and their input slot is filled with exceptionValue.
14039 const char* accessPathF16[] =
14044 const char* accessPathV2F16[] =
14049 const char* accessPathV3F16[] =
14056 const char* accessPathV4F16[] =
14063 const char* accessPathF16Arr3[] =
14070 const char* accessPathStruct16Arr3[] =
14072 "0 0 0", // %struct16arr3
14097 const char* accessPathV2F16Arr5[] =
14099 "0 0 0", // %v2f16arr5
14110 const char* accessPathV3F16Arr5[] =
14112 "0 0 0", // %v3f16arr5
14133 const char* accessPathV4F16Arr3[] =
14135 "0 0 0", // %v4f16arr3
// Describes one tested type: its access path table and the SPIR-V templates used to
// load it from and store it to an SSBO.
14153 struct TypeTestParameters
// Number of entries in accessPath.
14156 size_t accessPathLength;
// Access path table; entries may be DE_NULL.
14157 const char** accessPath;
// SPIR-V assembly templates with a ${var} placeholder naming the SSBO variable.
14158 const string loadFunction;
14159 const string storeFunction;
// One row per tested type: type name (also used as the test name and as the field_type
// value), access path count, access path table, load template and store template.
14162 const TypeTestParameters typeTestParameters[] =
14164 { "f16", DE_LENGTH_OF_ARRAY(accessPathF16), accessPathF16, loadF16, storeF16 },
14165 { "v2f16", DE_LENGTH_OF_ARRAY(accessPathV2F16), accessPathV2F16, loadV2F16, storeV2F16 },
14166 { "v3f16", DE_LENGTH_OF_ARRAY(accessPathV3F16), accessPathV3F16, loadV3F16, storeV3F16 },
14167 { "v4f16", DE_LENGTH_OF_ARRAY(accessPathV4F16), accessPathV4F16, loadV4F16, storeV4F16 },
14168 { "f16arr3", DE_LENGTH_OF_ARRAY(accessPathF16Arr3), accessPathF16Arr3, loadF16Arr3, storeF16Arr3 },
14169 { "v2f16arr5", DE_LENGTH_OF_ARRAY(accessPathV2F16Arr5), accessPathV2F16Arr5, loadV2F16Arr5, storeV2F16Arr5 },
14170 { "v3f16arr5", DE_LENGTH_OF_ARRAY(accessPathV3F16Arr5), accessPathV3F16Arr5, loadV3F16Arr5, storeV3F16Arr5 },
14171 { "v4f16arr3", DE_LENGTH_OF_ARRAY(accessPathV4F16Arr3), accessPathV4F16Arr3, loadV4F16Arr3, storeV4F16Arr3 },
14172 { "struct16arr3", DE_LENGTH_OF_ARRAY(accessPathStruct16Arr3), accessPathStruct16Arr3, loadStruct16Arr3, storeStruct16Arr3},
// Build one test per entry of typeTestParameters for the opcode selected by opIndex.
14175 for (size_t typeTestNdx = 0; typeTestNdx < DE_LENGTH_OF_ARRAY(typeTestParameters); ++typeTestNdx)
14177 const OpParts opParts = opPartsArray[opIndex];
14178 const string testName = typeTestParameters[typeTestNdx].name;
14179 const size_t structItemsCount = typeTestParameters[typeTestNdx].accessPathLength;
14180 const char** accessPath = typeTestParameters[typeTestNdx].accessPath;
14181 SpecResource specResource;
14182 map<string, string> specs;
14183 VulkanFeatures features;
14184 map<string, string> fragments;
14185 vector<string> extensions;
14186 vector<deFloat16> inputFP16;
14187 vector<deFloat16> unusedFP16Output;
14189 // Generate values for input
// One fp16 per access path entry: the entry index converted to fp16 bits, or
// exceptionValue for entries whose access path is DE_NULL.
14190 inputFP16.reserve(structItemsCount);
14191 for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
14192 inputFP16.push_back((accessPath[structItemNdx] == DE_NULL) ? exceptionValue : tcu::Float16(float(structItemNdx)).bits());
// The output buffer has the same element count as the input.
14194 unusedFP16Output.resize(structItemsCount);
14196 // Generate cases for OpSwitch
// Entries with a DE_NULL access path get no case body and no case list entry.
14201 for (deUint32 caseNdx = 0; caseNdx < structItemsCount; ++caseNdx)
14202 if (accessPath[caseNdx] != DE_NULL)
14204 map<string, string> specCase;
14206 specCase["case_ndx"] = de::toString(caseNdx);
14207 specCase["access_path"] = accessPath[caseNdx];
14208 specCase["op_args_part"] = opParts.argsPartial;
14209 specCase["op_name"] = opName;
14211 caseBodies += testCaseBody.specialize(specCase);
// Appends the selector value followed by its %case_N label.
14212 caseList += de::toString(caseNdx) + " %case_" + de::toString(caseNdx) + " ";
14215 specs["case_bodies"] = caseBodies;
14216 specs["case_list"] = caseList;
// Sizes are derived from the item count: the byte size uses sizeof(deFloat16) per item,
// and the u32 count is the item count halved with integer division.
14219 specs["num_elements"] = de::toString(structItemsCount);
14220 specs["field_type"] = typeTestParameters[typeTestNdx].name;
14221 specs["struct_item_size"] = de::toString(structItemsCount * sizeof(deFloat16));
14222 specs["struct_u32s"] = de::toString(structItemsCount / 2);
14223 specs["op_premain_decls"] = opParts.premainDecls;
14224 specs["op_sw_fun_call"] = opParts.swFunCall;
14225 specs["op_sw_fun_header"] = opParts.swFunHeader;
14226 specs["op_case_default_value"] = opParts.caseDefaultValue;
// opIndex 0 is the first opPartsArray entry (annotated OpCompositeInsert there).
14227 if (opIndex == 0) {
14228 specs["st_call"] = "st_ssbo_dst";
14229 specs["st_ndx"] = "c_i32_0";
// NOTE(review): the next two assignments are presumably the else-branch of the opIndex check above - confirm.
14231 specs["st_call"] = "st_fn_ssbo_dst";
14232 specs["st_ndx"] = "ndx";
14235 fragments["capability"] = "OpCapability Float16\n";
14236 fragments["decoration"] = decoration.specialize(specs);
14237 fragments["pre_main"] = preMain.specialize(specs);
14238 fragments["testfun"] = testFun.specialize(specs);
// For opIndex 0 append a scalar loader for ssbo_src plus the type-specific load and
// store helpers for ssbo_dst.
14239 if (opIndex == 0) {
14240 fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src"}});
14241 fragments["testfun"] += StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_dst"}});
14242 fragments["testfun"] += StringTemplate(typeTestParameters[typeTestNdx].storeFunction).specialize({{"var", "ssbo_dst"}});
// NOTE(review): presumably the else-branch - type-specific loader for ssbo_src and a scalar store for ssbo_dst; confirm.
14244 fragments["testfun"] += StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_src"}});
14245 fragments["testfun"] += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
// Both buffers are bound as storage buffers; the results are checked by compareFP16CompositeFunc.
14248 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputFP16)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14249 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(unusedFP16Output)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14250 specResource.verifyIO = compareFP16CompositeFunc;
// The shader declares the Float16 capability, so request the matching extension and feature.
14252 extensions.push_back("VK_KHR_shader_float16_int8");
14254 features.extFloat16Int8.shaderFloat16 = true;
14256 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
// Hand the populated group over to the caller.
14259 return testGroup.release();
// Base for the fp16 reference calculators that follow. Holds the fp16 float format used
// for ULP computation, the currently selected flavor, and the component counts of the
// result and of the arguments.
14262 struct fp16PerComponent
// NOTE(review): arguments are presumably minimum exponent, maximum exponent, fraction bits and exact-precision flag - confirm against tcu::FloatFormat.
14266 , floatFormat16 (-14, 15, 10, true)
// Three argument slots, each starting with a component count of 0.
14268 , argCompCount(3, 0)
14272 bool callOncePerComponent () { return true; }
// Returns a mask with all 32 bits set.
14273 deUint32 getComponentValidity () { return static_cast<deUint32>(-1); }
// Default tolerance is 1 ULP; getMin and getMax widen value by that many fp16 ULPs of its magnitude.
14275 virtual double getULPs (vector<const deFloat16*>&) { return 1.0; }
14276 virtual double getMin (double value, double ulps) { return value - floatFormat16.ulp(deAbs(value), ulps); }
14277 virtual double getMax (double value, double ulps) { return value + floatFormat16.ulp(deAbs(value), ulps); }
// A calculator without named flavors still reports exactly one flavor, with an empty name.
14279 virtual size_t getFlavorCount () { return flavorNames.empty() ? 1 : flavorNames.size(); }
14280 virtual void setFlavor (size_t flavorNo) { DE_ASSERT(flavorNo < getFlavorCount()); flavor = flavorNo; }
14281 virtual size_t getFlavor () { return flavor; }
14282 virtual string getCurrentFlavorName () { return flavorNames.empty() ? string("") : flavorNames[getFlavor()]; }
14284 virtual void setOutCompCount (size_t compCount) { outCompCount = compCount; }
14285 virtual size_t getOutCompCount () { return outCompCount; }
// argNo indexes argCompCount directly; no range check is performed here.
14287 virtual void setArgCompCount (size_t argNo, size_t compCount) { argCompCount[argNo] = compCount; }
14288 virtual size_t getArgCompCount (size_t argNo) { return argCompCount[argNo]; }
14292 tcu::FloatFormat floatFormat16;
14293 size_t outCompCount;
14294 vector<size_t> argCompCount;
14295 vector<string> flavorNames;
// Reference calculator for negation of one fp16 value.
14298 struct fp16OpFNegate : public fp16PerComponent
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14300 template <class fp16type>
14301 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14303 const fp16type x (*in[0]);
14304 const double d (x.asDouble());
// Negation is computed as a subtraction from zero.
14305 const double result (0.0 - d);
14307 out[0] = fp16type(result).bits();
14308 min[0] = getMin(result, getULPs(in));
14309 max[0] = getMax(result, getULPs(in));
// Reference calculator for rounding one fp16 value, with three flavors.
14315 struct fp16Round : public fp16PerComponent
// Registers the flavor names in the same order as the case labels 0..2 in calc.
14317 fp16Round() : fp16PerComponent()
14319 flavorNames.push_back("Floor(x+0.5)");
14320 flavorNames.push_back("Floor(x-0.5)");
14321 flavorNames.push_back("RoundEven");
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14324 template<class fp16type>
14325 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14327 const fp16type x (*in[0]);
14328 const double d (x.asDouble());
14329 double result (0.0);
// NOTE(review): the cases below presumably switch on the current flavor - confirm.
14333 case 0: result = deRound(d); break;
14334 case 1: result = deFloor(d - 0.5); break;
14335 case 2: result = deRoundEven(d); break;
14336 default: TCU_THROW(InternalError, "Invalid flavor specified");
14339 out[0] = fp16type(result).bits();
14340 min[0] = getMin(result, getULPs(in));
14341 max[0] = getMax(result, getULPs(in));
// Reference calculator that applies deRoundEven to one fp16 value.
14347 struct fp16RoundEven : public fp16PerComponent
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14349 template<class fp16type>
14350 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14352 const fp16type x (*in[0]);
14353 const double d (x.asDouble());
14354 const double result (deRoundEven(d));
14356 out[0] = fp16type(result).bits();
14357 min[0] = getMin(result, getULPs(in));
14358 max[0] = getMax(result, getULPs(in));
// Reference calculator that applies deTrunc to one fp16 value.
14364 struct fp16Trunc : public fp16PerComponent
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14366 template<class fp16type>
14367 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14369 const fp16type x (*in[0]);
14370 const double d (x.asDouble());
14371 const double result (deTrunc(d));
14373 out[0] = fp16type(result).bits();
14374 min[0] = getMin(result, getULPs(in));
14375 max[0] = getMax(result, getULPs(in));
// Reference calculator that applies deAbs to one fp16 value.
14381 struct fp16FAbs : public fp16PerComponent
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14383 template<class fp16type>
14384 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14386 const fp16type x (*in[0]);
14387 const double d (x.asDouble());
14388 const double result (deAbs(d));
14390 out[0] = fp16type(result).bits();
14391 min[0] = getMin(result, getULPs(in));
14392 max[0] = getMax(result, getULPs(in));
// Reference calculator that applies deSign to one fp16 value.
14398 struct fp16FSign : public fp16PerComponent
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14400 template<class fp16type>
14401 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14403 const fp16type x (*in[0]);
14404 const double d (x.asDouble());
14405 const double result (deSign(d));
14410 out[0] = fp16type(result).bits();
14411 min[0] = getMin(result, getULPs(in));
14412 max[0] = getMax(result, getULPs(in));
// Reference calculator that applies deFloor to one fp16 value.
14418 struct fp16Floor : public fp16PerComponent
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14420 template<class fp16type>
14421 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14423 const fp16type x (*in[0]);
14424 const double d (x.asDouble());
14425 const double result (deFloor(d));
14427 out[0] = fp16type(result).bits();
14428 min[0] = getMin(result, getULPs(in));
14429 max[0] = getMax(result, getULPs(in));
// Reference calculator that applies deCeil to one fp16 value.
14435 struct fp16Ceil : public fp16PerComponent
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14437 template<class fp16type>
14438 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14440 const fp16type x (*in[0]);
14441 const double d (x.asDouble());
14442 const double result (deCeil(d));
14444 out[0] = fp16type(result).bits();
14445 min[0] = getMin(result, getULPs(in));
14446 max[0] = getMax(result, getULPs(in));
// Reference calculator that applies deFrac to one fp16 value.
14452 struct fp16Fract : public fp16PerComponent
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14454 template<class fp16type>
14455 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14457 const fp16type x (*in[0]);
14458 const double d (x.asDouble());
14459 const double result (deFrac(d));
14461 out[0] = fp16type(result).bits();
14462 min[0] = getMin(result, getULPs(in));
14463 max[0] = getMax(result, getULPs(in));
// Reference calculator that applies deFloatRadians to one fp16 value. It overrides the
// ULP tolerance of the base struct.
14469 struct fp16Radians : public fp16PerComponent
14471 virtual double getULPs (vector<const deFloat16*>& in)
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14478 template<class fp16type>
14479 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14481 const fp16type x (*in[0]);
// The conversion is carried out in single precision.
14482 const float d (x.asFloat());
14483 const float result (deFloatRadians(d));
14485 out[0] = fp16type(result).bits();
14486 min[0] = getMin(result, getULPs(in));
14487 max[0] = getMax(result, getULPs(in));
// Reference calculator that applies deFloatDegrees to one fp16 value. It overrides the
// ULP tolerance of the base struct.
14493 struct fp16Degrees : public fp16PerComponent
14495 virtual double getULPs (vector<const deFloat16*>& in)
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14502 template<class fp16type>
14503 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14505 const fp16type x (*in[0]);
// The conversion is carried out in single precision.
14506 const float d (x.asFloat());
14507 const float result (deFloatDegrees(d));
14509 out[0] = fp16type(result).bits();
14510 min[0] = getMin(result, getULPs(in));
14511 max[0] = getMax(result, getULPs(in));
// Reference calculator that applies deSin to one fp16 value.
14517 struct fp16Sin : public fp16PerComponent
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14519 template<class fp16type>
14520 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14522 const fp16type x (*in[0]);
14523 const double d (x.asDouble());
14524 const double result (deSin(d));
14525 const double unspecUlp (16.0);
// Inputs within [-pi, pi] use the fixed error deLdExp(1.0, -7); inputs outside that
// range use an error of unspecUlp fp16 ULPs of the result magnitude.
14526 const double err (de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
// Inputs outside [-pi, pi] get separate handling.
14528 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14531 out[0] = fp16type(result).bits();
14532 min[0] = result - err;
14533 max[0] = result + err;
// Reference calculator that applies deCos to one fp16 value.
14539 struct fp16Cos : public fp16PerComponent
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14541 template<class fp16type>
14542 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14544 const fp16type x (*in[0]);
14545 const double d (x.asDouble());
14546 const double result (deCos(d));
14547 const double unspecUlp (16.0);
// Inputs within [-pi, pi] use the fixed error deLdExp(1.0, -7); inputs outside that
// range use an error of unspecUlp fp16 ULPs of the result magnitude.
14548 const double err (de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
// Inputs outside [-pi, pi] get separate handling.
14550 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14553 out[0] = fp16type(result).bits();
14554 min[0] = result - err;
14555 max[0] = result + err;
// Reference calculator that applies deTan to one fp16 value. The accepted interval is
// derived from sine and cosine values that are each perturbed by a fixed error.
14561 struct fp16Tan : public fp16PerComponent
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval.
14563 template<class fp16type>
14564 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14566 const fp16type x (*in[0]);
14567 const double d (x.asDouble());
14568 const double result (deTan(d));
// Inputs outside [-pi, pi] get separate handling.
14570 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14573 out[0] = fp16type(result).bits();
// s1/s2 and c1/c2 are the sine and cosine shifted up and down by err; the four
// quotients are the candidate interval edges.
14575 const double err = deLdExp(1.0, -7);
14576 const double s1 = deSin(d) + err;
14577 const double s2 = deSin(d) - err;
14578 const double c1 = deCos(d) + err;
14579 const double c2 = deCos(d) - err;
14580 const double edgeVals[] = {s1/c1, s1/c2, s2/c1, s2/c2};
// Both edges start from the raw out[0] value and are reassigned below.
14581 double edgeLeft = out[0];
14582 double edgeRight = out[0];
// If the two perturbed cosines have opposite signs the quotient is unbounded, so the
// interval is opened to infinity on both sides.
14584 if (deSign(c1 * c2) < 0.0)
14586 edgeLeft = -std::numeric_limits<double>::infinity();
14587 edgeRight = +std::numeric_limits<double>::infinity();
// The interval spans the smallest to the largest of the four quotients.
14591 edgeLeft = *std::min_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14592 edgeRight = *std::max_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14596 max[0] = edgeRight;
// Reference calculator that applies deAsin to one fp16 value.
14603 struct fp16Asin : public fp16PerComponent
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14605 template<class fp16type>
14606 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14608 const fp16type x (*in[0]);
14609 const double d (x.asDouble());
14610 const double result (deAsin(d));
// The interval width is scaled by the magnitude of atan2(d, sqrt(1 - d*d)) rather
// than by the magnitude of result.
14611 const double error (deAtan2(d, sqrt(1.0 - d * d)));
// Non-NaN inputs with a magnitude above 1.0 get separate handling.
14613 if (!x.isNaN() && deAbs(d) > 1.0)
14616 out[0] = fp16type(result).bits();
14617 min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14618 max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
// Reference calculator that applies deAcos to one fp16 value.
14624 struct fp16Acos : public fp16PerComponent
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14626 template<class fp16type>
14627 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14629 const fp16type x (*in[0]);
14630 const double d (x.asDouble());
14631 const double result (deAcos(d));
// The interval width is scaled by the magnitude of atan2(sqrt(1 - d*d), d) rather
// than by the magnitude of result.
14632 const double error (deAtan2(sqrt(1.0 - d * d), d));
// Non-NaN inputs with a magnitude above 1.0 get separate handling.
14634 if (!x.isNaN() && deAbs(d) > 1.0)
14637 out[0] = fp16type(result).bits();
14638 min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14639 max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
// Reference calculator that applies deAtanOver to one fp16 value, with a widened ULP tolerance.
14645 struct fp16Atan : public fp16PerComponent
14647 virtual double getULPs(vector<const deFloat16*>& in)
14651 return 2 * 5.0; // This is not a precision test. Value is not from spec
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14654 template<class fp16type>
14655 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14657 const fp16type x (*in[0]);
14658 const double d (x.asDouble());
14659 const double result (deAtanOver(d));
14661 out[0] = fp16type(result).bits();
14662 min[0] = getMin(result, getULPs(in));
14663 max[0] = getMax(result, getULPs(in));
// Reference calculator for the hyperbolic sine of one fp16 value, with two flavors:
// a direct double computation and a formula built from exponentials rounded to fp16.
14669 struct fp16Sinh : public fp16PerComponent
// Flavor 0 is named Double and flavor 1 ExpFP16.
14671 fp16Sinh() : fp16PerComponent()
14673 flavorNames.push_back("Double");
14674 flavorNames.push_back("ExpFP16");
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14677 template<class fp16type>
14678 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14680 const fp16type x (*in[0]);
14681 const double d (x.asDouble());
// The tolerance grows with the magnitude of the input.
14682 const double ulps (64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
14683 double result (0.0);
14684 double error (0.0);
// Flavor 0: deSinh in double, with the error taken from the result magnitude.
14686 if (getFlavor() == 0)
14688 result = deSinh(d);
14689 error = floatFormat16.ulp(deAbs(result), ulps);
// Flavor 1: (exp(d) - exp(-d)) / 2 with every intermediate rounded through fp16type;
// the error is the sum of the ULP errors of the two exponentials.
14691 else if (getFlavor() == 1)
14693 const fp16type epx (deExp(d));
14694 const fp16type enx (deExp(-d));
14695 const fp16type esx (epx.asDouble() - enx.asDouble());
14696 const fp16type sx2 (esx.asDouble() / 2.0);
14698 result = sx2.asDouble();
14699 error = deAbs(floatFormat16.ulp(epx.asDouble(), ulps)) + deAbs(floatFormat16.ulp(enx.asDouble(), ulps));
14703 TCU_THROW(InternalError, "Unknown flavor");
14706 out[0] = fp16type(result).bits();
14707 min[0] = result - error;
14708 max[0] = result + error;
// Reference calculator for the hyperbolic cosine of one fp16 value, with two flavors:
// a direct double computation and a formula built from exponentials rounded to fp16.
14714 struct fp16Cosh : public fp16PerComponent
// Flavor 0 is named Double and flavor 1 ExpFP16.
14716 fp16Cosh() : fp16PerComponent()
14718 flavorNames.push_back("Double");
14719 flavorNames.push_back("ExpFP16");
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval around the result.
14722 template<class fp16type>
14723 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14725 const fp16type x (*in[0]);
14726 const double d (x.asDouble());
// The tolerance grows with the magnitude of the input.
14727 const double ulps (64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
14728 double result (0.0);
// Flavor 0: deCosh in double.
14730 if (getFlavor() == 0)
14732 result = deCosh(d);
// Flavor 1: (exp(d) + exp(-d)) / 2 with every intermediate rounded through fp16type.
14734 else if (getFlavor() == 1)
14736 const fp16type epx (deExp(d));
14737 const fp16type enx (deExp(-d));
14738 const fp16type esx (epx.asDouble() + enx.asDouble());
14739 const fp16type sx2 (esx.asDouble() / 2.0);
14741 result = sx2.asDouble();
14745 TCU_THROW(InternalError, "Unknown flavor");
// Both flavors use the same interval: ulps fp16 ULPs of the result magnitude.
14748 out[0] = fp16type(result).bits();
14749 min[0] = result - floatFormat16.ulp(deAbs(result), ulps);
14750 max[0] = result + floatFormat16.ulp(deAbs(result), ulps);
// Reference calculator for the hyperbolic tangent of one fp16 value, with four flavors
// that differ in how the reference value and the accepted interval are obtained.
14756 struct fp16Tanh : public fp16PerComponent
// Flavor indices 0..3 follow the registration order below.
14758 fp16Tanh() : fp16PerComponent()
14760 flavorNames.push_back("Tanh");
14761 flavorNames.push_back("SinhCosh");
14762 flavorNames.push_back("SinhCoshFP16");
14763 flavorNames.push_back("PolyFP16");
// The tolerance grows with the magnitude of the input.
14766 virtual double getULPs (vector<const deFloat16*>& in)
14768 const tcu::Float16 x (*in[0]);
14769 const double d (x.asDouble());
14771 return 2 * (1.0 + 2 * deAbs(d)); // This is not a precision test. Value is not from spec
// calcPoly: evaluates ((espx - esnx) / 2) / ((ecpx + ecnx) / 2), rounding every
// intermediate through fp16type. The first two arguments feed the numerator and the
// last two the denominator.
14774 template<class fp16type>
14775 inline double calcPoly (const fp16type& espx, const fp16type& esnx, const fp16type& ecpx, const fp16type& ecnx)
14777 const fp16type esx (espx.asDouble() - esnx.asDouble());
14778 const fp16type sx2 (esx.asDouble() / 2.0);
14779 const fp16type ecx (ecpx.asDouble() + ecnx.asDouble());
14780 const fp16type cx2 (ecx.asDouble() / 2.0);
14781 const fp16type tg (sx2.asDouble() / cx2.asDouble());
14782 const double rez (tg.asDouble());
// calc: in[0] points to the operand bits; out[0] receives the reference result bits and
// min[0] and max[0] the accepted interval.
14787 template<class fp16type>
14788 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14790 const fp16type x (*in[0]);
14791 const double d (x.asDouble());
14792 double result (0.0);
// Flavor 0: deTanh in double.
14794 if (getFlavor() == 0)
14796 result = deTanh(d);
14797 min[0] = getMin(result, getULPs(in));
14798 max[0] = getMax(result, getULPs(in));
// Flavor 1: quotient of deSinh and deCosh in double.
14800 else if (getFlavor() == 1)
14802 result = deSinh(d) / deCosh(d);
14803 min[0] = getMin(result, getULPs(in));
14804 max[0] = getMax(result, getULPs(in));
// Flavor 2: as flavor 1, but sinh and cosh are rounded through fp16type before dividing.
14806 else if (getFlavor() == 2)
14808 const fp16type s (deSinh(d));
14809 const fp16type c (deCosh(d));
14811 result = s.asDouble() / c.asDouble();
14812 min[0] = getMin(result, getULPs(in));
14813 max[0] = getMax(result, getULPs(in));
// Flavor 3: exp(d) and exp(-d) are each taken at their lower and upper error bound;
// calcPoly is evaluated for all 16 combinations and the interval is the minimum and
// maximum of those values. The reference result uses the unperturbed exponentials.
14815 else if (getFlavor() == 3)
14817 const double ulps (getULPs(in));
14818 const double epxm (deExp( d));
14819 const double enxm (deExp(-d));
14820 const double epxmerr = floatFormat16.ulp(epxm, ulps);
14821 const double enxmerr = floatFormat16.ulp(enxm, ulps);
14822 const fp16type epx[] = { fp16type(epxm - epxmerr), fp16type(epxm + epxmerr) };
14823 const fp16type enx[] = { fp16type(enxm - enxmerr), fp16type(enxm + enxmerr) };
14824 const fp16type epxm16 (epxm);
14825 const fp16type enxm16 (enxm);
14826 vector<double> tgs;
14828 for (size_t spNdx = 0; spNdx < DE_LENGTH_OF_ARRAY(epx); ++spNdx)
14829 for (size_t snNdx = 0; snNdx < DE_LENGTH_OF_ARRAY(enx); ++snNdx)
14830 for (size_t cpNdx = 0; cpNdx < DE_LENGTH_OF_ARRAY(epx); ++cpNdx)
14831 for (size_t cnNdx = 0; cnNdx < DE_LENGTH_OF_ARRAY(enx); ++cnNdx)
14833 const double tgh = calcPoly(epx[spNdx], enx[snNdx], epx[cpNdx], enx[cnNdx]);
14835 tgs.push_back(tgh);
14838 result = calcPoly(epxm16, enxm16, epxm16, enxm16);
14839 min[0] = *std::min_element(tgs.begin(), tgs.end());
14840 max[0] = *std::max_element(tgs.begin(), tgs.end());
14844 TCU_THROW(InternalError, "Unknown flavor");
14847 out[0] = fp16type(result).bits();
// Reference-value functor for half-precision asinh.
// Flavors: "Double" (deAsinh in double), "PolyFP16Wiki" (log(d + sqrt(d*d + 1)) with fp16
// intermediates) and "PolyFP16Abs" (same formula on |d|, sign restored with deSign).
14853 struct fp16Asinh : public fp16PerComponent
14855 fp16Asinh() : fp16PerComponent()
14857 flavorNames.push_back("Double");
14858 flavorNames.push_back("PolyFP16Wiki");
14859 flavorNames.push_back("PolyFP16Abs");
// Fixed tolerance of 256 ULPs, independent of the input.
14862 virtual double getULPs (vector<const deFloat16*>& in)
14866 return 256.0; // This is not a precision test. Value is not from spec
// Computes the reference result for in[0]; out[0] gets the fp16 bit pattern and
// min[0]/max[0] get getMin/getMax of the result with getULPs(in).
14869 template<class fp16type>
14870 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14872 const fp16type x (*in[0]);
14873 const double d (x.asDouble());
14874 double result (0.0);
14876 if (getFlavor() == 0)
14878 result = deAsinh(d);
14880 else if (getFlavor() == 1)
// Each step of log(d + sqrt(d*d + 1)) is converted through fp16type.
14882 const fp16type x2 (d * d);
14883 const fp16type x2p1 (x2.asDouble() + 1.0);
14884 const fp16type sq (deSqrt(x2p1.asDouble()));
14885 const fp16type sxsq (d + sq.asDouble());
14886 const fp16type lsxsq (deLog(sxsq.asDouble()));
14891 result = lsxsq.asDouble();
14893 else if (getFlavor() == 2)
// Same chain evaluated on |d|; the sign of d is applied to the logarithm afterwards.
14895 const fp16type x2 (d * d);
14896 const fp16type x2p1 (x2.asDouble() + 1.0);
14897 const fp16type sq (deSqrt(x2p1.asDouble()));
14898 const fp16type sxsq (deAbs(d) + sq.asDouble());
14899 const fp16type lsxsq (deLog(sxsq.asDouble()));
14901 result = deSign(d) * lsxsq.asDouble();
14905 TCU_THROW(InternalError, "Unknown flavor");
14908 out[0] = fp16type(result).bits();
14909 min[0] = getMin(result, getULPs(in));
14910 max[0] = getMax(result, getULPs(in));
// Reference-value functor for half-precision acosh.
// Flavors: "Double" (deAcosh in double) and "PolyFP16" (log(d + sqrt(d*d - 1)) with fp16 intermediates).
14916 struct fp16Acosh : public fp16PerComponent
14918 fp16Acosh() : fp16PerComponent()
14920 flavorNames.push_back("Double");
14921 flavorNames.push_back("PolyFP16");
// Fixed tolerance of 16 ULPs, independent of the input.
14924 virtual double getULPs (vector<const deFloat16*>& in)
14928 return 16.0; // This is not a precision test. Value is not from spec
// Computes the reference result for in[0]; out[0] gets the fp16 bit pattern and
// min[0]/max[0] get getMin/getMax of the result with getULPs(in).
14931 template<class fp16type>
14932 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14934 const fp16type x (*in[0]);
14935 const double d (x.asDouble());
14936 double result (0.0);
// Non-NaN inputs below 1.0 are special-cased.
// NOTE(review): the statement guarded by this condition is not visible in this excerpt - confirm what it does.
14938 if (!x.isNaN() && d < 1.0)
14941 if (getFlavor() == 0)
14943 result = deAcosh(d);
14945 else if (getFlavor() == 1)
// Each step of log(d + sqrt(d*d - 1)) is converted through fp16type.
14947 const fp16type x2 (d * d);
14948 const fp16type x2m1 (x2.asDouble() - 1.0);
14949 const fp16type sq (deSqrt(x2m1.asDouble()));
14950 const fp16type sxsq (d + sq.asDouble());
14951 const fp16type lsxsq (deLog(sxsq.asDouble()));
14953 result = lsxsq.asDouble();
14957 TCU_THROW(InternalError, "Unknown flavor");
14960 out[0] = fp16type(result).bits();
14961 min[0] = getMin(result, getULPs(in));
14962 max[0] = getMax(result, getULPs(in));
// Reference-value functor for half-precision atanh.
// Flavors: "Double" (deAtanh in double, 16 ULP bounds) and "PolyFP16"
// (0.5 * log((1 + d) / (1 - d)) with fp16 intermediates and an absolute error bound).
14968 struct fp16Atanh : public fp16PerComponent
14970 fp16Atanh() : fp16PerComponent()
14972 flavorNames.push_back("Double");
14973 flavorNames.push_back("PolyFP16");
// Computes the reference result for in[0]; out[0] gets the fp16 bit pattern and
// min[0]/max[0] get flavor-specific bounds.
14976 template<class fp16type>
14977 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14979 const fp16type x (*in[0]);
14980 const double d (x.asDouble());
14981 double result (0.0);
// Inputs with |d| >= 1.0 are special-cased.
// NOTE(review): the statement guarded by this condition is not visible in this excerpt - confirm what it does.
14983 if (deAbs(d) >= 1.0)
14986 if (getFlavor() == 0)
14988 const double ulps (16.0); // This is not a precision test. Value is not from spec
14990 result = deAtanh(d);
14991 min[0] = getMin(result, ulps);
14992 max[0] = getMax(result, ulps);
14994 else if (getFlavor() == 1)
14996 const fp16type x1a (1.0 + d);
14997 const fp16type x1b (1.0 - d);
14998 const fp16type x1d (x1a.asDouble() / x1b.asDouble());
14999 const fp16type lx1d (deLog(x1d.asDouble()));
15000 const fp16type lx1d2 (0.5 * lx1d.asDouble());
// Error bound is twice deLdExp(2.0, -7) when |(1+d)/(1-d)| lies in [0.5, 2.0],
// otherwise twice floatFormat16.ulp(|(1+d)/(1-d)|, 3.0).
15001 const double error (2 * (de::inRange(deAbs(x1d.asDouble()), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(x1d.asDouble()), 3.0)));
15003 result = lx1d2.asDouble();
15004 min[0] = result - error;
15005 max[0] = result + error;
15009 TCU_THROW(InternalError, "Unknown flavor");
15012 out[0] = fp16type(result).bits();
// Reference-value functor for half-precision exp.
// Evaluates deExp() in double; the tolerance is 10 * (1 + 2 * |d|) ULPs.
15018 struct fp16Exp : public fp16PerComponent
15020 template<class fp16type>
15021 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15023 const fp16type x (*in[0]);
15024 const double d (x.asDouble());
15025 const double ulps (10.0 * (1.0 + 2.0 * deAbs(d)));
15026 const double result (deExp(d));
// out[0] gets the fp16 bit pattern; min[0]/max[0] get getMin/getMax of the result with ulps.
15028 out[0] = fp16type(result).bits();
15029 min[0] = getMin(result, ulps);
15030 max[0] = getMax(result, ulps);
// Reference-value functor for half-precision natural logarithm.
// Evaluates deLog() in double and brackets the result with an absolute error.
15036 struct fp16Log : public fp16PerComponent
15038 template<class fp16type>
15039 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15041 const fp16type x (*in[0]);
15042 const double d (x.asDouble());
15043 const double result (deLog(d));
// Error is deLdExp(2.0, -7) when |d| lies in [0.5, 2.0], otherwise floatFormat16.ulp(|result|, 3.0).
15044 const double error (de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15049 out[0] = fp16type(result).bits();
15050 min[0] = result - error;
15051 max[0] = result + error;
// Reference-value functor for half-precision exp2.
// Evaluates deExp2() in double; the tolerance is 1 + 2 * |input| ULPs.
15057 struct fp16Exp2 : public fp16PerComponent
15059 template<class fp16type>
15060 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15062 const fp16type x (*in[0]);
15063 const double d (x.asDouble());
15064 const double result (deExp2(d));
// The ULP count re-reads the first input component (in[0][0]) rather than reusing d.
15065 const double ulps (1.0 + 2.0 * deAbs(fp16type(in[0][0]).asDouble()));
15067 out[0] = fp16type(result).bits();
15068 min[0] = getMin(result, ulps);
15069 max[0] = getMax(result, ulps);
// Reference-value functor for half-precision base-2 logarithm.
// Evaluates deLog2() in double and brackets the result with an absolute error.
15075 struct fp16Log2 : public fp16PerComponent
15077 template<class fp16type>
15078 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15080 const fp16type x (*in[0]);
15081 const double d (x.asDouble());
15082 const double result (deLog2(d));
// Error is deLdExp(2.0, -7) when |d| lies in [0.5, 2.0], otherwise floatFormat16.ulp(|result|, 3.0).
15083 const double error (de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15088 out[0] = fp16type(result).bits();
15089 min[0] = result - error;
15090 max[0] = result + error;
// Reference-value functor for half-precision square root (deSqrt evaluated in double).
15096 struct fp16Sqrt : public fp16PerComponent
// NOTE(review): the body of this getULPs override is not visible in this excerpt.
15098 virtual double getULPs (vector<const deFloat16*>& in)
15105 template<class fp16type>
15106 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15108 const fp16type x (*in[0]);
15109 const double d (x.asDouble());
15110 const double result (deSqrt(d));
// Non-NaN negative inputs are special-cased.
// NOTE(review): the guarded statement is not visible in this excerpt - confirm what it does.
15112 if (!x.isNaN() && d < 0.0)
15115 out[0] = fp16type(result).bits();
15116 min[0] = getMin(result, getULPs(in));
15117 max[0] = getMax(result, getULPs(in));
// Reference-value functor for half-precision inverse square root (1.0 / deSqrt(d) in double).
15123 struct fp16InverseSqrt : public fp16PerComponent
// NOTE(review): the body of this getULPs override is not visible in this excerpt.
15125 virtual double getULPs (vector<const deFloat16*>& in)
15132 template<class fp16type>
15133 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15135 const fp16type x (*in[0]);
15136 const double d (x.asDouble());
15137 const double result (1.0/deSqrt(d));
// Non-NaN inputs that are zero or negative are special-cased.
// NOTE(review): the guarded statement is not visible in this excerpt - confirm what it does.
15139 if (!x.isNaN() && d <= 0.0)
15142 out[0] = fp16type(result).bits();
15143 min[0] = getMin(result, getULPs(in));
15144 max[0] = getMax(result, getULPs(in));
// Reference-value functor for the value returned by deModf() for a half-precision input.
15150 struct fp16ModfFrac : public fp16PerComponent
15152 template<class fp16type>
15153 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15155 const fp16type x (*in[0]);
15156 const double d (x.asDouble());
// NOTE(review): the declaration of `i` (the deModf out-parameter) is not visible in this excerpt.
15158 const double result (deModf(d, &i));
// Infinite and NaN inputs are special-cased; the guarded statement is not visible in this excerpt.
15160 if (x.isInf() || x.isNaN())
15163 out[0] = fp16type(result).bits();
15164 min[0] = getMin(result, getULPs(in));
15165 max[0] = getMax(result, getULPs(in));
// Reference-value functor for the value deModf() stores through its pointer argument
// for a half-precision input; the value deModf() returns is only captured in `unused`.
15171 struct fp16ModfInt : public fp16PerComponent
15173 template<class fp16type>
15174 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15176 const fp16type x (*in[0]);
15177 const double d (x.asDouble());
// NOTE(review): the declaration of `i` is not visible in this excerpt.
15179 const double unused (deModf(d, &i));
15180 const double result (i);
// Infinite and NaN inputs are special-cased; the guarded statement is not visible in this excerpt.
15184 if (x.isInf() || x.isNaN())
15187 out[0] = fp16type(result).bits();
15188 min[0] = getMin(result, getULPs(in));
15189 max[0] = getMax(result, getULPs(in));
// Reference-value functor for the value returned by deFrExp() for a half-precision input.
15195 struct fp16FrexpS : public fp16PerComponent
15197 template<class fp16type>
15198 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15200 const fp16type x (*in[0]);
15201 const double d (x.asDouble());
// NOTE(review): the declaration of `e` (the deFrExp out-parameter) is not visible in this excerpt.
15203 const double result (deFrExp(d, &e));
// NaN and infinite inputs are special-cased; the guarded statement is not visible in this excerpt.
15205 if (x.isNaN() || x.isInf())
15208 out[0] = fp16type(result).bits();
15209 min[0] = getMin(result, getULPs(in));
15210 max[0] = getMax(result, getULPs(in));
// Reference-value functor for the exponent deFrExp() stores through its pointer argument;
// the exponent is converted to double and then emitted as a half-precision value.
15216 struct fp16FrexpE : public fp16PerComponent
15218 template<class fp16type>
15219 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15221 const fp16type x (*in[0]);
15222 const double d (x.asDouble());
// NOTE(review): the declaration of `e` is not visible in this excerpt.
15224 const double unused (deFrExp(d, &e));
15225 const double result (static_cast<double>(e));
// NaN and infinite inputs are special-cased; the guarded statement is not visible in this excerpt.
15229 if (x.isNaN() || x.isInf())
15232 out[0] = fp16type(result).bits();
15233 min[0] = getMin(result, getULPs(in));
15234 max[0] = getMax(result, getULPs(in));
// Reference-value functor for half-precision addition: in[0] + in[1] evaluated in double.
15240 struct fp16OpFAdd : public fp16PerComponent
15242 template<class fp16type>
15243 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15245 const fp16type x (*in[0]);
15246 const fp16type y (*in[1]);
15247 const double xd (x.asDouble());
15248 const double yd (y.asDouble());
15249 const double result (xd + yd);
// out[0] gets the fp16 bit pattern; min[0]/max[0] get getMin/getMax of the result with getULPs(in).
15251 out[0] = fp16type(result).bits();
15252 min[0] = getMin(result, getULPs(in));
15253 max[0] = getMax(result, getULPs(in));
// Reference-value functor for half-precision subtraction: in[0] - in[1] evaluated in double.
15259 struct fp16OpFSub : public fp16PerComponent
15261 template<class fp16type>
15262 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15264 const fp16type x (*in[0]);
15265 const fp16type y (*in[1]);
15266 const double xd (x.asDouble());
15267 const double yd (y.asDouble());
15268 const double result (xd - yd);
// out[0] gets the fp16 bit pattern; min[0]/max[0] get getMin/getMax of the result with getULPs(in).
15270 out[0] = fp16type(result).bits();
15271 min[0] = getMin(result, getULPs(in));
15272 max[0] = getMax(result, getULPs(in));
// Reference-value functor for half-precision multiplication: in[0] * in[1] evaluated in double.
15278 struct fp16OpFMul : public fp16PerComponent
15280 template<class fp16type>
15281 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15283 const fp16type x (*in[0]);
15284 const fp16type y (*in[1]);
15285 const double xd (x.asDouble());
15286 const double yd (y.asDouble());
15287 const double result (xd * yd);
// out[0] gets the fp16 bit pattern; min[0]/max[0] get getMin/getMax of the result with getULPs(in).
15289 out[0] = fp16type(result).bits();
15290 min[0] = getMin(result, getULPs(in));
15291 max[0] = getMax(result, getULPs(in));
// Reference-value functor for half-precision division.
// Flavors: "DirectDiv" (xd / yd in double) and "InverseDiv" (xd multiplied by 1/yd, where the
// reciprocal is first converted through fp16type).
15297 struct fp16OpFDiv : public fp16PerComponent
15299 fp16OpFDiv() : fp16PerComponent()
15301 flavorNames.push_back("DirectDiv");
15302 flavorNames.push_back("InverseDiv");
15305 template<class fp16type>
15306 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15308 const fp16type x (*in[0]);
15309 const fp16type y (*in[1]);
15310 const double xd (x.asDouble());
15311 const double yd (y.asDouble());
15312 const double unspecUlp (16.0);
// 2.5 ULPs when |yd| lies in [deLdExp(1, -14), deLdExp(1, 14)], otherwise the 16 ULP fallback.
15313 const double ulpCnt (de::inRange(deAbs(yd), deLdExp(1, -14), deLdExp(1, 14)) ? 2.5 : unspecUlp);
15314 double result (0.0);
15319 if (getFlavor() == 0)
15321 result = (xd / yd);
15323 else if (getFlavor() == 1)
15325 const double invyd (1.0 / yd);
15326 const fp16type invy (invyd);
15328 result = (xd * invy.asDouble());
15332 TCU_THROW(InternalError, "Unknown flavor");
15335 out[0] = fp16type(result).bits();
15336 min[0] = getMin(result, ulpCnt);
15337 max[0] = getMax(result, ulpCnt);
// Reference-value functor for half-precision two-argument arctangent.
// Flavors: "DoubleCalc" and "DoubleCalc_PI"; both call deAtan2(xd, yd) in double, and the second
// additionally tests whether |result| lies within eps of pi.
15343 struct fp16Atan2 : public fp16PerComponent
15345 fp16Atan2() : fp16PerComponent()
15347 flavorNames.push_back("DoubleCalc");
15348 flavorNames.push_back("DoubleCalc_PI");
// Fixed tolerance of 2 * 5.0 ULPs, independent of the inputs.
15351 virtual double getULPs(vector<const deFloat16*>& in)
15355 return 2 * 5.0; // This is not a precision test. Value is not from spec
15358 template<class fp16type>
15359 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15361 const fp16type x (*in[0]);
15362 const fp16type y (*in[1]);
15363 const double xd (x.asDouble());
15364 const double yd (y.asDouble());
15365 double result (0.0);
// Both-zero and both-infinite input pairs are special-cased.
// NOTE(review): the guarded statement is not visible in this excerpt - confirm what it does.
15367 if ((x.isZero() && y.isZero())||(x.isInf() && y.isInf()))
15370 if (getFlavor() == 0)
15372 result = deAtan2(xd, yd);
15374 else if (getFlavor() == 1)
15376 const double ulps (2.0 * 5.0); // This is not a precision test. Value is not from spec
15377 const double eps (floatFormat16.ulp(DE_PI_DOUBLE, ulps));
15379 result = deAtan2(xd, yd);
// NOTE(review): the action taken when |result| is within eps of pi is not visible in this excerpt.
15381 if (de::inRange(deAbs(result), DE_PI_DOUBLE - eps, DE_PI_DOUBLE + eps))
15386 TCU_THROW(InternalError, "Unknown flavor");
15389 out[0] = fp16type(result).bits();
15390 min[0] = getMin(result, getULPs(in));
15391 max[0] = getMax(result, getULPs(in));
// Reference-value functor for half-precision pow.
// Flavors: "Pow" (dePow in double), "PowLog2" (deExp2(yd * deLog2(xd)) in double) and
// "PowLog2FP16" (same, with the log2 and the exp2 results converted through fp16type).
15397 struct fp16Pow : public fp16PerComponent
15399 fp16Pow() : fp16PerComponent()
15401 flavorNames.push_back("Pow");
15402 flavorNames.push_back("PowLog2");
15403 flavorNames.push_back("PowLog2FP16");
15406 template<class fp16type>
15407 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15409 const fp16type x (*in[0]);
15410 const fp16type y (*in[1]);
15411 const double xd (x.asDouble());
15412 const double yd (y.asDouble());
// Error allowed on log2(xd): deLdExp(1.0, -7) when |xd| lies in [0.5, 2.0],
// otherwise floatFormat16.ulp(deLog2(xd), 3.0).
15413 const double logxeps (de::inRange(deAbs(xd), 0.5, 2.0) ? deLdExp(1.0, -7) : floatFormat16.ulp(deLog2(xd), 3.0));
// The ULP budget is 1 + 4 * |yd * log2(xd)| evaluated at both ends of the log2 error interval;
// the larger of the two is used.
15414 const double ulps1 (1.0 + 4.0 * deAbs(yd * (deLog2(xd) - logxeps)));
15415 const double ulps2 (1.0 + 4.0 * deAbs(yd * (deLog2(xd) + logxeps)));
15416 const double ulps (deMax(deAbs(ulps1), deAbs(ulps2)));
15417 double result (0.0);
// A zero base with a non-positive exponent is special-cased.
// NOTE(review): the guarded statement is not visible in this excerpt - confirm what it does.
15422 if (x.isZero() && yd <= 0.0)
15425 if (getFlavor() == 0)
15427 result = dePow(xd, yd);
15429 else if (getFlavor() == 1)
15431 const double l2d (deLog2(xd));
15432 const double e2d (deExp2(yd * l2d));
// NOTE(review): the assignment of this flavor's value to `result` is not visible in this excerpt.
15436 else if (getFlavor() == 2)
15438 const double l2d (deLog2(xd));
15439 const fp16type l2 (l2d);
15440 const double e2d (deExp2(yd * l2.asDouble()));
15441 const fp16type e2 (e2d);
15443 result = e2.asDouble();
15447 TCU_THROW(InternalError, "Unknown flavor");
15450 out[0] = fp16type(result).bits();
15451 min[0] = getMin(result, ulps);
15452 max[0] = getMax(result, ulps);
// Reference-value functor for half-precision minimum: deMin(in[0], in[1]) evaluated in double.
15458 struct fp16FMin : public fp16PerComponent
15460 template<class fp16type>
15461 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15463 const fp16type x (*in[0]);
15464 const fp16type y (*in[1]);
15465 const double xd (x.asDouble());
15466 const double yd (y.asDouble());
15467 const double result (deMin(xd, yd));
// A NaN in either operand is special-cased; the guarded statement is not visible in this excerpt.
15469 if (x.isNaN() || y.isNaN())
15472 out[0] = fp16type(result).bits();
15473 min[0] = getMin(result, getULPs(in));
15474 max[0] = getMax(result, getULPs(in));
// Reference-value functor for half-precision maximum: deMax(in[0], in[1]) evaluated in double.
15480 struct fp16FMax : public fp16PerComponent
15482 template<class fp16type>
15483 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15485 const fp16type x (*in[0]);
15486 const fp16type y (*in[1]);
15487 const double xd (x.asDouble());
15488 const double yd (y.asDouble());
15489 const double result (deMax(xd, yd));
// A NaN in either operand is special-cased; the guarded statement is not visible in this excerpt.
15491 if (x.isNaN() || y.isNaN())
15494 out[0] = fp16type(result).bits();
15495 min[0] = getMin(result, getULPs(in));
15496 max[0] = getMax(result, getULPs(in));
// Reference-value functor for half-precision step: deStep(edge, x) evaluated in double,
// where edge is in[0] and x is in[1].
15502 struct fp16Step : public fp16PerComponent
15504 template<class fp16type>
15505 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15507 const fp16type edge (*in[0]);
15508 const fp16type x (*in[1]);
15509 const double edged (edge.asDouble());
15510 const double xd (x.asDouble());
15511 const double result (deStep(edged, xd));
// out[0] gets the fp16 bit pattern; min[0]/max[0] get getMin/getMax of the result with getULPs(in).
15513 out[0] = fp16type(result).bits();
15514 min[0] = getMin(result, getULPs(in));
15515 max[0] = getMax(result, getULPs(in));
// Reference-value functor for half-precision ldexp: deLdExp(in[0], trunc(in[1])) evaluated in double.
15521 struct fp16Ldexp : public fp16PerComponent
15523 template<class fp16type>
15524 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15526 const fp16type x (*in[0]);
15527 const fp16type y (*in[1]);
15528 const double xd (x.asDouble());
// The exponent operand arrives as a half float; it is truncated and cast to int.
15529 const int yd (static_cast<int>(deTrunc(y.asDouble())));
15530 const double result (deLdExp(xd, yd));
// Exponents that are NaN, infinite, denormal, or outside [-14, 15] are special-cased.
// NOTE(review): the guarded statement is not visible in this excerpt - confirm what it does.
15532 if (y.isNaN() || y.isInf() || y.isDenorm() || yd < -14 || yd > 15)
15535 // Spec: "If this product is too large to be represented in the floating-point type, the result is undefined."
// NOTE(review): the statement guarded by the infinity check below is not visible in this excerpt.
15536 if (fp16type(result).isInf())
15539 out[0] = fp16type(result).bits();
15540 min[0] = getMin(result, getULPs(in));
15541 max[0] = getMax(result, getULPs(in));
// Reference-value functor for half-precision clamp: deClamp(x, minVal, maxVal) evaluated in double,
// with x = in[0], minVal = in[1] and maxVal = in[2].
15547 struct fp16FClamp : public fp16PerComponent
15549 template<class fp16type>
15550 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15552 const fp16type x (*in[0]);
15553 const fp16type minVal (*in[1]);
15554 const fp16type maxVal (*in[2]);
15555 const double xd (x.asDouble());
15556 const double minVald (minVal.asDouble());
15557 const double maxVald (maxVal.asDouble());
15558 const double result (deClamp(xd, minVald, maxVald));
// NaN bounds and inverted bounds (minVal > maxVal) are special-cased.
// NOTE(review): the guarded statement is not visible in this excerpt - confirm what it does.
15560 if (minVal.isNaN() || maxVal.isNaN() || minVald > maxVald)
15563 out[0] = fp16type(result).bits();
15564 min[0] = getMin(result, getULPs(in));
15565 max[0] = getMax(result, getULPs(in));
// Reference-value functor for half-precision mix(x, y, a) with x = in[0], y = in[1], a = in[2].
// Flavors: "DoubleCalc" (deMix in double), "EmulatingFP16" (x*(1-a) + y*a with fp16 intermediates)
// and "EmulatingFP16YminusX" (x + (y-x)*a with fp16 intermediates).
// Every flavor uses the same absolute error:
// floatFormat16.ulp(|x*(1-a)|, 8) + floatFormat16.ulp(|y*a|, 8).
15571 struct fp16FMix : public fp16PerComponent
15573 fp16FMix() : fp16PerComponent()
15575 flavorNames.push_back("DoubleCalc");
15576 flavorNames.push_back("EmulatingFP16");
15577 flavorNames.push_back("EmulatingFP16YminusX");
15580 template<class fp16type>
15581 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15583 const fp16type x (*in[0]);
15584 const fp16type y (*in[1]);
15585 const fp16type a (*in[2]);
15586 const double ulps (8.0); // This is not a precision test. Value is not from spec
15587 double result (0.0);
15589 if (getFlavor() == 0)
15591 const double xd (x.asDouble());
15592 const double yd (y.asDouble());
15593 const double ad (a.asDouble());
15594 const double xeps (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15595 const double yeps (floatFormat16.ulp(deAbs(yd * ad), ulps));
15596 const double eps (xeps + yeps);
15598 result = deMix(xd, yd, ad);
15599 min[0] = result - eps;
15600 max[0] = result + eps;
15602 else if (getFlavor() == 1)
15604 const double xd (x.asDouble());
15605 const double yd (y.asDouble());
15606 const double ad (a.asDouble());
// (1 - a), x*(1 - a) and y*a are each converted through fp16type before the final sum.
15607 const fp16type am (1.0 - ad);
15608 const double amd (am.asDouble());
15609 const fp16type xam (xd * amd);
15610 const double xamd (xam.asDouble());
15611 const fp16type ya (yd * ad);
15612 const double yad (ya.asDouble());
15613 const double xeps (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15614 const double yeps (floatFormat16.ulp(deAbs(yd * ad), ulps));
15615 const double eps (xeps + yeps);
15617 result = xamd + yad;
15618 min[0] = result - eps;
15619 max[0] = result + eps;
15621 else if (getFlavor() == 2)
15623 const double xd (x.asDouble());
15624 const double yd (y.asDouble());
15625 const double ad (a.asDouble());
// (y - x) and (y - x)*a are each converted through fp16type before x is added.
15626 const fp16type ymx (yd - xd);
15627 const double ymxd (ymx.asDouble());
15628 const fp16type ymxa (ymxd * ad);
15629 const double ymxad (ymxa.asDouble());
15630 const double xeps (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15631 const double yeps (floatFormat16.ulp(deAbs(yd * ad), ulps));
15632 const double eps (xeps + yeps);
15634 result = xd + ymxad;
15635 min[0] = result - eps;
15636 max[0] = result + eps;
15640 TCU_THROW(InternalError, "Unknown flavor");
15643 out[0] = fp16type(result).bits();
// Reference-value functor for half-precision smoothstep with edge0 = in[0], edge1 = in[1], x = in[2].
// Flavors: "FloatCalc" (deFloatSmoothStep in float), "EmulatingFP16" (t*t*(3 - 2t) with fp16
// intermediates, t = (x - edge0) / (edge1 - edge0)) and "EmulatingFP16WClamp" (t obtained by
// multiplying with the fp16 reciprocal of (edge1 - edge0) and clamping to [0, 1]).
15649 struct fp16SmoothStep : public fp16PerComponent
15651 fp16SmoothStep() : fp16PerComponent()
15653 flavorNames.push_back("FloatCalc");
15654 flavorNames.push_back("EmulatingFP16");
15655 flavorNames.push_back("EmulatingFP16WClamp");
// Fixed tolerance of 4 ULPs, independent of the inputs.
15658 virtual double getULPs(vector<const deFloat16*>& in)
15662 return 4.0; // This is not a precision test. Value is not from spec
15665 template<class fp16type>
15666 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15668 const fp16type edge0 (*in[0]);
15669 const fp16type edge1 (*in[1]);
15670 const fp16type x (*in[2]);
15671 double result (0.0);
// NaN operands, edge0 >= edge1, and infinite operands are special-cased by the two checks below.
// NOTE(review): the guarded statements are not visible in this excerpt - confirm what they do.
15673 if (edge0.isNaN() || edge1.isNaN() || x.isNaN() || edge0.asDouble() >= edge1.asDouble())
15676 if (edge0.isInf() || edge1.isInf() || x.isInf())
15679 if (getFlavor() == 0)
15681 const float edge0d (edge0.asFloat());
15682 const float edge1d (edge1.asFloat());
15683 const float xd (x.asFloat());
15684 const float sstep (deFloatSmoothStep(edge0d, edge1d, xd));
// NOTE(review): the assignment of `sstep` to `result` is not visible in this excerpt.
15688 else if (getFlavor() == 1)
15690 const double edge0d (edge0.asDouble());
15691 const double edge1d (edge1.asDouble());
15692 const double xd (x.asDouble());
// NOTE(review): the leading `if` of this chain and the bodies of its first two branches
// are not visible in this excerpt.
15696 else if (xd >= edge1d)
15700 const fp16type a (xd - edge0d);
15701 const fp16type b (edge1d - edge0d);
15702 const fp16type t (a.asDouble() / b.asDouble());
15703 const fp16type t2 (2.0 * t.asDouble());
15704 const fp16type t3 (3.0 - t2.asDouble());
15705 const fp16type t4 (t.asDouble() * t3.asDouble());
15706 const fp16type t5 (t.asDouble() * t4.asDouble());
15708 result = t5.asDouble();
15711 else if (getFlavor() == 2)
15713 const double edge0d (edge0.asDouble());
15714 const double edge1d (edge1.asDouble());
15715 const double xd (x.asDouble());
15716 const fp16type a (xd - edge0d);
15717 const fp16type b (edge1d - edge0d);
// Division is replaced by a multiplication with the fp16-converted reciprocal of b.
15718 const fp16type bi (1.0 / b.asDouble());
15719 const fp16type t0 (a.asDouble() * bi.asDouble());
15720 const double tc (deClamp(t0.asDouble(), 0.0, 1.0));
15721 const fp16type t (tc);
15722 const fp16type t2 (2.0 * t.asDouble());
15723 const fp16type t3 (3.0 - t2.asDouble());
15724 const fp16type t4 (t.asDouble() * t3.asDouble());
15725 const fp16type t5 (t.asDouble() * t4.asDouble());
15727 result = t5.asDouble();
15731 TCU_THROW(InternalError, "Unknown flavor");
15734 out[0] = fp16type(result).bits();
15735 min[0] = getMin(result, getULPs(in));
15736 max[0] = getMax(result, getULPs(in));
// Reference-value functor for half-precision fused multiply-add: a * b + c with a = in[0], b = in[1], c = in[2].
// Flavors: "DoubleCalc" (deMadd in double) and "EmulatingFP16" (the product is converted through
// fp16type before c is added, and the sum is converted again).
// NOTE(review): the constructor header that encloses the push_back calls is not visible in this excerpt.
15742 struct fp16Fma : public fp16PerComponent
15746 flavorNames.push_back("DoubleCalc");
15747 flavorNames.push_back("EmulatingFP16");
// NOTE(review): the body of this getULPs override is not visible in this excerpt.
15750 virtual double getULPs(vector<const deFloat16*>& in)
15757 template<class fp16type>
15758 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
// Exactly three arguments are expected, each with as many components as the output.
15760 DE_ASSERT(in.size() == 3);
15761 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
15762 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
15763 DE_ASSERT(getArgCompCount(2) == getOutCompCount());
15764 DE_ASSERT(getOutCompCount() > 0);
15766 const fp16type a (*in[0]);
15767 const fp16type b (*in[1]);
15768 const fp16type c (*in[2]);
15769 double result (0.0);
15771 if (getFlavor() == 0)
15773 const double ad (a.asDouble());
15774 const double bd (b.asDouble());
15775 const double cd (c.asDouble());
15777 result = deMadd(ad, bd, cd);
15779 else if (getFlavor() == 1)
15781 const double ad (a.asDouble());
15782 const double bd (b.asDouble());
15783 const double cd (c.asDouble());
15784 const fp16type ab (ad * bd);
15785 const fp16type r (ab.asDouble() + cd);
15787 result = r.asDouble();
15791 TCU_THROW(InternalError, "Unknown flavor");
15794 out[0] = fp16type(result).bits();
15795 min[0] = getMin(result, getULPs(in));
15796 max[0] = getMax(result, getULPs(in));
// Base for functors whose calc() reads several components of an argument in a single call
// (see the componentNdx loops in the derived structs); callOncePerComponent() reports false.
15803 struct fp16AllComponents : public fp16PerComponent
15805 bool callOncePerComponent () { return false; }
// Reference-value functor for the length of a half-precision vector (square root of the sum of squares).
// Flavors: "EmulatingFP16" (squares and the running sum converted through fp16type) and
// "DoubleCalc" (squares computed in double).
15808 struct fp16Length : public fp16AllComponents
15810 fp16Length() : fp16AllComponents()
15812 flavorNames.push_back("EmulatingFP16");
15813 flavorNames.push_back("DoubleCalc");
// NOTE(review): the body of this getULPs override is not visible in this excerpt.
15816 virtual double getULPs(vector<const deFloat16*>& in)
15823 template<class fp16type>
15824 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
// One vector argument in, one scalar component out.
15826 DE_ASSERT(getOutCompCount() == 1);
15827 DE_ASSERT(in.size() == 1);
15829 double result (0.0);
15831 if (getFlavor() == 0)
// NOTE(review): the declaration of the accumulator `r` is not visible in this excerpt.
15835 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15837 const fp16type x (in[0][componentNdx]);
15838 const fp16type q (x.asDouble() * x.asDouble());
15840 r = fp16type(r.asDouble() + q.asDouble());
15843 result = deSqrt(r.asDouble());
15845 out[0] = fp16type(result).bits();
15847 else if (getFlavor() == 1)
// NOTE(review): the declaration of `r` and the statement accumulating `q` into it
// are not visible in this excerpt.
15851 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15853 const fp16type x (in[0][componentNdx]);
15854 const double q (x.asDouble() * x.asDouble());
15859 result = deSqrt(r);
15861 out[0] = fp16type(result).bits();
15865 TCU_THROW(InternalError, "Unknown flavor");
15868 min[0] = getMin(result, getULPs(in));
15869 max[0] = getMax(result, getULPs(in));
// Reference-value functor for the distance between two half-precision vectors
// (square root of the sum of squared component differences).
// Flavors: "EmulatingFP16" (difference, square and running sum converted through fp16type) and
// "DoubleCalc" (difference and square computed in double).
15875 struct fp16Distance : public fp16AllComponents
15877 fp16Distance() : fp16AllComponents()
15879 flavorNames.push_back("EmulatingFP16");
15880 flavorNames.push_back("DoubleCalc");
// NOTE(review): the body of this getULPs override is not visible in this excerpt.
15883 virtual double getULPs(vector<const deFloat16*>& in)
15890 template<class fp16type>
15891 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
// Two vector arguments of equal component count in, one scalar component out.
15893 DE_ASSERT(getOutCompCount() == 1);
15894 DE_ASSERT(in.size() == 2);
15895 DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
15897 double result (0.0);
15899 if (getFlavor() == 0)
// NOTE(review): the declaration of the accumulator `r` is not visible in this excerpt.
15903 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15905 const fp16type x (in[0][componentNdx]);
15906 const fp16type y (in[1][componentNdx]);
15907 const fp16type d (x.asDouble() - y.asDouble());
15908 const fp16type q (d.asDouble() * d.asDouble());
15910 r = fp16type(r.asDouble() + q.asDouble());
15913 result = deSqrt(r.asDouble());
15915 else if (getFlavor() == 1)
// NOTE(review): the declaration of `r` and the statement accumulating `q` into it
// are not visible in this excerpt.
15919 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15921 const fp16type x (in[0][componentNdx]);
15922 const fp16type y (in[1][componentNdx]);
15923 const double d (x.asDouble() - y.asDouble());
15924 const double q (d * d);
15929 result = deSqrt(r);
15933 TCU_THROW(InternalError, "Unknown flavor");
15936 out[0] = fp16type(result).bits();
15937 min[0] = getMin(result, getULPs(in));
15938 max[0] = getMax(result, getULPs(in));
// Reference-value functor for the cross product of two 3-component half-precision vectors.
// Flavors: "EmulatingFP16" (each product converted through fp16type before subtracting) and
// "DoubleCalc" (products kept in double).
15944 struct fp16Cross : public fp16AllComponents
15946 fp16Cross() : fp16AllComponents()
15948 flavorNames.push_back("EmulatingFP16");
15949 flavorNames.push_back("DoubleCalc");
// NOTE(review): the body of this getULPs override is not visible in this excerpt.
15952 virtual double getULPs(vector<const deFloat16*>& in)
15959 template<class fp16type>
15960 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
// Two 3-component arguments in, three components out.
15962 DE_ASSERT(getOutCompCount() == 3);
15963 DE_ASSERT(in.size() == 2);
15964 DE_ASSERT(getArgCompCount(0) == 3);
15965 DE_ASSERT(getArgCompCount(1) == 3);
15967 if (getFlavor() == 0)
15969 const fp16type x0 (in[0][0]);
15970 const fp16type x1 (in[0][1]);
15971 const fp16type x2 (in[0][2]);
15972 const fp16type y0 (in[1][0]);
15973 const fp16type y1 (in[1][1]);
15974 const fp16type y2 (in[1][2]);
15975 const fp16type x1y2 (x1.asDouble() * y2.asDouble());
15976 const fp16type y1x2 (y1.asDouble() * x2.asDouble());
15977 const fp16type x2y0 (x2.asDouble() * y0.asDouble());
15978 const fp16type y2x0 (y2.asDouble() * x0.asDouble());
15979 const fp16type x0y1 (x0.asDouble() * y1.asDouble());
15980 const fp16type y0x1 (y0.asDouble() * x1.asDouble());
15982 out[0] = fp16type(x1y2.asDouble() - y1x2.asDouble()).bits();
15983 out[1] = fp16type(x2y0.asDouble() - y2x0.asDouble()).bits();
15984 out[2] = fp16type(x0y1.asDouble() - y0x1.asDouble()).bits();
15986 else if (getFlavor() == 1)
15988 const fp16type x0 (in[0][0]);
15989 const fp16type x1 (in[0][1]);
15990 const fp16type x2 (in[0][2]);
15991 const fp16type y0 (in[1][0]);
15992 const fp16type y1 (in[1][1]);
15993 const fp16type y2 (in[1][2]);
15994 const double x1y2 (x1.asDouble() * y2.asDouble());
15995 const double y1x2 (y1.asDouble() * x2.asDouble());
15996 const double x2y0 (x2.asDouble() * y0.asDouble());
15997 const double y2x0 (y2.asDouble() * x0.asDouble());
15998 const double x0y1 (x0.asDouble() * y1.asDouble());
15999 const double y0x1 (y0.asDouble() * x1.asDouble());
16001 out[0] = fp16type(x1y2 - y1x2).bits();
16002 out[1] = fp16type(x2y0 - y2x0).bits();
16003 out[2] = fp16type(x0y1 - y0x1).bits();
16007 TCU_THROW(InternalError, "Unknown flavor");
// Bounds are derived from the already fp16-converted outputs, one pair per output component.
16010 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16011 min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16012 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16013 max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
// Reference-value functor for normalizing a half-precision vector (each component divided by the length).
// Two fixed flavors ("EmulatingFP16", "DoubleCalc") are registered in the constructor; once the
// component count of argument 0 is set, one extra flavor per permutation of the component
// indices is appended, in which the squares are accumulated in the permuted order.
16019 struct fp16Normalize : public fp16AllComponents
16021 fp16Normalize() : fp16AllComponents()
16023 flavorNames.push_back("EmulatingFP16");
16024 flavorNames.push_back("DoubleCalc");
16026 permutationsFlavorStart = 0;
16027 permutationsFlavorEnd = flavorNames.size();
16029 // flavorNames will be extended later
// When argument 0 receives its component count for the first time, enumerates the index
// permutations produced by std::next_permutation, storing each in m_permutations and registering
// a flavor named "Permutted_" followed by the index digits. The range
// [permutationsFlavorStart, permutationsFlavorEnd) then marks these flavors in flavorNames.
// The base-class setArgCompCount is called afterwards.
16032 virtual void setArgCompCount (size_t argNo, size_t compCount)
16034 DE_ASSERT(argCompCount[argNo] == 0); // Once only
16036 if (argNo == 0 && argCompCount[argNo] == 0)
16038 const size_t maxPermutationsCount = 24u; // Equal to 4!
16039 std::vector<int> indices;
16041 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16042 indices.push_back(static_cast<int>(componentNdx));
16044 m_permutations.reserve(maxPermutationsCount);
16046 permutationsFlavorStart = flavorNames.size();
16050 tcu::UVec4 permutation;
16051 std::string name = "Permutted_";
16053 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16055 permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16056 name += de::toString(indices[componentNdx]);
16059 m_permutations.push_back(permutation);
16060 flavorNames.push_back(name);
16062 } while(std::next_permutation(indices.begin(), indices.end()));
16064 permutationsFlavorEnd = flavorNames.size();
16067 fp16AllComponents::setArgCompCount(argNo, compCount);
// NOTE(review): the body of this getULPs override is not visible in this excerpt.
16069 virtual double getULPs(vector<const deFloat16*>& in)
16076 template<class fp16type>
16077 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
// One argument in, with as many components as the output.
16079 DE_ASSERT(in.size() == 1);
16080 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
// Flavor 0: sum of squares and its square root are converted through fp16type.
// NOTE(review): the declaration of the accumulator `r` is not visible in this excerpt.
16082 if (getFlavor() == 0)
16086 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16088 const fp16type x (in[0][componentNdx]);
16089 const fp16type q (x.asDouble() * x.asDouble());
16091 r = fp16type(r.asDouble() + q.asDouble());
16094 r = fp16type(deSqrt(r.asDouble()));
16099 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16101 const fp16type x (in[0][componentNdx]);
16103 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
// Flavor 1: squares computed in double.
// NOTE(review): the declaration of `r`, the accumulation of `q` and the statements between the
// two loops are not visible in this excerpt.
16106 else if (getFlavor() == 1)
16110 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16112 const fp16type x (in[0][componentNdx]);
16113 const double q (x.asDouble() * x.asDouble());
16123 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16125 const fp16type x (in[0][componentNdx]);
16127 out[componentNdx] = fp16type(x.asDouble() / r).bits();
// Permutation flavors: same fp16 emulation as flavor 0, but components are visited in the order
// given by the stored permutation; each output is still written at its own component index.
16130 else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16132 const int compCount (static_cast<int>(getArgCompCount(0)));
16133 const size_t permutationNdx (getFlavor() - permutationsFlavorStart);
16134 const tcu::UVec4& permutation (m_permutations[permutationNdx]);
16137 for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16139 const size_t componentNdx (permutation[permComponentNdx]);
16140 const fp16type x (in[0][componentNdx]);
16141 const fp16type q (x.asDouble() * x.asDouble());
16143 r = fp16type(r.asDouble() + q.asDouble());
16146 r = fp16type(deSqrt(r.asDouble()));
16151 for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16153 const size_t componentNdx (permutation[permComponentNdx]);
16154 const fp16type x (in[0][componentNdx]);
16156 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16161 TCU_THROW(InternalError, "Unknown flavor");
// Bounds are derived from the already fp16-converted outputs, one pair per output component.
16164 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16165 min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16166 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16167 max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
// Index permutations built by setArgCompCount, and the half-open flavor index range that maps to them.
16173 std::vector<tcu::UVec4> m_permutations;
16174 size_t permutationsFlavorStart;
16175 size_t permutationsFlavorEnd;
// Reference calculator for a three-operand, component-wise fp16 operation whose
// result is in[0] or its negation, selected by the sign of dot(in[1], in[2]).
// NOTE(review): this matches GLSL.std.450 FaceForward(N, I, Nref) with
// in[0] = N, in[1] = I, in[2] = Nref -- confirm against the test that uses it.
16178 struct fp16FaceForward : public fp16AllComponents
// Tolerance (in ULPs) that calc() passes to getMin()/getMax().
16180 virtual double getULPs(vector<const deFloat16*>& in)
// Fills `out` with the expected fp16 bit patterns and `min`/`max` with the
// accepted interval for every output component.
//   in      - three operand arrays, each getOutCompCount() components long
//   out     - expected result, one deFloat16 per component
//   min/max - per-component bounds built from `out` and getULPs()
16187 template<class fp16type>
16188 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16190 DE_ASSERT(in.size() == 3);
16191 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16192 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16193 DE_ASSERT(getArgCompCount(2) == getOutCompCount());
// Dot product of in[1] and in[2]. Each product and each partial sum is rounded
// through fp16type so the accumulation emulates half-precision arithmetic.
16197 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16199 const fp16type x (in[1][componentNdx]);
16200 const fp16type y (in[2][componentNdx]);
16201 const double xd (x.asDouble());
16202 const double yd (y.asDouble());
16203 const fp16type q (xd * yd);
16205 dp = fp16type(dp.asDouble() + q.asDouble());
// A NaN or zero dot product is treated as a special case before the sign test.
16208 if (dp.isNaN() || dp.isZero())
16211 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16213 const fp16type n (in[0][componentNdx]);
// Sign bit set (negative dot product) keeps in[0]; otherwise in[0] is negated.
16215 out[componentNdx] = (dp.signBit() == 1) ? n.bits() : fp16type(-n.asDouble()).bits();
// Bounds are taken around the fp16-rounded expected value stored in `out`.
16218 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16219 min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16220 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16221 max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
// Reference calculator for an fp16 reflection: result = i - 2 * dot(i, n) * n,
// with in[0] = i and in[1] = n. Several "flavors" evaluate the same formula with
// different intermediate precision and operation order.
16227 struct fp16Reflect : public fp16AllComponents
// Registers the flavor names. calc() treats flavors 0..3 as a two-bit field
// (bit 0: KeepZeroSign, bit 1: FloatCalc) and flavors 4 and 5 as the variants
// that apply the factor 2 to n or to i first.
// NOTE(review): assumes flavor indices follow the push_back order -- confirm in
// the base class.
16229 fp16Reflect() : fp16AllComponents()
16231 flavorNames.push_back("EmulatingFP16");
16232 flavorNames.push_back("EmulatingFP16+KeepZeroSign");
16233 flavorNames.push_back("FloatCalc");
16234 flavorNames.push_back("FloatCalc+KeepZeroSign");
16235 flavorNames.push_back("EmulatingFP16+2Nfirst");
16236 flavorNames.push_back("EmulatingFP16+2Ifirst");
// Tolerance (in ULPs) that calc() passes to getMin()/getMax().
16239 virtual double getULPs(vector<const deFloat16*>& in)
16243 return 256.0; // This is not a precision test. Value is not from spec
// Fills `out` with the expected fp16 bit patterns and `min`/`max` with the
// accepted interval for every output component.
//   in      - two operand arrays (i, n), each getOutCompCount() components long
//   out     - expected result, one deFloat16 per component
//   min/max - per-component bounds built from `out` and getULPs()
// Throws InternalError for a flavor outside 0..5.
16246 template<class fp16type>
16247 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16249 DE_ASSERT(in.size() == 2);
16250 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16251 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
// Flavors 0..3: decode the two option bits.
16253 if (getFlavor() < 4)
16255 const bool keepZeroSign ((flavor & 1) != 0 ? true : false);
16256 const bool floatCalc ((flavor & 2) != 0 ? true : false);
// Float path: dot product accumulated in single precision.
16262 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16264 const fp16type i (in[0][componentNdx]);
16265 const fp16type n (in[1][componentNdx]);
16266 const float id (i.asFloat());
16267 const float nd (n.asFloat());
16268 const float qd (id * nd);
// Seeds dp with the first product instead of adding it to the initial value.
// NOTE(review): presumably the KeepZeroSign variant, so that a -0 first product
// is not absorbed by a +0 initial value -- confirm.
16271 dp = (componentNdx == 0) ? qd : dp + qd;
// i - 2 * dp * n evaluated in float; only the final value is rounded to fp16.
16276 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16278 const fp16type i (in[0][componentNdx]);
16279 const fp16type n (in[1][componentNdx]);
16280 const float dpnd (dp * n.asFloat());
16281 const float dpn2d (2.0f * dpnd);
16282 const float idpn2d (i.asFloat() - dpn2d);
16283 const fp16type result (idpn2d);
16285 out[componentNdx] = result.bits();
// Emulated path: every product and partial sum is rounded through fp16type.
16292 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16294 const fp16type i (in[0][componentNdx]);
16295 const fp16type n (in[1][componentNdx]);
16296 const double id (i.asDouble());
16297 const double nd (n.asDouble());
16298 const fp16type q (id * nd);
// Two alternative accumulation forms: the first seeds dp with the first product,
// the second always adds to the running value.
// NOTE(review): presumably selected by keepZeroSign -- confirm.
16301 dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
16303 dp = fp16type(dp.asDouble() + q.asDouble());
// i - 2 * (dp * n) with an fp16 rounding after each step.
16309 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16311 const fp16type i (in[0][componentNdx]);
16312 const fp16type n (in[1][componentNdx]);
16313 const fp16type dpn (dp.asDouble() * n.asDouble());
16314 const fp16type dpn2 (2 * dpn.asDouble());
16315 const fp16type idpn2 (i.asDouble() - dpn2.asDouble());
16317 out[componentNdx] = idpn2.bits();
// Flavor 4 ("2Nfirst"): fp16-emulated, with the factor 2 applied to n before the
// multiplication by the dot product.
16321 else if (getFlavor() == 4)
16325 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16327 const fp16type i (in[0][componentNdx]);
16328 const fp16type n (in[1][componentNdx]);
16329 const double id (i.asDouble());
16330 const double nd (n.asDouble());
16331 const fp16type q (id * nd);
16333 dp = fp16type(dp.asDouble() + q.asDouble());
16339 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16341 const fp16type i (in[0][componentNdx]);
16342 const fp16type n (in[1][componentNdx]);
16343 const fp16type n2 (2 * n.asDouble());
16344 const fp16type dpn2 (dp.asDouble() * n2.asDouble());
16345 const fp16type idpn2 (i.asDouble() - dpn2.asDouble());
16347 out[componentNdx] = idpn2.bits();
// Flavor 5 ("2Ifirst"): fp16-emulated, with the factor 2 folded into i before the
// dot product, so dp2 already equals 2 * dot(i, n).
16350 else if (getFlavor() == 5)
16354 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16356 const fp16type i (in[0][componentNdx]);
16357 const fp16type n (in[1][componentNdx]);
16358 const fp16type i2 (2.0 * i.asDouble());
16359 const double i2d (i2.asDouble());
16360 const double nd (n.asDouble());
16361 const fp16type q (i2d * nd);
16363 dp2 = fp16type(dp2.asDouble() + q.asDouble());
16369 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16371 const fp16type i (in[0][componentNdx]);
16372 const fp16type n (in[1][componentNdx]);
16373 const fp16type dpn2 (dp2.asDouble() * n.asDouble());
16374 const fp16type idpn2 (i.asDouble() - dpn2.asDouble());
16376 out[componentNdx] = idpn2.bits();
16381 TCU_THROW(InternalError, "Unknown flavor");
// Bounds are taken around the fp16-rounded expected value stored in `out`.
16384 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16385 min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16386 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16387 max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
// Reference calculator for an fp16 refraction with in[0] = i, in[1] = n and the
// scalar in[2] = eta:
//   k = 1 - eta^2 * (1 - dot(n, i)^2)
//   result = (k < 0) ? 0 : eta * i - (eta * dot(n, i) + sqrt(k)) * n
16393 struct fp16Refract : public fp16AllComponents
// Registers the four flavors; calc() decodes them as a two-bit field
// (bit 0: KeepZeroSign, bit 1: the high-precision path).
16395 fp16Refract() : fp16AllComponents()
16397 flavorNames.push_back("EmulatingFP16");
16398 flavorNames.push_back("EmulatingFP16+KeepZeroSign");
16399 flavorNames.push_back("FloatCalc");
16400 flavorNames.push_back("FloatCalc+KeepZeroSign");
// Tolerance (in ULPs) that calc() passes to getMin()/getMax().
16403 virtual double getULPs(vector<const deFloat16*>& in)
16407 return 8192.0; // This is not a precision test. Value is not from spec
// Fills `out` with the expected fp16 bit patterns and `min`/`max` with the
// accepted interval for every output component.
//   in      - i and n (getOutCompCount() components each) plus the scalar eta
//   out     - expected result, one deFloat16 per component
//   min/max - per-component bounds built from `out` and getULPs()
16410 template<class fp16type>
16411 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16413 DE_ASSERT(in.size() == 3);
16414 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16415 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16416 DE_ASSERT(getArgCompCount(2) == 1);
// Bit 1 is named "FloatCalc" in the flavor list, but the path it selects works
// in double precision (see the locals below).
16418 const bool keepZeroSign ((flavor & 1) != 0 ? true : false);
16419 const bool doubleCalc ((flavor & 2) != 0 ? true : false);
16420 const fp16type eta (*in[2]);
// Double path: dot product and k computed without intermediate fp16 rounding.
16426 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16428 const fp16type i (in[0][componentNdx]);
16429 const fp16type n (in[1][componentNdx]);
16430 const double id (i.asDouble());
16431 const double nd (n.asDouble());
16432 const double qd (id * nd);
// Seeds dp with the first product instead of adding it to the initial value.
// NOTE(review): presumably the KeepZeroSign variant -- confirm.
16435 dp = (componentNdx == 0) ? qd : dp + qd;
16440 const double eta2 (eta.asDouble() * eta.asDouble());
16441 const double dp2 (dp * dp);
16442 const double dp1 (1.0 - dp2);
16443 const double dpe (eta2 * dp1);
16444 const double k (1.0 - dpe);
// All-zero result vector.
16448 const fp16type zero (0.0);
16450 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16451 out[componentNdx] = zero.bits();
16455 const double sk (deSqrt(k));
// eta * i - (eta * dp + sqrt(k)) * n; only the final value is rounded to fp16.
16457 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16459 const fp16type i (in[0][componentNdx]);
16460 const fp16type n (in[1][componentNdx]);
16461 const double etai (i.asDouble() * eta.asDouble());
16462 const double etadp (eta.asDouble() * dp);
16463 const double etadpk (etadp + sk);
16464 const double etadpkn (etadpk * n.asDouble());
16465 const double full (etai - etadpkn);
16466 const fp16type result (full);
// Results that overflow fp16 to infinity are singled out here.
16468 if (result.isInf())
16471 out[componentNdx] = result.bits();
// Emulated path: every intermediate value is rounded through fp16type.
16479 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16481 const fp16type i (in[0][componentNdx]);
16482 const fp16type n (in[1][componentNdx]);
16483 const double id (i.asDouble());
16484 const double nd (n.asDouble());
16485 const fp16type q (id * nd);
// Two alternative accumulation forms: the first seeds dp with the first product,
// the second always adds to the running value.
// NOTE(review): presumably selected by keepZeroSign -- confirm.
16488 dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
16490 dp = fp16type(dp.asDouble() + q.asDouble());
16496 const fp16type eta2(eta.asDouble() * eta.asDouble());
16497 const fp16type dp2 (dp.asDouble() * dp.asDouble());
16498 const fp16type dp1 (1.0 - dp2.asDouble());
16499 const fp16type dpe (eta2.asDouble() * dp1.asDouble());
16500 const fp16type k (1.0 - dpe.asDouble());
// Negative k: the result is the zero vector.
16502 if (k.asDouble() < 0.0)
16504 const fp16type zero (0.0);
16506 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16507 out[componentNdx] = zero.bits();
16511 const fp16type sk (deSqrt(k.asDouble()));
16513 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16515 const fp16type i (in[0][componentNdx]);
16516 const fp16type n (in[1][componentNdx]);
16517 const fp16type etai (i.asDouble() * eta.asDouble());
16518 const fp16type etadp (eta.asDouble() * dp.asDouble());
16519 const fp16type etadpk (etadp.asDouble() + sk.asDouble());
16520 const fp16type etadpkn (etadpk.asDouble() * n.asDouble());
16521 const fp16type full (etai.asDouble() - etadpkn.asDouble());
// NaN or infinite results are singled out here.
16523 if (full.isNaN() || full.isInf())
16526 out[componentNdx] = full.bits();
// Bounds are taken around the fp16-rounded expected value stored in `out`.
16531 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16532 min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16533 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16534 max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
// Reference calculator for an fp16 dot product of in[0] and in[1] producing one
// scalar. Besides three fixed-precision flavors it generates one flavor per
// permutation of the component order, so any summation order can be matched.
16540 struct fp16Dot : public fp16AllComponents
// Registers the three fixed flavors and initialises the permutation range to
// [0, 3); setArgCompCount() later moves it past the fixed flavors.
16542 fp16Dot() : fp16AllComponents()
16544 flavorNames.push_back("EmulatingFP16");
16545 flavorNames.push_back("FloatCalc");
16546 flavorNames.push_back("DoubleCalc");
16548 permutationsFlavorStart = 0;
16549 permutationsFlavorEnd = flavorNames.size();
16551 // flavorNames will be extended later
// Records the component count of argument `argNo`. For argument 0 it also
// appends one flavor per permutation of the indices 0..compCount-1 and stores
// the matching index vectors in m_permutations.
//   argNo     - argument index
//   compCount - number of components of that argument (at most 4 fit a UVec4)
16554 virtual void setArgCompCount (size_t argNo, size_t compCount)
16556 DE_ASSERT(argCompCount[argNo] == 0); // Once only
// The once-only condition is repeated here in addition to the assert above.
16558 if (argNo == 0 && argCompCount[argNo] == 0)
16560 const size_t maxPermutationsCount = 24u; // Equal to 4!
16561 std::vector<int> indices;
// Start from the identity order; it is sorted, so next_permutation visits every
// permutation before it returns false.
16563 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16564 indices.push_back(static_cast<int>(componentNdx));
16566 m_permutations.reserve(maxPermutationsCount);
16568 permutationsFlavorStart = flavorNames.size();
// One flavor per permutation, named after the index sequence.
16572 tcu::UVec4 permutation;
16573 std::string name = "Permutted_";
16575 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16577 permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16578 name += de::toString(indices[componentNdx]);
16581 m_permutations.push_back(permutation);
16582 flavorNames.push_back(name);
16584 } while(std::next_permutation(indices.begin(), indices.end()));
16586 permutationsFlavorEnd = flavorNames.size();
16589 fp16AllComponents::setArgCompCount(argNo, compCount);
// ULP figure reported for this operation. calc() below derives its bounds from
// an accumulated epsilon rather than from this value.
16592 virtual double getULPs(vector<const deFloat16*>& in)
16596 return 16.0; // This is not a precision test. Value is not from spec
// Writes the expected dot product to out[0] and the interval
// [result - eps, result + eps] to min[0]/max[0], where eps accumulates
// floatFormat16.ulp(product, 2.0) over all component products.
//   in  - two operand arrays with equal component counts
//   out - expected scalar as fp16 bits
// Throws InternalError for an unknown flavor.
16599 template<class fp16type>
16600 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16602 DE_ASSERT(in.size() == 2);
16603 DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
16604 DE_ASSERT(getOutCompCount() == 1);
16606 double result (0.0);
// Flavor 0: products and partial sums rounded through fp16type, natural order.
16609 if (getFlavor() == 0)
16613 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16615 const fp16type x (in[0][componentNdx]);
16616 const fp16type y (in[1][componentNdx]);
16617 const fp16type q (x.asDouble() * y.asDouble());
16619 dp = fp16type(dp.asDouble() + q.asDouble());
16620 eps += floatFormat16.ulp(q.asDouble(), 2.0);
16623 result = dp.asDouble();
// Flavor 1: products formed in single precision.
16625 else if (getFlavor() == 1)
16629 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16631 const fp16type x (in[0][componentNdx]);
16632 const fp16type y (in[1][componentNdx]);
16633 const float q (x.asFloat() * y.asFloat());
16636 eps += floatFormat16.ulp(static_cast<double>(q), 2.0);
// Flavor 2: products formed in double precision.
16641 else if (getFlavor() == 2)
16645 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16647 const fp16type x (in[0][componentNdx]);
16648 const fp16type y (in[1][componentNdx]);
16649 const double q (x.asDouble() * y.asDouble());
16652 eps += floatFormat16.ulp(q, 2.0);
// Permutation flavors: same arithmetic as flavor 0, but the components are
// visited in the order given by the selected permutation.
16657 else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16659 const int compCount (static_cast<int>(getArgCompCount(1)));
16660 const size_t permutationNdx (getFlavor() - permutationsFlavorStart);
16661 const tcu::UVec4& permutation (m_permutations[permutationNdx]);
16664 for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16666 const size_t componentNdx (permutation[permComponentNdx]);
16667 const fp16type x (in[0][componentNdx]);
16668 const fp16type y (in[1][componentNdx]);
16669 const fp16type q (x.asDouble() * y.asDouble());
16671 dp = fp16type(dp.asDouble() + q.asDouble());
16672 eps += floatFormat16.ulp(q.asDouble(), 2.0);
16675 result = dp.asDouble();
16679 TCU_THROW(InternalError, "Unknown flavor");
16682 out[0] = fp16type(result).bits();
16683 min[0] = result - eps;
16684 max[0] = result + eps;
// Component orders generated by setArgCompCount(), indexed by
// (flavor - permutationsFlavorStart).
16690 std::vector<tcu::UVec4> m_permutations;
// Half-open flavor range [start, end) occupied by the permutation flavors.
16691 size_t permutationsFlavorStart;
16692 size_t permutationsFlavorEnd;
// Reference calculator for vector * scalar: every component of in[0] is
// multiplied by the single value in[1][0].
16695 struct fp16VectorTimesScalar : public fp16AllComponents
// Tolerance (in ULPs) that calc() passes to getMin()/getMax().
16697 virtual double getULPs(vector<const deFloat16*>& in)
// Fills `out` with the fp16-rounded products and `min`/`max` with bounds taken
// around the unrounded double product.
//   in      - vector operand (getOutCompCount() components) and one scalar
//   out     - expected result, one deFloat16 per component
//   min/max - per-component bounds
16704 template<class fp16type>
16705 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16707 DE_ASSERT(in.size() == 2);
16708 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16709 DE_ASSERT(getArgCompCount(1) == 1);
16711 fp16type s (*in[1]);
16713 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16715 const fp16type x (in[0][componentNdx]);
16716 const double result (s.asDouble() * x.asDouble());
16717 const fp16type m (result);
16719 out[componentNdx] = m.bits();
16720 min[componentNdx] = getMin(result, getULPs(in));
16721 max[componentNdx] = getMax(result, getULPs(in));
// Shared helpers for the matrix reference calculators: linear indexing into
// column-major storage and construction of component validity bit masks.
16728 struct fp16MatrixBase : public fp16AllComponents
// Default mask with every bit set.
16730 deUint32 getComponentValidity ()
16732 return static_cast<deUint32>(-1);
// Linear index of element (col, row) in a matrix with `rowCount` rows stored
// column by column. A column of 3 rows occupies 4 slots; other row counts are
// stored tightly.
//   rowCount - number of rows, 1..4
//   col      - column index, 0..3
//   row      - row index, 0..rowCount-1
16735 inline size_t getNdx (const size_t rowCount, const size_t col, const size_t row)
16737 const size_t minComponentCount = 0;
16738 const size_t maxComponentCount = 3;
16739 const size_t alignedRowsCount = (rowCount == 3) ? 4 : rowCount;
16741 DE_ASSERT(de::inRange(rowCount, minComponentCount + 1, maxComponentCount + 1));
16742 DE_ASSERT(de::inRange(col, minComponentCount, maxComponentCount));
16743 DE_ASSERT(de::inBounds(row, minComponentCount, rowCount));
// The two bounds are otherwise referenced only inside the asserts above.
16744 DE_UNREF(minComponentCount);
16745 DE_UNREF(maxComponentCount);
16747 return col * alignedRowsCount + row;
// Builds a bit mask with one bit set, at position getNdx(rows, col, row), for
// every element of a cols x rows matrix; slots introduced by the 3-to-4 row
// padding stay clear.
16750 deUint32 getComponentMatrixValidityMask (size_t cols, size_t rows)
16752 deUint32 result = 0u;
16754 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16755 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16757 const size_t bitNdx = getNdx(rows, colNdx, rowNdx);
16759 DE_ASSERT(bitNdx < sizeof(result) * 8);
// With the ranges asserted in getNdx() the largest index is 3 * 4 + 3 = 15.
16761 result |= (1<<bitNdx);
// Reference calculator for the transpose of a cols x rows matrix held in
// column-major storage, where 3-element columns are padded to a stride of 4.
16768 template<size_t cols, size_t rows>
16769 struct fp16Transpose : public fp16MatrixBase
// ULP figure reported for this operation.
16771 virtual double getULPs(vector<const deFloat16*>& in)
// The result has `rows` columns of `cols` rows, hence the swapped arguments.
16778 deUint32 getComponentValidity ()
16780 return getComponentMatrixValidityMask(rows, cols);
// Copies the transposed matrix into `out`; padding slots are zero.
//   in  - one operand: the source matrix
//   out - transposed matrix, alignedCols * alignedRows deFloat16 values
16783 template<class fp16type>
16784 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16786 DE_ASSERT(in.size() == 1);
16788 const size_t alignedCols = (cols == 3) ? 4 : cols;
16789 const size_t alignedRows = (rows == 3) ? 4 : rows;
16790 vector<deFloat16> output (alignedCols * alignedRows, 0);
16792 DE_ASSERT(output.size() == alignedCols * alignedRows);
// Source element (col, row) lands at (row, col) of the result; the result's
// column stride is alignedCols because each of its columns holds `cols` values.
16794 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16795 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16796 output[rowNdx * alignedCols + colNdx] = in[0][colNdx * alignedRows + rowNdx];
16798 deMemcpy(out, &output[0], sizeof(deFloat16) * output.size());
// NOTE(review): min and max are double*, yet they receive the raw deFloat16 bit
// patterns (2 bytes per element), not values converted to double. Confirm that
// the consumer compares transposes bit-exactly and does not read these bounds.
16799 deMemcpy(min, &output[0], sizeof(deFloat16) * output.size());
16800 deMemcpy(max, &output[0], sizeof(deFloat16) * output.size());
// Reference calculator for matrix * scalar on a cols x rows matrix in padded
// column-major storage.
16806 template<size_t cols, size_t rows>
16807 struct fp16MatrixTimesScalar : public fp16MatrixBase
// Tolerance (in ULPs) that calc() passes to getMin()/getMax().
16809 virtual double getULPs(vector<const deFloat16*>& in)
// The result has the same shape as the matrix operand.
16816 deUint32 getComponentValidity ()
16818 return getComponentMatrixValidityMask(cols, rows);
// Multiplies every matrix element by the scalar in[1][0]. The product is formed
// in float, stored as fp16 in `out`, and bounded via getMin()/getMax().
//   in      - matrix operand (alignedCols * alignedRows values) and one scalar
//   out     - expected matrix; padding slots are not written
//   min/max - per-element bounds around the unrounded product
16821 template<class fp16type>
16822 bool calc(vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16824 DE_ASSERT(in.size() == 2);
16825 DE_ASSERT(getArgCompCount(1) == 1);
16827 const fp16type y (in[1][0]);
16828 const float scalar (y.asFloat());
16829 const size_t alignedCols = (cols == 3) ? 4 : cols;
16830 const size_t alignedRows = (rows == 3) ? 4 : rows;
16832 DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
16833 DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
// alignedCols is otherwise referenced only inside the asserts above.
16834 DE_UNREF(alignedCols);
16836 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16837 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16839 const size_t ndx (colNdx * alignedRows + rowNdx);
16840 const fp16type x (in[0][ndx]);
16841 const double result (scalar * x.asFloat());
16843 out[ndx] = fp16type(result).bits();
16844 min[ndx] = getMin(result, getULPs(in));
16845 max[ndx] = getMax(result, getULPs(in));
// Reference calculator for vector * matrix: a `rows`-component vector (in[0])
// times a cols x rows matrix (in[1]) gives a `cols`-component vector whose
// element c is the dot product of the vector with matrix column c.
16852 template<size_t cols, size_t rows>
16853 struct fp16VectorTimesMatrix : public fp16MatrixBase
// Flavor 0 rounds every step to fp16; flavor 1 accumulates in float.
16855 fp16VectorTimesMatrix() : fp16MatrixBase()
16857 flavorNames.push_back("EmulatingFP16");
16858 flavorNames.push_back("FloatCalc");
// Tolerance (in ULPs) that calc() passes to getMin()/getMax().
16861 virtual double getULPs (vector<const deFloat16*>& in)
16865 return (8.0 * cols);
// The result is a plain vector of `cols` components (cols columns of one row).
16868 deUint32 getComponentValidity ()
16870 return getComponentMatrixValidityMask(cols, 1);
// Fills `out`, `min` and `max` for each of the `cols` result components.
//   in      - vector operand (rows values) and matrix operand (padded storage)
//   out     - expected vector as fp16 bits
//   min/max - per-component bounds around the accumulated sum
// Throws InternalError for an unknown flavor.
16873 template<class fp16type>
16874 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16876 DE_ASSERT(in.size() == 2);
16878 const size_t alignedCols = (cols == 3) ? 4 : cols;
16879 const size_t alignedRows = (rows == 3) ? 4 : rows;
16881 DE_ASSERT(getOutCompCount() == cols);
16882 DE_ASSERT(getArgCompCount(0) == rows);
16883 DE_ASSERT(getArgCompCount(1) == alignedCols * alignedRows);
16884 DE_UNREF(alignedCols);
// Flavor 0: each product and each partial sum is rounded through fp16type.
16886 if (getFlavor() == 0)
16888 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16890 fp16type s (fp16type::zero(1));
16892 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16894 const fp16type v (in[0][rowNdx]);
16895 const float vf (v.asFloat());
16896 const size_t ndx (colNdx * alignedRows + rowNdx);
16897 const fp16type x (in[1][ndx]);
16898 const float xf (x.asFloat());
16899 const fp16type m (vf * xf);
16901 s = fp16type(s.asFloat() + m.asFloat());
16904 out[colNdx] = s.bits();
16905 min[colNdx] = getMin(s.asDouble(), getULPs(in));
16906 max[colNdx] = getMax(s.asDouble(), getULPs(in));
// Flavor 1: products kept in float; only the final sum is rounded for `out`.
16909 else if (getFlavor() == 1)
16911 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16915 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16917 const fp16type v (in[0][rowNdx]);
16918 const float vf (v.asFloat());
16919 const size_t ndx (colNdx * alignedRows + rowNdx);
16920 const fp16type x (in[1][ndx]);
16921 const float xf (x.asFloat());
16922 const float m (vf * xf);
16927 out[colNdx] = fp16type(s).bits();
16928 min[colNdx] = getMin(static_cast<double>(s), getULPs(in));
16929 max[colNdx] = getMax(static_cast<double>(s), getULPs(in));
16934 TCU_THROW(InternalError, "Unknown flavor");
// Reference calculator for matrix * vector: a cols x rows matrix (in[0]) times
// a `cols`-component vector (in[1]) gives a `rows`-component vector whose
// element r is the dot product of matrix row r with the vector.
16941 template<size_t cols, size_t rows>
16942 struct fp16MatrixTimesVector : public fp16MatrixBase
// Flavor 0 rounds every step to fp16; flavor 1 accumulates in float.
16944 fp16MatrixTimesVector() : fp16MatrixBase()
16946 flavorNames.push_back("EmulatingFP16");
16947 flavorNames.push_back("FloatCalc");
// Tolerance (in ULPs) that calc() passes to getMin()/getMax().
16950 virtual double getULPs (vector<const deFloat16*>& in)
16954 return (8.0 * rows);
// The result is a plain vector of `rows` components.
16957 deUint32 getComponentValidity ()
16959 return getComponentMatrixValidityMask(rows, 1);
// Fills `out`, `min` and `max` for each of the `rows` result components.
//   in      - matrix operand (padded storage) and vector operand (cols values)
//   out     - expected vector as fp16 bits
//   min/max - per-component bounds around the accumulated sum
// Throws InternalError for an unknown flavor.
16962 template<class fp16type>
16963 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16965 DE_ASSERT(in.size() == 2);
16967 const size_t alignedCols = (cols == 3) ? 4 : cols;
16968 const size_t alignedRows = (rows == 3) ? 4 : rows;
16970 DE_ASSERT(getOutCompCount() == rows);
16971 DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
16972 DE_ASSERT(getArgCompCount(1) == cols);
16973 DE_UNREF(alignedCols);
// Flavor 0: each product and each partial sum is rounded through fp16type.
16975 if (getFlavor() == 0)
16977 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16979 fp16type s (fp16type::zero(1));
16981 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16983 const size_t ndx (colNdx * alignedRows + rowNdx);
16984 const fp16type x (in[0][ndx]);
16985 const float xf (x.asFloat());
16986 const fp16type v (in[1][colNdx]);
16987 const float vf (v.asFloat());
16988 const fp16type m (vf * xf);
16990 s = fp16type(s.asFloat() + m.asFloat());
16993 out[rowNdx] = s.bits();
16994 min[rowNdx] = getMin(s.asDouble(), getULPs(in));
16995 max[rowNdx] = getMax(s.asDouble(), getULPs(in));
// Flavor 1: products kept in float; only the final sum is rounded for `out`.
16998 else if (getFlavor() == 1)
17000 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17004 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17006 const size_t ndx (colNdx * alignedRows + rowNdx);
17007 const fp16type x (in[0][ndx]);
17008 const float xf (x.asFloat());
17009 const fp16type v (in[1][colNdx]);
17010 const float vf (v.asFloat());
17011 const float m (vf * xf);
17016 out[rowNdx] = fp16type(s).bits();
17017 min[rowNdx] = getMin(static_cast<double>(s), getULPs(in));
17018 max[rowNdx] = getMax(static_cast<double>(s), getULPs(in));
17023 TCU_THROW(InternalError, "Unknown flavor");
// Reference calculator for matrix * matrix: a colsL x rowsL left operand (in[0])
// times a colsR x rowsR right operand (in[1]) gives a colsR x rowsL result.
// All matrices use column-major storage with 3-row columns padded to 4.
17030 template<size_t colsL, size_t rowsL, size_t colsR, size_t rowsR>
17031 struct fp16MatrixTimesMatrix : public fp16MatrixBase
// Flavor 0 rounds every step to fp16; flavor 1 accumulates in float.
17033 fp16MatrixTimesMatrix() : fp16MatrixBase()
17035 flavorNames.push_back("EmulatingFP16");
17036 flavorNames.push_back("FloatCalc");
// Tolerance (in ULPs) that calc() passes to getMin()/getMax().
17039 virtual double getULPs (vector<const deFloat16*>& in)
// The result has colsR columns of rowsL rows.
17046 deUint32 getComponentValidity ()
17048 return getComponentMatrixValidityMask(colsR, rowsL);
// Fills `out`, `min` and `max` for every element of the product matrix.
//   in      - left and right matrix operands in padded storage
//   out     - expected matrix as fp16 bits; padding slots are not written
//   min/max - per-element bounds around the accumulated sum
// Throws InternalError for an unknown flavor.
17051 template<class fp16type>
17052 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
// The inner dimensions must agree for the product to be defined.
17054 DE_STATIC_ASSERT(colsL == rowsR);
17056 DE_ASSERT(in.size() == 2);
17058 const size_t alignedColsL = (colsL == 3) ? 4 : colsL;
17059 const size_t alignedRowsL = (rowsL == 3) ? 4 : rowsL;
17060 const size_t alignedColsR = (colsR == 3) ? 4 : colsR;
17061 const size_t alignedRowsR = (rowsR == 3) ? 4 : rowsR;
17063 DE_ASSERT(getOutCompCount() == alignedColsR * alignedRowsL);
17064 DE_ASSERT(getArgCompCount(0) == alignedColsL * alignedRowsL);
17065 DE_ASSERT(getArgCompCount(1) == alignedColsR * alignedRowsR);
17066 DE_UNREF(alignedColsL);
17067 DE_UNREF(alignedColsR);
// Flavor 0: each product and each partial sum is rounded through fp16type.
17069 if (getFlavor() == 0)
17071 for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17073 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17075 const size_t ndx (colNdx * alignedRowsL + rowNdx);
17076 fp16type s (fp16type::zero(1));
// Row rowNdx of the left operand times column colNdx of the right operand.
17078 for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17080 const size_t ndxl (commonNdx * alignedRowsL + rowNdx);
17081 const fp16type l (in[0][ndxl]);
17082 const float lf (l.asFloat());
17083 const size_t ndxr (colNdx * alignedRowsR + commonNdx);
17084 const fp16type r (in[1][ndxr]);
17085 const float rf (r.asFloat());
17086 const fp16type m (lf * rf);
17088 s = fp16type(s.asFloat() + m.asFloat());
17091 out[ndx] = s.bits();
17092 min[ndx] = getMin(s.asDouble(), getULPs(in));
17093 max[ndx] = getMax(s.asDouble(), getULPs(in));
// Flavor 1: products kept in float; only the final sum is rounded for `out`.
17097 else if (getFlavor() == 1)
17099 for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17101 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17103 const size_t ndx (colNdx * alignedRowsL + rowNdx);
17106 for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17108 const size_t ndxl (commonNdx * alignedRowsL + rowNdx);
17109 const fp16type l (in[0][ndxl]);
17110 const float lf (l.asFloat());
17111 const size_t ndxr (colNdx * alignedRowsR + commonNdx);
17112 const fp16type r (in[1][ndxr]);
17113 const float rf (r.asFloat());
17114 const float m (lf * rf);
17119 out[ndx] = fp16type(s).bits();
17120 min[ndx] = getMin(static_cast<double>(s), getULPs(in));
17121 max[ndx] = getMax(static_cast<double>(s), getULPs(in));
17127 TCU_THROW(InternalError, "Unknown flavor");
// Reference calculator for an outer product: a `rows`-component vector (in[0])
// and a `cols`-component vector (in[1]) give a cols x rows matrix whose element
// (col, row) is in[0][row] * in[1][col].
17134 template<size_t cols, size_t rows>
17135 struct fp16OuterProduct : public fp16MatrixBase
// Tolerance (in ULPs) that calc() passes to getMin()/getMax().
17137 virtual double getULPs (vector<const deFloat16*>& in)
// The result is a cols x rows matrix.
17144 deUint32 getComponentValidity ()
17146 return getComponentMatrixValidityMask(cols, rows);
// Fills `out`, `min` and `max` for every element of the result matrix. Each
// product is formed in float and rounded to fp16; the bounds are taken around
// the rounded value.
//   in  - two vector operands (rows and cols components respectively)
//   out - expected matrix in padded column-major storage
17149 template<class fp16type>
17150 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17152 DE_ASSERT(in.size() == 2);
17154 const size_t alignedCols = (cols == 3) ? 4 : cols;
17155 const size_t alignedRows = (rows == 3) ? 4 : rows;
17157 DE_ASSERT(getArgCompCount(0) == rows);
17158 DE_ASSERT(getArgCompCount(1) == cols);
17159 DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
17160 DE_UNREF(alignedCols);
17162 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17164 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17166 const size_t ndx (colNdx * alignedRows + rowNdx);
17167 const fp16type x (in[0][rowNdx]);
17168 const float xf (x.asFloat());
17169 const fp16type y (in[1][colNdx]);
17170 const float yf (y.asFloat());
17171 const fp16type m (xf * yf);
17173 out[ndx] = m.bits();
17174 min[ndx] = getMin(m.asDouble(), getULPs(in));
17175 max[ndx] = getMax(m.asDouble(), getULPs(in));
// Primary template for the determinant reference calculators. It is declared
// only; the usable definitions are the specializations for sizes 2, 3 and 4.
17183 template<size_t size>
17184 struct fp16Determinant;
// Determinant reference calculator for 2x2 matrices: a * d - b * c.
17187 struct fp16Determinant<2> : public fp16MatrixBase
// Tolerance (in ULPs) that calc() passes to getMin()/getMax().
17189 virtual double getULPs (vector<const deFloat16*>& in)
17193 return 128.0; // This is not a precision test. Value is not from spec
// Validity mask of the scalar result.
17196 deUint32 getComponentValidity ()
// Writes the determinant of in[0] to out[0] and its accepted interval to
// min[0]/max[0].
//   in  - one operand: the 2x2 matrix in column-major storage
//   out - expected scalar as fp16 bits
17201 template<class fp16type>
17202 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17204 const size_t cols = 2;
17205 const size_t rows = 2;
17206 const size_t alignedCols = (cols == 3) ? 4 : cols;
17207 const size_t alignedRows = (rows == 3) ? 4 : rows;
17209 DE_ASSERT(in.size() == 1);
17210 DE_ASSERT(getOutCompCount() == 1);
17211 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17212 DE_UNREF(alignedCols);
17213 DE_UNREF(alignedRows);
// getNdx(rows, col, row): a and b form the first row, c and d the second.
17217 const float a (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17218 const float b (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17219 const float c (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17220 const float d (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
// Both products are rounded to fp16 before the subtraction.
17221 const float ad (a * d);
17222 const fp16type adf16 (ad);
17223 const float bc (b * c);
17224 const fp16type bcf16 (bc);
17225 const float r (adf16.asFloat() - bcf16.asFloat());
17226 const fp16type rf16 (r);
// `out` holds the rounded result; the bounds are taken around the float value.
17228 out[0] = rf16.bits();
17229 min[0] = getMin(r, getULPs(in));
17230 max[0] = getMax(r, getULPs(in));
// Determinant reference calculator for 3x3 matrices using the six-term
// expansion aei + bfg + cdh - ceg - bdi - afh.
17237 struct fp16Determinant<3> : public fp16MatrixBase
// Tolerance (in ULPs) that calc() passes to getMin()/getMax().
17239 virtual double getULPs (vector<const deFloat16*>& in)
17243 return 128.0; // This is not a precision test. Value is not from spec
// Validity mask of the scalar result.
17246 deUint32 getComponentValidity ()
// Writes the determinant of in[0] to out[0] and its accepted interval to
// min[0]/max[0].
//   in  - one operand: the 3x3 matrix, columns padded to 4 elements
//   out - expected scalar as fp16 bits
17251 template<class fp16type>
17252 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17254 const size_t cols = 3;
17255 const size_t rows = 3;
17256 const size_t alignedCols = (cols == 3) ? 4 : cols;
17257 const size_t alignedRows = (rows == 3) ? 4 : rows;
17259 DE_ASSERT(in.size() == 1);
17260 DE_ASSERT(getOutCompCount() == 1);
17261 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17262 DE_UNREF(alignedCols);
17263 DE_UNREF(alignedRows);
// getNdx(rows, col, row): a b c is the first row, d e f the second and
// g h i the third.
17268 const float a (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17269 const float b (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17270 const float c (fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17271 const float d (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17272 const float e (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17273 const float f (fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17274 const float g (fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17275 const float h (fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17276 const float i (fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
// Each triple product is computed in float and rounded once to fp16.
17277 const fp16type aei (a * e * i);
17278 const fp16type bfg (b * f * g);
17279 const fp16type cdh (c * d * h);
17280 const fp16type ceg (c * e * g);
17281 const fp16type bdi (b * d * i);
17282 const fp16type afh (a * f * h);
17283 const float r (aei.asFloat() + bfg.asFloat() + cdh.asFloat() - ceg.asFloat() - bdi.asFloat() - afh.asFloat());
17284 const fp16type rf16 (r);
// `out` holds the rounded result; the bounds are taken around the float value.
17286 out[0] = rf16.bits();
17287 min[0] = getMin(r, getULPs(in));
17288 max[0] = getMax(r, getULPs(in));
// Determinant reference calculator for 4x4 matrices using cofactor expansion
// along the first row (a, b, c, d), each 3x3 minor expanded into six terms.
17295 struct fp16Determinant<4> : public fp16MatrixBase
// Tolerance (in ULPs) that calc() passes to getMin()/getMax().
17297 virtual double getULPs (vector<const deFloat16*>& in)
17301 return 128.0; // This is not a precision test. Value is not from spec
// Validity mask of the scalar result.
17304 deUint32 getComponentValidity ()
// Writes the determinant of in[0] to out[0] and its accepted interval to
// min[0]/max[0].
//   in  - one operand: the 4x4 matrix in column-major storage
//   out - expected scalar as fp16 bits
17309 template<class fp16type>
17310 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17312 const size_t rows = 4;
17313 const size_t cols = 4;
17314 const size_t alignedCols = (cols == 3) ? 4 : cols;
17315 const size_t alignedRows = (rows == 3) ? 4 : rows;
17317 DE_ASSERT(in.size() == 1);
17318 DE_ASSERT(getOutCompCount() == 1);
17319 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17320 DE_UNREF(alignedCols);
17321 DE_UNREF(alignedRows);
// getNdx(rows, col, row): the rows of the matrix are
//   a b c d / e f g h / i j k l / m n o p
17327 const float a (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17328 const float b (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17329 const float c (fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17330 const float d (fp16type(in[0][getNdx(rows, 3, 0)]).asFloat());
17331 const float e (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17332 const float f (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17333 const float g (fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17334 const float h (fp16type(in[0][getNdx(rows, 3, 1)]).asFloat());
17335 const float i (fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17336 const float j (fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17337 const float k (fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17338 const float l (fp16type(in[0][getNdx(rows, 3, 2)]).asFloat());
17339 const float m (fp16type(in[0][getNdx(rows, 0, 3)]).asFloat());
17340 const float n (fp16type(in[0][getNdx(rows, 1, 3)]).asFloat());
17341 const float o (fp16type(in[0][getNdx(rows, 2, 3)]).asFloat());
17342 const float p (fp16type(in[0][getNdx(rows, 3, 3)]).asFloat());
// a times the minor f g h / j k l / n o p.
17347 const fp16type fkp (f * k * p);
17348 const fp16type gln (g * l * n);
17349 const fp16type hjo (h * j * o);
17350 const fp16type hkn (h * k * n);
17351 const fp16type gjp (g * j * p);
17352 const fp16type flo (f * l * o);
17353 const fp16type detA (a * (fkp.asFloat() + gln.asFloat() + hjo.asFloat() - hkn.asFloat() - gjp.asFloat() - flo.asFloat()));
// b times the minor e g h / i k l / m o p.
17358 const fp16type ekp (e * k * p);
17359 const fp16type glm (g * l * m);
17360 const fp16type hio (h * i * o);
17361 const fp16type hkm (h * k * m);
17362 const fp16type gip (g * i * p);
17363 const fp16type elo (e * l * o);
17364 const fp16type detB (b * (ekp.asFloat() + glm.asFloat() + hio.asFloat() - hkm.asFloat() - gip.asFloat() - elo.asFloat()));
// c times the minor e f h / i j l / m n p.
17369 const fp16type ejp (e * j * p);
17370 const fp16type flm (f * l * m);
17371 const fp16type hin (h * i * n);
17372 const fp16type hjm (h * j * m);
17373 const fp16type fip (f * i * p);
17374 const fp16type eln (e * l * n);
17375 const fp16type detC (c * (ejp.asFloat() + flm.asFloat() + hin.asFloat() - hjm.asFloat() - fip.asFloat() - eln.asFloat()));
// d times the minor e f g / i j k / m n o.
17380 const fp16type ejo (e * j * o);
17381 const fp16type fkm (f * k * m);
17382 const fp16type gin (g * i * n);
17383 const fp16type gjm (g * j * m);
17384 const fp16type fio (f * i * o);
17385 const fp16type ekn (e * k * n);
17386 const fp16type detD (d * (ejo.asFloat() + fkm.asFloat() + gin.asFloat() - gjm.asFloat() - fio.asFloat() - ekn.asFloat()));
// Alternating signs of the first-row cofactors.
17388 const float r (detA.asFloat() - detB.asFloat() + detC.asFloat() - detD.asFloat());
17389 const fp16type rf16 (r);
// `out` holds the rounded result; the bounds are taken around the float value.
17391 out[0] = rf16.bits();
17392 min[0] = getMin(r, getULPs(in));
17393 max[0] = getMax(r, getULPs(in));
17399 template<size_t size>
17400 struct fp16Inverse;
17403 struct fp16Inverse<2> : public fp16MatrixBase
17405 virtual double getULPs (vector<const deFloat16*>& in)
17409 return 128.0; // This is not a precision test. Value is not from spec
17412 deUint32 getComponentValidity ()
17414 return getComponentMatrixValidityMask(2, 2);
17417 template<class fp16type>
17418 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17420 const size_t cols = 2;
17421 const size_t rows = 2;
17422 const size_t alignedCols = (cols == 3) ? 4 : cols;
17423 const size_t alignedRows = (rows == 3) ? 4 : rows;
17425 DE_ASSERT(in.size() == 1);
17426 DE_ASSERT(getOutCompCount() == alignedRows * alignedCols);
17427 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17428 DE_UNREF(alignedCols);
17432 const float a (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17433 const float b (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17434 const float c (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17435 const float d (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17436 const float ad (a * d);
17437 const fp16type adf16 (ad);
17438 const float bc (b * c);
17439 const fp16type bcf16 (bc);
17440 const float det (adf16.asFloat() - bcf16.asFloat());
17441 const fp16type det16 (det);
17443 out[0] = fp16type( d / det16.asFloat()).bits();
17444 out[1] = fp16type(-c / det16.asFloat()).bits();
17445 out[2] = fp16type(-b / det16.asFloat()).bits();
17446 out[3] = fp16type( a / det16.asFloat()).bits();
17448 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17449 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17451 const size_t ndx (colNdx * alignedRows + rowNdx);
17452 const fp16type s (out[ndx]);
17454 min[ndx] = getMin(s.asDouble(), getULPs(in));
17455 max[ndx] = getMax(s.asDouble(), getULPs(in));
17462 inline std::string fp16ToString(deFloat16 val)
17464 return tcu::toHex<4>(val).toString() + " (" + de::floatToString(tcu::Float16(val).asFloat(), 10) + ")";
// Verifies the device output of an FP16 arithmetic test against a host-side reference.
//
// Template parameters give the component counts of the result and of up to three arguments, and
// the functor type (TestedArithmeticFunction) that computes the reference value together with an
// accepted [min, max] interval. For every iteration the reference is evaluated with
// tcu::Float16 and with tcu::Float16Denormless, for every flavor the functor reports. Each result
// component starts with (denorm modes x flavors) successful runs; the counter is decremented next
// to where an error message is recorded, and a component whose counter reaches zero has its
// accumulated messages written to the log.
//
// inputs          - 1..3 input resources holding deFloat16 data
// outputAllocs    - exactly one allocation holding the device results
// expectedOutputs - exactly one resource; only its byte size is used, to derive the iteration count
// log             - receives the collected error messages
17467 template <size_t RES_COMPONENTS, size_t ARG0_COMPONENTS, size_t ARG1_COMPONENTS, size_t ARG2_COMPONENTS, class TestedArithmeticFunction>
17468 bool compareFP16ArithmeticFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
// Reject calls with an unsupported number of inputs or outputs.
17470 if (inputs.size() < 1 || inputs.size() > 3 || outputAllocs.size() != 1 || expectedOutputs.size() != 1)
// Three-component values occupy four elements in the buffers.
17473 const size_t resultStep = (RES_COMPONENTS == 3) ? 4 : RES_COMPONENTS;
17474 const size_t iterationsCount = expectedOutputs[0].getByteSize() / (sizeof(deFloat16) * resultStep);
17475 const size_t inputsSteps[3] =
17477 (ARG0_COMPONENTS == 3) ? 4 : ARG0_COMPONENTS,
17478 (ARG1_COMPONENTS == 3) ? 4 : ARG1_COMPONENTS,
17479 (ARG2_COMPONENTS == 3) ? 4 : ARG2_COMPONENTS,
17482 DE_ASSERT(expectedOutputs[0].getByteSize() > 0);
17483 DE_ASSERT(expectedOutputs[0].getByteSize() == sizeof(deFloat16) * iterationsCount * resultStep);
// Every input must hold exactly iterationsCount elements of its own step size.
17485 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17487 DE_ASSERT(inputs[inputNdx].getByteSize() > 0);
17488 DE_ASSERT(inputs[inputNdx].getByteSize() == sizeof(deFloat16) * iterationsCount * inputsSteps[inputNdx]);
17491 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
17492 TestedArithmeticFunction func;
// Tell the reference functor the shape of its result and arguments.
17494 func.setOutCompCount(RES_COMPONENTS);
17495 func.setArgCompCount(0, ARG0_COMPONENTS);
17496 func.setArgCompCount(1, ARG1_COMPONENTS);
17497 func.setArgCompCount(2, ARG2_COMPONENTS);
17499 const bool callOncePerComponent = func.callOncePerComponent();
17500 const deUint32 componentValidityMask = func.getComponentValidity();
17501 const size_t denormModesCount = 2;
17502 const char* denormModes[denormModesCount] = { "keep denormal numbers", "flush to zero" };
17503 const size_t successfulRunsPerComponent = denormModesCount * func.getFlavorCount();
17504 bool success = true;
17505 size_t validatedCount = 0;
// Copy the input resources into host vectors so they can be read as deFloat16 arrays.
17507 vector<deUint8> inputBytes[3];
17509 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17510 inputs[inputNdx].getBytes(inputBytes[inputNdx]);
17512 const deFloat16* const inputsAsFP16[3] =
17514 inputs.size() >= 1 ? (const deFloat16*)&inputBytes[0][0] : DE_NULL,
17515 inputs.size() >= 2 ? (const deFloat16*)&inputBytes[1][0] : DE_NULL,
17516 inputs.size() >= 3 ? (const deFloat16*)&inputBytes[2][0] : DE_NULL,
17519 for (size_t idx = 0; idx < iterationsCount; ++idx)
// Per-iteration bookkeeping: remaining successful runs and collected error text per component.
17521 std::vector<size_t> successfulRuns (RES_COMPONENTS, successfulRunsPerComponent);
17522 std::vector<std::string> errors (RES_COMPONENTS);
17523 bool iterationValidated (true);
// denormNdx 0 uses tcu::Float16, any other value uses tcu::Float16Denormless (see the calc calls below).
17525 for (size_t denormNdx = 0; denormNdx < 2; ++denormNdx)
17527 for (size_t flavorNdx = 0; flavorNdx < func.getFlavorCount(); ++flavorNdx)
17529 func.setFlavor(flavorNdx);
17531 const deFloat16* iterationOutputFP16 = &outputAsFP16[idx * resultStep];
17532 vector<deFloat16> iterationCalculatedFP16 (resultStep, 0);
17533 vector<double> iterationEdgeMin (resultStep, 0.0);
17534 vector<double> iterationEdgeMax (resultStep, 0.0);
17535 vector<const deFloat16*> arguments;
17537 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17540 bool reportError = false;
// The reference is computed for every component, or only once (at component 0) when the
// functor produces the whole result in a single call.
17542 if (callOncePerComponent || componentNdx == 0)
17544 bool funcCallResult;
17548 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17549 arguments.push_back(&inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + componentNdx]);
17551 if (denormNdx == 0)
17552 funcCallResult = func.template calc<tcu::Float16>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
17554 funcCallResult = func.template calc<tcu::Float16Denormless>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
// A false result from calc marks the iteration as not validated.
17556 if (!funcCallResult)
17558 iterationValidated = false;
17560 if (callOncePerComponent)
// A non-zero validity mask selects which components are compared at all.
17567 if ((componentValidityMask != 0) && (componentValidityMask & (1<<componentNdx)) == 0)
17570 reportError = !compare16BitFloat(iterationCalculatedFP16[componentNdx], iterationOutputFP16[componentNdx], error);
17574 tcu::Float16 expected (iterationCalculatedFP16[componentNdx]);
17575 tcu::Float16 outputted (iterationOutputFP16[componentNdx]);
17576 tcu::Float64 edgeMin (iterationEdgeMin[componentNdx]);
17577 tcu::Float64 edgeMax (iterationEdgeMax[componentNdx]);
// A NaN reference never produces an error.
17579 if (reportError && expected.isNaN())
17580 reportError = false;
17582 if (reportError && !expected.isNaN() && !outputted.isNaN())
17584 if (reportError && !expected.isInf() && !outputted.isInf())
// Accept results whose bit pattern differs from the reference by exactly one.
17587 if (expected.bits() == outputted.bits() + 1 || expected.bits() + 1 == outputted.bits())
17588 reportError = false;
17591 if (reportError && expected.isInf())
17593 // RTZ rounding mode returns +/-65504 instead of Inf on overflow
17594 if (expected.sign() == 1 && outputted.bits() == 0x7bff && edgeMin.asDouble() <= std::numeric_limits<double>::max())
17595 reportError = false;
17596 else if (expected.sign() == -1 && outputted.bits() == 0xfbff && edgeMax.asDouble() >= -std::numeric_limits<double>::max())
17597 reportError = false;
17602 const double outputtedDouble = outputted.asDouble();
17604 DE_ASSERT(edgeMin.isNaN() || edgeMax.isNaN() || (edgeMin.asDouble() <= edgeMax.asDouble()));
// Accept any output that lies inside the interval reported by the reference functor.
17606 if (de::inRange(outputtedDouble, edgeMin.asDouble(), edgeMax.asDouble()))
17607 reportError = false;
// Build a human-readable description of the failing case: inputs, modes, output and reference.
17613 const size_t inputsComps[3] =
17619 string inputsValues ("Inputs:");
17620 string flavorName (func.getFlavorCount() == 1 ? "" : string(" flavor ") + de::toString(flavorNdx) + " (" + func.getCurrentFlavorName() + ")");
17621 std::stringstream errStream;
17623 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17625 const size_t inputCompsCount = inputsComps[inputNdx];
17627 inputsValues += " [" + de::toString(inputNdx) + "]=(";
17629 for (size_t compNdx = 0; compNdx < inputCompsCount; ++compNdx)
17631 const deFloat16 inputComponentValue = inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + compNdx];
17633 inputsValues += fp16ToString(inputComponentValue) + ((compNdx + 1 == inputCompsCount) ? ")": ", ");
17638 << " iteration " << de::toString(idx)
17639 << " component " << de::toString(componentNdx)
17640 << " denormMode " << de::toString(denormNdx)
17641 << " (" << denormModes[denormNdx] << ")"
17642 << " " << flavorName
17643 << " " << inputsValues
17644 << " outputted:" + fp16ToString(iterationOutputFP16[componentNdx])
17645 << " expected:" + fp16ToString(iterationCalculatedFP16[componentNdx])
17646 << " or in range: [" << iterationEdgeMin[componentNdx] << ", " << iterationEdgeMax[componentNdx] << "]."
17647 << " " << error << "."
// Record the message and use up one of this component's successful runs.
17650 errors[componentNdx] += errStream.str();
17652 successfulRuns[componentNdx]--;
17659 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17661 // Check if any component has total failure
17662 if (successfulRuns[componentNdx] == 0)
17664 // Test failed in all denorm modes and all flavors for certain component: dump errors
17665 log << TestLog::Message << errors[componentNdx] << TestLog::EndMessage;
17671 if (iterationValidated)
// Guard against a reference functor that rejects nearly all samples.
17675 if (validatedCount < 16)
17676 TCU_THROW(InternalError, "Too few samples have been validated.");
17681 // IEEE-754 floating point numbers:
17682 // +--------+------+----------+-------------+
17683 // | binary | sign | exponent | significand |
17684 // +--------+------+----------+-------------+
17685 // | 16-bit | 1 | 5 | 10 |
17686 // +--------+------+----------+-------------+
17687 // | 32-bit | 1 | 8 | 23 |
17688 // +--------+------+----------+-------------+
17692 // 0 000 00 00 0000 0001 (0x0001: 2^-24: minimum positive denormalized)
17693 // 0 000 00 11 1111 1111 (0x03ff: 2^-14 - 2^-24: maximum positive denormalized)
17694 // 0 000 01 00 0000 0000 (0x0400: 2^-14: minimum positive normalized)
17695 // 0 111 10 11 1111 1111 (0x7bff: 65504: maximum positive normalized)
17697 // 0 000 00 00 0000 0000 (0x0000: +0)
17698 // 0 111 11 00 0000 0000 (0x7c00: +Inf)
17699 // 0 000 00 11 1111 0000 (0x03f0: +Denorm)
17700 // 0 000 01 00 0000 0001 (0x0401: +Norm)
17701 // 0 111 11 00 0000 1111 (0x7c0f: +SNaN)
17702 // 0 111 11 11 1111 0000 (0x7ff0: +QNaN)
17703 // Generate and return 16-bit floats.
17705 // The first 22 values are manually picked, while the rest are randomly generated.
17706 // Expected count to be at least 22 (numPicks).
17707 vector<deFloat16> getFloat16a (de::Random& rnd, deUint32 count)
17709 vector<deFloat16> float16;
17711 float16.reserve(count);
17714 float16.push_back(deUint16(0x0000));
17715 float16.push_back(deUint16(0x8000));
17717 float16.push_back(deUint16(0x7c00));
17718 float16.push_back(deUint16(0xfc00));
17720 float16.push_back(deUint16(0x0401));
17721 float16.push_back(deUint16(0x8401));
17722 // Some normal number
17723 float16.push_back(deUint16(0x14cb));
17724 float16.push_back(deUint16(0x94cb));
17725 // Min/max positive normal
17726 float16.push_back(deUint16(0x0400));
17727 float16.push_back(deUint16(0x7bff));
17728 // Min/max negative normal
17729 float16.push_back(deUint16(0x8400));
17730 float16.push_back(deUint16(0xfbff));
17732 float16.push_back(deUint16(0x4248)); // 3.140625
17733 float16.push_back(deUint16(0xb248)); // -3.140625
17735 float16.push_back(deUint16(0x3e48)); // 1.5703125
17736 float16.push_back(deUint16(0xbe48)); // -1.5703125
17737 float16.push_back(deUint16(0x3c00)); // 1.0
17738 float16.push_back(deUint16(0x3800)); // 0.5
17739 // Some useful constants
17740 float16.push_back(tcu::Float16(-2.5f).bits());
17741 float16.push_back(tcu::Float16(-1.0f).bits());
17742 float16.push_back(tcu::Float16( 0.4f).bits());
17743 float16.push_back(tcu::Float16( 2.5f).bits());
17745 const deUint32 numPicks = static_cast<deUint32>(float16.size());
17747 DE_ASSERT(count >= numPicks);
17750 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17752 int sign = (rnd.getUint16() % 2 == 0) ? +1 : -1;
17753 int exponent = (rnd.getUint16() % 29) - 14 + 1;
17754 deUint16 mantissa = static_cast<deUint16>(2 * (rnd.getUint16() % 512));
17756 // Exclude power of -14 to avoid denorms
17757 DE_ASSERT(de::inRange(exponent, -13, 15));
17759 float16.push_back(tcu::Float16::constructBits(sign, exponent, mantissa).bits());
17765 static inline vector<deFloat16> getInputData1 (deUint32 seed, size_t count, size_t argNo)
17769 de::Random rnd(seed);
17771 return getFloat16a(rnd, static_cast<deUint32>(count));
17774 static inline vector<deFloat16> getInputData2 (deUint32 seed, size_t count, size_t argNo)
17776 de::Random rnd (seed);
17777 size_t newCount = static_cast<size_t>(deSqrt(double(count)));
17779 DE_ASSERT(newCount * newCount == count);
17781 vector<deFloat16> float16 = getFloat16a(rnd, static_cast<deUint32>(newCount));
17783 return squarize(float16, static_cast<deUint32>(argNo));
17786 static inline vector<deFloat16> getInputData3 (deUint32 seed, size_t count, size_t argNo)
17788 if (argNo == 0 || argNo == 1)
17789 return getInputData2(seed, count, argNo);
17791 return getInputData1(seed<<argNo, count, argNo);
17794 vector<deFloat16> getInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17798 vector<deFloat16> result;
17802 case 1:result = getInputData1(seed, count, argNo); break;
17803 case 2:result = getInputData2(seed, count, argNo); break;
17804 case 3:result = getInputData3(seed, count, argNo); break;
17805 default: TCU_THROW(InternalError, "Invalid argument count specified");
17808 if (compCount == 3)
17810 const size_t newCount = (3 * count) / 4;
17811 vector<deFloat16> newResult;
17813 newResult.reserve(result.size());
17815 for (size_t ndx = 0; ndx < newCount; ++ndx)
17817 newResult.push_back(result[ndx]);
17820 newResult.push_back(0);
17823 result = newResult;
17826 DE_ASSERT(result.size() == count);
17831 // Generator for functions requiring data in range [1, inf]
17832 vector<deFloat16> getInputDataAC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17834 vector<deFloat16> result;
17836 result = getInputData(seed, count, compCount, stride, argCount, argNo);
17838 // Filter out values below 1.0 from upper half of numbers
17839 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17841 const float f = tcu::Float16(result[idx]).asFloat();
17844 result[idx] = tcu::Float16(1.0f - f).bits();
17850 // Generator for functions requiring data in range [-1, 1]
17851 vector<deFloat16> getInputDataA (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17853 vector<deFloat16> result;
17855 result = getInputData(seed, count, compCount, stride, argCount, argNo);
17857 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17859 const float f = tcu::Float16(result[idx]).asFloat();
17861 if (!de::inRange(f, -1.0f, 1.0f))
17862 result[idx] = tcu::Float16(deFloatFrac(f)).bits();
17868 // Generator for functions requiring data in range [-pi, pi]
17869 vector<deFloat16> getInputDataPI (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17871 vector<deFloat16> result;
17873 result = getInputData(seed, count, compCount, stride, argCount, argNo);
17875 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17877 const float f = tcu::Float16(result[idx]).asFloat();
17879 if (!de::inRange(f, -DE_PI, DE_PI))
17880 result[idx] = tcu::Float16(fmodf(f, DE_PI)).bits();
17886 // Generator for functions requiring data in range [0, inf]
17887 vector<deFloat16> getInputDataP (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17889 vector<deFloat16> result;
17891 result = getInputData(seed, count, compCount, stride, argCount, argNo);
17895 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17896 result[idx] &= static_cast<deFloat16>(~0x8000);
// Input generator with two paths: one returns getInputData2() data directly, the other
// generates newCountX base values with getFloat16a() and repeats each of them newCountY times.
// NOTE(review): which argument takes which path is decided by a condition not shown in
// these lines - confirm against the full function.
17902 vector<deFloat16> getInputDataV (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17905 DE_UNREF(argCount);
17907 vector<deFloat16> result;
17910 result = getInputData2(seed, count, argNo);
// Three-component types are counted as four elements.
17913 const size_t alignedCount = (compCount == 3) ? 4 : compCount;
17914 const size_t newCountX = static_cast<size_t>(deSqrt(double(count * alignedCount)));
17915 const size_t newCountY = count / newCountX;
17916 de::Random rnd (seed);
17917 vector<deFloat16> float16 = getFloat16a(rnd, static_cast<deUint32>(newCountX));
// count * alignedCount must be a perfect square for the layout to work out.
17919 DE_ASSERT(newCountX * newCountX == alignedCount * count);
// Append newCountY copies of every base value.
17921 for (size_t numIdx = 0; numIdx < newCountX; ++numIdx)
17923 const vector<deFloat16> tmp(newCountY, float16[numIdx]);
17925 result.insert(result.end(), tmp.begin(), tmp.end());
17929 DE_ASSERT(result.size() == count);
17934 vector<deFloat16> getInputDataM (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17936 DE_UNREF(compCount);
17938 DE_UNREF(argCount);
17940 de::Random rnd (seed << argNo);
17941 vector<deFloat16> result;
17943 result = getFloat16a(rnd, static_cast<deUint32>(count));
17945 DE_ASSERT(result.size() == count);
17950 vector<deFloat16> getInputDataD (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17952 DE_UNREF(compCount);
17953 DE_UNREF(argCount);
17955 de::Random rnd (seed << argNo);
17956 vector<deFloat16> result;
17958 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17960 int num = (rnd.getUint16() % 16) - 8;
17962 result.push_back(tcu::Float16(float(num)).bits());
17965 result[0 * stride] = deUint16(0x7c00); // +Inf
17966 result[1 * stride] = deUint16(0xfc00); // -Inf
17968 DE_ASSERT(result.size() == count);
17973 // Generator for smoothstep function
// Starts from getInputDataD() data and makes two passes over the upper half of the buffer, each
// of which may replace a value by its negation.
// NOTE(review): the conditions guarding each pass and each negation are not shown in these
// lines - confirm against the full function.
17974 vector<deFloat16> getInputDataSS (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17976 vector<deFloat16> result;
17978 result = getInputDataD(seed, count, compCount, stride, argCount, argNo);
// First pass over the upper half.
17982 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17984 const float f = tcu::Float16(result[idx]).asFloat();
17987 result[idx] = tcu::Float16(-f).bits();
// Second pass over the upper half.
17993 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17995 const float f = tcu::Float16(result[idx]).asFloat();
17998 result[idx] = tcu::Float16(-f).bits();
18005 // Generates normalized vectors for arguments 0 and 1
18006 vector<deFloat16> getInputDataN (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
18008 DE_UNREF(compCount);
18009 DE_UNREF(argCount);
18011 de::Random rnd (seed << argNo);
18012 vector<deFloat16> result;
18014 if (argNo == 0 || argNo == 1)
18016 // The input parameters for the incident vector I and the surface normal N must already be normalized
18017 for (size_t numIdx = 0; numIdx < count; numIdx += stride)
18019 vector <float> unnormolized;
18022 for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18023 unnormolized.push_back(float((rnd.getUint16() % 16) - 8));
18025 for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18026 sum += unnormolized[compIdx] * unnormolized[compIdx];
18028 sum = deFloatSqrt(sum);
18030 unnormolized[0] = sum = 1.0f;
18032 for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18033 result.push_back(tcu::Float16(unnormolized[compIdx] / sum).bits());
18035 for (size_t compIdx = compCount; compIdx < stride; ++compIdx)
18036 result.push_back(0);
18041 // Input parameter eta
18042 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
18044 int num = (rnd.getUint16() % 16) - 8;
18046 result.push_back(tcu::Float16(float(num)).bits());
18050 DE_ASSERT(result.size() == count);
18055 // Data generator for complex matrix functions like determinant and inverse
18056 vector<deFloat16> getInputDataC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
18058 DE_UNREF(compCount);
18060 DE_UNREF(argCount);
18062 de::Random rnd (seed << argNo);
18063 vector<deFloat16> result;
18065 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
18067 int num = (rnd.getUint16() % 16) - 8;
18069 result.push_back(tcu::Float16(float(num)).bits());
18072 DE_ASSERT(result.size() == count);
// Describes one operand/result type of the 16-bit math tests (one row of the testTypes table
// in createFloat16ArithmeticFuncTest).
18077 struct Math16TestType
// Type name prefix such as "v2" or "m3x4"; empty for the scalar and the placeholder rows.
18079 const char* typePrefix;
// Component count for scalar/vector rows (1..4); the table uses 0 for matrices.
18080 const size_t typeComponents;
// Sizes in bytes, written in the table as multiples of sizeof(deFloat16).
18081 const size_t typeArrayStride;
18082 const size_t typeStructStride;
// Storage name suffix such as "u32_ndp_2"; presumably selects the matching %SSBO_<suffix> type - TODO confirm.
18083 const char* storage_type;
// Identifies the operand/result types of the 16-bit math tests. MATH16_TYPE_LAST is used as the
// size of the testTypes table in createFloat16ArithmeticFuncTest.
18086 enum Math16DataTypes
// SPIR-V assembly fragments that differ between argument layouts of the 16-bit math tests
// (see the argFragment* constants in createFloat16ArithmeticFuncTest).
18105 struct Math16ArgFragments
// Instructions that load the arguments, invoke the tested operation and store the result.
18107 const char* bodies;
// Extra type/constant declarations needed by the fragment.
18108 const char* variables;
// Extra decorations needed by the fragment.
18109 const char* decorations;
// Function-scope OpVariable declarations needed by the fragment.
18110 const char* funcVariables;
// Signature shared by the getInputData* generators: returns the deFloat16 input data for
// argument argNo of a function taking argCount arguments.
18113 typedef vector<deFloat16> Math16GetInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo);
// Describes one tested 16-bit math function: its naming, argument count, input data generator
// and result verifier.
18115 struct Math16TestFunc
18117 const char* funcName;
18118 const char* funcSuffix;
// Number of arguments the function takes.
18119 size_t funcArgsCount;
// Generator used to produce the input data of each argument.
18124 Math16GetInputData* getInputDataFunc;
// Callback that checks the produced output.
18125 VerifyIOFunc verifyFunc;
18128 template<class SpecResource>
18129 void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGroup& testGroup, const size_t testTypeIdx, const Math16TestFunc& testFunc)
18131 const int testSpecificSeed = deStringHash(testGroup.getName());
18132 const int seed = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
18133 const size_t numDataPointsByAxis = 32;
18134 const size_t numDataPoints = numDataPointsByAxis * numDataPointsByAxis;
18135 const char* componentType = "f16";
18136 const Math16TestType testTypes[MATH16_TYPE_LAST] =
18138 { "", 0, 0, 0, "" },
18139 { "", 1, 1 * sizeof(deFloat16), 2 * sizeof(deFloat16), "u32_half_ndp" },
18140 { "v2", 2, 2 * sizeof(deFloat16), 2 * sizeof(deFloat16), "u32_ndp" },
18141 { "v3", 3, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2" },
18142 { "v4", 4, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2" },
18143 { "m2x2", 0, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2" },
18144 { "m2x3", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4" },
18145 { "m2x4", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4" },
18146 { "m3x2", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_3" },
18147 { "m3x3", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6" },
18148 { "m3x4", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6" },
18149 { "m4x2", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4" },
18150 { "m4x3", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8" },
18151 { "m4x4", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8" },
18154 DE_ASSERT(testTypeIdx == testTypes[testTypeIdx].typeComponents);
18157 const StringTemplate preMain
18159 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
18161 " %f16 = OpTypeFloat 16\n"
18162 " %v2f16 = OpTypeVector %f16 2\n"
18163 " %v3f16 = OpTypeVector %f16 3\n"
18164 " %v4f16 = OpTypeVector %f16 4\n"
18165 " %m2x2f16 = OpTypeMatrix %v2f16 2\n"
18166 " %m2x3f16 = OpTypeMatrix %v3f16 2\n"
18167 " %m2x4f16 = OpTypeMatrix %v4f16 2\n"
18168 " %m3x2f16 = OpTypeMatrix %v2f16 3\n"
18169 " %m3x3f16 = OpTypeMatrix %v3f16 3\n"
18170 " %m3x4f16 = OpTypeMatrix %v4f16 3\n"
18171 " %m4x2f16 = OpTypeMatrix %v2f16 4\n"
18172 " %m4x3f16 = OpTypeMatrix %v3f16 4\n"
18173 " %m4x4f16 = OpTypeMatrix %v4f16 4\n"
18175 " %fp_v2i32 = OpTypePointer Function %v2i32\n"
18176 " %fp_v3i32 = OpTypePointer Function %v3i32\n"
18177 " %fp_v4i32 = OpTypePointer Function %v4i32\n"
18179 " %c_u32_ndp = OpConstant %u32 ${num_data_points}\n"
18180 " %c_u32_half_ndp = OpSpecConstantOp %u32 UDiv %c_i32_ndp %c_u32_2\n"
18181 " %c_u32_5 = OpConstant %u32 5\n"
18182 " %c_u32_6 = OpConstant %u32 6\n"
18183 " %c_u32_7 = OpConstant %u32 7\n"
18184 " %c_u32_8 = OpConstant %u32 8\n"
18185 " %c_f16_0 = OpConstant %f16 0\n"
18186 " %c_f16_1 = OpConstant %f16 1\n"
18187 " %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
18188 " %up_u32 = OpTypePointer Uniform %u32\n"
18189 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
18190 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
18192 " %ra_u32_half_ndp = OpTypeArray %u32 %c_u32_half_ndp\n"
18193 " %SSBO_u32_half_ndp = OpTypeStruct %ra_u32_half_ndp\n"
18194 "%up_SSBO_u32_half_ndp = OpTypePointer Uniform %SSBO_u32_half_ndp\n"
18195 " %ra_u32_ndp = OpTypeArray %u32 %c_u32_ndp\n"
18196 " %SSBO_u32_ndp = OpTypeStruct %ra_u32_ndp\n"
18197 " %up_SSBO_u32_ndp = OpTypePointer Uniform %SSBO_u32_ndp\n"
18198 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
18199 " %up_ra_u32_2 = OpTypePointer Uniform %ra_u32_2\n"
18200 " %ra_ra_u32_ndp = OpTypeArray %ra_u32_2 %c_u32_ndp\n"
18201 " %SSBO_u32_ndp_2 = OpTypeStruct %ra_ra_u32_ndp\n"
18202 " %up_SSBO_u32_ndp_2 = OpTypePointer Uniform %SSBO_u32_ndp_2\n"
18203 " %ra_u32_4 = OpTypeArray %u32 %c_u32_4\n"
18204 " %up_ra_u32_4 = OpTypePointer Uniform %ra_u32_4\n"
18205 " %ra_ra_u32_4 = OpTypeArray %ra_u32_4 %c_u32_ndp\n"
18206 " %SSBO_u32_ndp_4 = OpTypeStruct %ra_ra_u32_4\n"
18207 " %up_SSBO_u32_ndp_4 = OpTypePointer Uniform %SSBO_u32_ndp_4\n"
18208 " %ra_u32_3 = OpTypeArray %u32 %c_u32_3\n"
18209 " %up_ra_u32_3 = OpTypePointer Uniform %ra_u32_3\n"
18210 " %ra_ra_u32_3 = OpTypeArray %ra_u32_3 %c_u32_ndp\n"
18211 " %SSBO_u32_ndp_3 = OpTypeStruct %ra_ra_u32_3\n"
18212 " %up_SSBO_u32_ndp_3 = OpTypePointer Uniform %SSBO_u32_ndp_3\n"
18213 " %ra_u32_6 = OpTypeArray %u32 %c_u32_6\n"
18214 " %up_ra_u32_6 = OpTypePointer Uniform %ra_u32_6\n"
18215 " %ra_ra_u32_6 = OpTypeArray %ra_u32_6 %c_u32_ndp\n"
18216 " %SSBO_u32_ndp_6 = OpTypeStruct %ra_ra_u32_6\n"
18217 " %up_SSBO_u32_ndp_6 = OpTypePointer Uniform %SSBO_u32_ndp_6\n"
18218 " %ra_u32_8 = OpTypeArray %u32 %c_u32_8\n"
18219 " %up_ra_u32_8 = OpTypePointer Uniform %ra_u32_8\n"
18220 " %ra_ra_u32_8 = OpTypeArray %ra_u32_8 %c_u32_ndp\n"
18221 " %SSBO_u32_ndp_8 = OpTypeStruct %ra_ra_u32_8\n"
18222 " %up_SSBO_u32_ndp_8 = OpTypePointer Uniform %SSBO_u32_ndp_8\n"
18224 " %f16_i32_fn = OpTypeFunction %f16 %i32\n"
18225 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
18226 " %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
18227 " %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
18228 " %m2x2f16_i32_fn = OpTypeFunction %m2x2f16 %i32\n"
18229 " %m2x3f16_i32_fn = OpTypeFunction %m2x3f16 %i32\n"
18230 " %m2x4f16_i32_fn = OpTypeFunction %m2x4f16 %i32\n"
18231 " %m3x2f16_i32_fn = OpTypeFunction %m3x2f16 %i32\n"
18232 " %m3x3f16_i32_fn = OpTypeFunction %m3x3f16 %i32\n"
18233 " %m3x4f16_i32_fn = OpTypeFunction %m3x4f16 %i32\n"
18234 " %m4x2f16_i32_fn = OpTypeFunction %m4x2f16 %i32\n"
18235 " %m4x3f16_i32_fn = OpTypeFunction %m4x3f16 %i32\n"
18236 " %m4x4f16_i32_fn = OpTypeFunction %m4x4f16 %i32\n"
18237 " %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
18238 " %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
18239 " %void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
18240 " %void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
18241 "%void_m2x2f16_i32_fn = OpTypeFunction %void %m2x2f16 %i32\n"
18242 "%void_m2x3f16_i32_fn = OpTypeFunction %void %m2x3f16 %i32\n"
18243 "%void_m2x4f16_i32_fn = OpTypeFunction %void %m2x4f16 %i32\n"
18244 "%void_m3x2f16_i32_fn = OpTypeFunction %void %m3x2f16 %i32\n"
18245 "%void_m3x3f16_i32_fn = OpTypeFunction %void %m3x3f16 %i32\n"
18246 "%void_m3x4f16_i32_fn = OpTypeFunction %void %m3x4f16 %i32\n"
18247 "%void_m4x2f16_i32_fn = OpTypeFunction %void %m4x2f16 %i32\n"
18248 "%void_m4x3f16_i32_fn = OpTypeFunction %void %m4x3f16 %i32\n"
18249 "%void_m4x4f16_i32_fn = OpTypeFunction %void %m4x4f16 %i32\n"
18253 const StringTemplate decoration
18255 "OpDecorate %ra_u32_half_ndp ArrayStride 4\n"
18256 "OpMemberDecorate %SSBO_u32_half_ndp 0 Offset 0\n"
18257 "OpDecorate %SSBO_u32_half_ndp BufferBlock\n"
18259 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
18260 "OpMemberDecorate %SSBO_u32_ndp 0 Offset 0\n"
18261 "OpDecorate %SSBO_u32_ndp BufferBlock\n"
18263 "OpDecorate %ra_u32_2 ArrayStride 4\n"
18264 "OpDecorate %ra_ra_u32_ndp ArrayStride 8\n"
18265 "OpMemberDecorate %SSBO_u32_ndp_2 0 Offset 0\n"
18266 "OpDecorate %SSBO_u32_ndp_2 BufferBlock\n"
18268 "OpDecorate %ra_u32_4 ArrayStride 4\n"
18269 "OpDecorate %ra_ra_u32_4 ArrayStride 16\n"
18270 "OpMemberDecorate %SSBO_u32_ndp_4 0 Offset 0\n"
18271 "OpDecorate %SSBO_u32_ndp_4 BufferBlock\n"
18273 "OpDecorate %ra_u32_3 ArrayStride 4\n"
18274 "OpDecorate %ra_ra_u32_3 ArrayStride 16\n"
18275 "OpMemberDecorate %SSBO_u32_ndp_3 0 Offset 0\n"
18276 "OpDecorate %SSBO_u32_ndp_3 BufferBlock\n"
18278 "OpDecorate %ra_u32_6 ArrayStride 4\n"
18279 "OpDecorate %ra_ra_u32_6 ArrayStride 32\n"
18280 "OpMemberDecorate %SSBO_u32_ndp_6 0 Offset 0\n"
18281 "OpDecorate %SSBO_u32_ndp_6 BufferBlock\n"
18283 "OpDecorate %ra_u32_8 ArrayStride 4\n"
18284 "OpDecorate %ra_ra_u32_8 ArrayStride 32\n"
18285 "OpMemberDecorate %SSBO_u32_ndp_8 0 Offset 0\n"
18286 "OpDecorate %SSBO_u32_ndp_8 BufferBlock\n"
18288 "${arg_decorations}"
18291 const StringTemplate testFun
18293 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
18294 " %param = OpFunctionParameter %v4f32\n"
18295 " %entry = OpLabel\n"
18297 " %i = OpVariable %fp_i32 Function\n"
18298 "${arg_infunc_vars}"
18299 " OpStore %i %c_i32_0\n"
18300 " OpBranch %loop\n"
18302 " %loop = OpLabel\n"
18303 " %i_cmp = OpLoad %i32 %i\n"
18304 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
18305 " OpLoopMerge %merge %next None\n"
18306 " OpBranchConditional %lt %write %merge\n"
18308 " %write = OpLabel\n"
18309 " %ndx = OpLoad %i32 %i\n"
18313 " OpBranch %next\n"
18315 " %next = OpLabel\n"
18316 " %i_cur = OpLoad %i32 %i\n"
18317 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
18318 " OpStore %i %i_new\n"
18319 " OpBranch %loop\n"
18321 " %merge = OpLabel\n"
18322 " OpReturnValue %param\n"
18326 const Math16ArgFragments argFragment1 =
18328 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18329 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0\n"
18330 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18336 const Math16ArgFragments argFragment2 =
18338 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18339 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18340 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1\n"
18341 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18347 const Math16ArgFragments argFragment3 =
18349 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18350 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18351 " %val_src2 = OpFunctionCall %${t2} %ld_arg_ssbo_src2 %ndx\n"
18352 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1 %val_src2\n"
18353 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18359 const Math16ArgFragments argFragmentLdExp =
18361 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18362 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18363 "%val_src1i = OpConvertFToS %${dr}i32 %val_src1\n"
18364 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1i\n"
18365 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18374 const Math16ArgFragments argFragmentModfFrac =
18376 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18377 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18378 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18380 " %fp_tmp = OpTypePointer Function %${tr}\n",
18384 " %tmp = OpVariable %fp_tmp Function\n",
18387 const Math16ArgFragments argFragmentModfInt =
18389 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18390 "%val_unused = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18391 " %tmp0 = OpAccessChain %fp_tmp %tmp\n"
18392 " %val_dst = OpLoad %${tr} %tmp0\n"
18393 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18395 " %fp_tmp = OpTypePointer Function %${tr}\n",
18399 " %tmp = OpVariable %fp_tmp Function\n",
18402 const Math16ArgFragments argFragmentModfStruct =
18404 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18405 " %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18406 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18407 " OpStore %tmp_ptr_s %val_tmp\n"
18408 "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_${struct_member}\n"
18409 " %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18410 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18412 " %fp_${tr} = OpTypePointer Function %${tr}\n"
18413 " %st_tmp = OpTypeStruct %${tr} %${tr}\n"
18414 " %fp_tmp = OpTypePointer Function %st_tmp\n"
18415 " %c_frac = OpConstant %i32 0\n"
18416 " %c_int = OpConstant %i32 1\n",
18418 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18419 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18421 " %tmp = OpVariable %fp_tmp Function\n",
18424 const Math16ArgFragments argFragmentFrexpStructS =
18426 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18427 " %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18428 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18429 " OpStore %tmp_ptr_s %val_tmp\n"
18430 "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_i32_0\n"
18431 " %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18432 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18434 " %fp_${tr} = OpTypePointer Function %${tr}\n"
18435 " %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18436 " %fp_tmp = OpTypePointer Function %st_tmp\n",
18438 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18439 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18441 " %tmp = OpVariable %fp_tmp Function\n",
18444 const Math16ArgFragments argFragmentFrexpStructE =
18446 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18447 " %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18448 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18449 " OpStore %tmp_ptr_s %val_tmp\n"
18450 "%tmp_ptr_l = OpAccessChain %fp_${dr}i32 %tmp %c_i32_1\n"
18451 "%val_dst_i = OpLoad %${dr}i32 %tmp_ptr_l\n"
18452 " %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
18453 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18455 " %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18456 " %fp_tmp = OpTypePointer Function %st_tmp\n",
18458 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18459 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18461 " %tmp = OpVariable %fp_tmp Function\n",
18464 const Math16ArgFragments argFragmentFrexpS =
18466 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18467 " %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
18468 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18469 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18475 " %tmp = OpVariable %fp_${dr}i32 Function\n",
18478 const Math16ArgFragments argFragmentFrexpE =
18480 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18481 " %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
18482 "%val_unused = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18483 "%val_dst_i = OpLoad %${dr}i32 %out_exp\n"
18484 " %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
18485 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18491 " %tmp = OpVariable %fp_${dr}i32 Function\n",
18494 string load_funcs[MATH16_TYPE_LAST];
18495 load_funcs[SCALAR] = loadScalarF16FromUint;
18496 load_funcs[VEC2] = loadV2F16FromUint;
18497 load_funcs[VEC3] = loadV3F16FromUints;
18498 load_funcs[VEC4] = loadV4F16FromUints;
18499 load_funcs[MAT2X2] = loadM2x2F16FromUints;
18500 load_funcs[MAT2X3] = loadM2x3F16FromUints;
18501 load_funcs[MAT2X4] = loadM2x4F16FromUints;
18502 load_funcs[MAT3X2] = loadM3x2F16FromUints;
18503 load_funcs[MAT3X3] = loadM3x3F16FromUints;
18504 load_funcs[MAT3X4] = loadM3x4F16FromUints;
18505 load_funcs[MAT4X2] = loadM4x2F16FromUints;
18506 load_funcs[MAT4X3] = loadM4x3F16FromUints;
18507 load_funcs[MAT4X4] = loadM4x4F16FromUints;
18509 string store_funcs[MATH16_TYPE_LAST];
18510 store_funcs[SCALAR] = storeScalarF16AsUint;
18511 store_funcs[VEC2] = storeV2F16AsUint;
18512 store_funcs[VEC3] = storeV3F16AsUints;
18513 store_funcs[VEC4] = storeV4F16AsUints;
18514 store_funcs[MAT2X2] = storeM2x2F16AsUints;
18515 store_funcs[MAT2X3] = storeM2x3F16AsUints;
18516 store_funcs[MAT2X4] = storeM2x4F16AsUints;
18517 store_funcs[MAT3X2] = storeM3x2F16AsUints;
18518 store_funcs[MAT3X3] = storeM3x3F16AsUints;
18519 store_funcs[MAT3X4] = storeM3x4F16AsUints;
18520 store_funcs[MAT4X2] = storeM4x2F16AsUints;
18521 store_funcs[MAT4X3] = storeM4x3F16AsUints;
18522 store_funcs[MAT4X4] = storeM4x4F16AsUints;
18524 const Math16TestType& testType = testTypes[testTypeIdx];
18525 const string funcNameString = string(testFunc.funcName) + string(testFunc.funcSuffix);
18526 const string testName = de::toLower(funcNameString);
18527 const Math16ArgFragments* argFragments = DE_NULL;
18528 const size_t typeStructStride = testType.typeStructStride;
18529 const bool extInst = !(testFunc.funcName[0] == 'O' && testFunc.funcName[1] == 'p');
18530 const size_t numFloatsPerArg0Type = testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16);
18531 const size_t iterations = numDataPoints / numFloatsPerArg0Type;
18532 const size_t numFloatsPerResultType = testTypes[testFunc.typeResult].typeArrayStride / sizeof(deFloat16);
18533 const vector<deFloat16> float16UnusedOutput (iterations * numFloatsPerResultType, 0);
18534 VulkanFeatures features;
18535 SpecResource specResource;
18536 map<string, string> specs;
18537 map<string, string> fragments;
18538 vector<string> extensions;
18540 string funcVariables;
18542 string declarations;
18543 string decorations;
18546 switch (testFunc.funcArgsCount)
18550 argFragments = &argFragment1;
18552 if (funcNameString == "ModfFrac") argFragments = &argFragmentModfFrac;
18553 if (funcNameString == "ModfInt") argFragments = &argFragmentModfInt;
18554 if (funcNameString == "ModfStructFrac") argFragments = &argFragmentModfStruct;
18555 if (funcNameString == "ModfStructInt") argFragments = &argFragmentModfStruct;
18556 if (funcNameString == "FrexpS") argFragments = &argFragmentFrexpS;
18557 if (funcNameString == "FrexpE") argFragments = &argFragmentFrexpE;
18558 if (funcNameString == "FrexpStructS") argFragments = &argFragmentFrexpStructS;
18559 if (funcNameString == "FrexpStructE") argFragments = &argFragmentFrexpStructE;
18565 argFragments = &argFragment2;
18567 if (funcNameString == "Ldexp") argFragments = &argFragmentLdExp;
18573 argFragments = &argFragment3;
18579 TCU_THROW(InternalError, "Invalid number of arguments");
18583 functions = StringTemplate(store_funcs[testFunc.typeResult]).specialize({{"var", "ssbo_dst"}});
18584 if (testFunc.funcArgsCount == 1)
18586 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18588 " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18589 " %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18592 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18593 "OpDecorate %ssbo_src0 Binding 0\n"
18594 "OpDecorate %ssbo_dst DescriptorSet 0\n"
18595 "OpDecorate %ssbo_dst Binding 1\n";
18597 else if (testFunc.funcArgsCount == 2)
18599 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18600 functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18602 " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18603 " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18604 " %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18607 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18608 "OpDecorate %ssbo_src0 Binding 0\n"
18609 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
18610 "OpDecorate %ssbo_src1 Binding 1\n"
18611 "OpDecorate %ssbo_dst DescriptorSet 0\n"
18612 "OpDecorate %ssbo_dst Binding 2\n";
18614 else if (testFunc.funcArgsCount == 3)
18616 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18617 functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18618 functions += StringTemplate(load_funcs[testFunc.typeArg2]).specialize({{"var", "ssbo_src2"}});
18620 " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18621 " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18622 " %ssbo_src2 = OpVariable %up_SSBO_${store_t2} Uniform\n"
18623 " %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18626 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18627 "OpDecorate %ssbo_src0 Binding 0\n"
18628 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
18629 "OpDecorate %ssbo_src1 Binding 1\n"
18630 "OpDecorate %ssbo_src2 DescriptorSet 0\n"
18631 "OpDecorate %ssbo_src2 Binding 2\n"
18632 "OpDecorate %ssbo_dst DescriptorSet 0\n"
18633 "OpDecorate %ssbo_dst Binding 3\n";
18637 TCU_THROW(InternalError, "Invalid number of function arguments");
18640 variables += argFragments->variables;
18641 decorations += argFragments->decorations;
18643 specs["dr"] = testTypes[testFunc.typeResult].typePrefix;
18644 specs["d0"] = testTypes[testFunc.typeArg0].typePrefix;
18645 specs["d1"] = testTypes[testFunc.typeArg1].typePrefix;
18646 specs["d2"] = testTypes[testFunc.typeArg2].typePrefix;
18647 specs["tr"] = string(testTypes[testFunc.typeResult].typePrefix) + componentType;
18648 specs["t0"] = string(testTypes[testFunc.typeArg0].typePrefix) + componentType;
18649 specs["t1"] = string(testTypes[testFunc.typeArg1].typePrefix) + componentType;
18650 specs["t2"] = string(testTypes[testFunc.typeArg2].typePrefix) + componentType;
18651 specs["store_tr"] = string(testTypes[testFunc.typeResult].storage_type);
18652 specs["store_t0"] = string(testTypes[testFunc.typeArg0].storage_type);
18653 specs["store_t1"] = string(testTypes[testFunc.typeArg1].storage_type);
18654 specs["store_t2"] = string(testTypes[testFunc.typeArg2].storage_type);
18655 specs["struct_stride"] = de::toString(typeStructStride);
18656 specs["op"] = extInst ? "OpExtInst" : testFunc.funcName;
18657 specs["ext_inst"] = extInst ? string("%ext_import ") + testFunc.funcName : "";
18658 specs["struct_member"] = de::toLower(testFunc.funcSuffix);
18660 variables = StringTemplate(variables).specialize(specs);
18661 decorations = StringTemplate(decorations).specialize(specs);
18662 funcVariables = StringTemplate(argFragments->funcVariables).specialize(specs);
18663 funcCall = StringTemplate(argFragments->bodies).specialize(specs);
18665 specs["num_data_points"] = de::toString(iterations);
18666 specs["arg_vars"] = variables;
18667 specs["arg_decorations"] = decorations;
18668 specs["arg_infunc_vars"] = funcVariables;
18669 specs["arg_func_call"] = funcCall;
18671 fragments["extension"] = "%ext_import = OpExtInstImport \"GLSL.std.450\"";
18672 fragments["capability"] = "OpCapability Matrix\nOpCapability Float16\n";
18673 fragments["decoration"] = decoration.specialize(specs);
18674 fragments["pre_main"] = preMain.specialize(specs) + functions;
18675 fragments["testfun"] = testFun.specialize(specs);
18677 for (size_t inputArgNdx = 0; inputArgNdx < testFunc.funcArgsCount; ++inputArgNdx)
18679 const size_t numFloatsPerItem = (inputArgNdx == 0) ? testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16)
18680 : (inputArgNdx == 1) ? testTypes[testFunc.typeArg1].typeArrayStride / sizeof(deFloat16)
18681 : (inputArgNdx == 2) ? testTypes[testFunc.typeArg2].typeArrayStride / sizeof(deFloat16)
18683 const vector<deFloat16> inputData = testFunc.getInputDataFunc(seed, numFloatsPerItem * iterations, testTypeIdx, numFloatsPerItem, testFunc.funcArgsCount, inputArgNdx);
18685 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18688 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16UnusedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18689 specResource.verifyIO = testFunc.verifyFunc;
18691 extensions.push_back("VK_KHR_shader_float16_int8");
18693 features.extFloat16Int8.shaderFloat16 = true;
18695 finalizeTestsCreation(specResource, fragments, testCtx, testGroup, testName, features, extensions, IVec3(1, 1, 1));
// Builds the "arithmetic_<C>" test group ("Float 16 arithmetic and related tests").
// The template parameter C is statically checked to lie in [1, 4]; it appears in the
// group name, as a type code in most table rows, and is forwarded as the third argument
// of createFloat16ArithmeticFuncTest<SpecResource>() for every registered row.
// The group is released from its MovePtr on return, so the caller receives the raw
// pointer and is responsible for it from then on.
18698 template<size_t C, class SpecResource>
18699 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
18701 DE_STATIC_ASSERT(C >= 1 && C <= 4);
18703 const std::string testGroupName (string("arithmetic_") + de::toString(C));
18704 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
// Table of functions under test; each row aggregate-initializes a Math16TestFunc
// (declared outside this listing).
// NOTE(review): the columns appear to be: function name, name suffix, argument count,
// result type, arg0/arg1/arg2 types (0 where unused), input-data generator, result
// verifier -- confirm against the Math16TestFunc declaration.
// createFloat16ArithmeticFuncTest() treats names starting with "Op" as core SPIR-V
// opcodes and every other name as an OpExtInst on %ext_import (GLSL.std.450); the test
// name is the lower-cased concatenation of name and suffix.
18705 const Math16TestFunc testFuncs[] =
18707 { "OpFNegate", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16OpFNegate> },
18708 { "Round", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Round> },
18709 { "RoundEven", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16RoundEven> },
18710 { "Trunc", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Trunc> },
18711 { "FAbs", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FAbs> },
18712 { "FSign", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FSign> },
18713 { "Floor", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Floor> },
18714 { "Ceil", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Ceil> },
18715 { "Fract", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Fract> },
18716 { "Radians", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Radians> },
18717 { "Degrees", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Degrees> },
18718 { "Sin", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Sin> },
18719 { "Cos", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Cos> },
18720 { "Tan", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Tan> },
18721 { "Asin", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Asin> },
18722 { "Acos", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Acos> },
18723 { "Atan", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Atan> },
18724 { "Sinh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Sinh> },
18725 { "Cosh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Cosh> },
18726 { "Tanh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Tanh> },
18727 { "Asinh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Asinh> },
18728 { "Acosh", "", 1, C, C, 0, 0, &getInputDataAC, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Acosh> },
18729 { "Atanh", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Atanh> },
18730 { "Exp", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Exp> },
18731 { "Log", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Log> },
18732 { "Exp2", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Exp2> },
18733 { "Log2", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Log2> },
18734 { "Sqrt", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Sqrt> },
18735 { "InverseSqrt", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, 0, 0, fp16InverseSqrt> },
// Modf/Frexp rows come in pairs: the suffix selects which of the two outputs is stored
// and verified; createFloat16ArithmeticFuncTest() picks a dedicated argument fragment
// by matching the concatenated name+suffix (e.g. "ModfFrac", "FrexpStructE").
18736 { "Modf", "Frac", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16ModfFrac> },
18737 { "Modf", "Int", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16ModfInt> },
18738 { "ModfStruct", "Frac", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16ModfFrac> },
18739 { "ModfStruct", "Int", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16ModfInt> },
18740 { "Frexp", "S", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FrexpS> },
18741 { "Frexp", "E", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FrexpE> },
18742 { "FrexpStruct", "S", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FrexpS> },
18743 { "FrexpStruct", "E", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FrexpE> },
18744 { "OpFAdd", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16OpFAdd> },
18745 { "OpFSub", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16OpFSub> },
18746 { "OpFMul", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16OpFMul> },
18747 { "OpFDiv", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16OpFDiv> },
18748 { "Atan2", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16Atan2> },
18749 { "Pow", "", 2, C, C, C, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, C, 0, fp16Pow> },
18750 { "FMin", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16FMin> },
18751 { "FMax", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16FMax> },
18752 { "Step", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16Step> },
18753 { "Ldexp", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16Ldexp> },
18754 { "FClamp", "", 3, C, C, C, C, &getInputData, compareFP16ArithmeticFunc< C, C, C, C, fp16FClamp> },
18755 { "FMix", "", 3, C, C, C, C, &getInputDataD, compareFP16ArithmeticFunc< C, C, C, C, fp16FMix> },
18756 { "SmoothStep", "", 3, C, C, C, C, &getInputDataSS, compareFP16ArithmeticFunc< C, C, C, C, fp16SmoothStep> },
18757 { "Fma", "", 3, C, C, C, C, &getInputData, compareFP16ArithmeticFunc< C, C, C, C, fp16Fma> },
// Rows below mix type codes: some use 1 instead of C for the result or for one argument
// (Length, Distance, OpDot, Refract, OpVectorTimesScalar).
18758 { "Length", "", 1, 1, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< 1, C, 0, 0, fp16Length> },
18759 { "Distance", "", 2, 1, C, C, 0, &getInputData, compareFP16ArithmeticFunc< 1, C, C, 0, fp16Distance> },
18760 { "Cross", "", 2, C, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc< C, C, C, 0, fp16Cross> },
18761 { "Normalize", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Normalize> },
18762 { "FaceForward", "", 3, C, C, C, C, &getInputDataD, compareFP16ArithmeticFunc< C, C, C, C, fp16FaceForward> },
18763 { "Reflect", "", 2, C, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc< C, C, C, 0, fp16Reflect> },
18764 { "Refract", "", 3, C, C, C, 1, &getInputDataN, compareFP16ArithmeticFunc< C, C, C, 1, fp16Refract> },
18765 { "OpDot", "", 2, 1, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc< 1, C, C, 0, fp16Dot> },
18766 { "OpVectorTimesScalar", "", 2, C, C, 1, 0, &getInputDataV, compareFP16ArithmeticFunc< C, C, 1, 0, fp16VectorTimesScalar> },
// Register one test per table row.
18769 for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
18771 const Math16TestFunc& testFunc = testFuncs[testFuncIdx];
// Only funcName is compared here (the suffix is not part of this string).
18772 const string funcNameString = testFunc.funcName;
// NOTE(review): the statements guarded by the three conditions below are not visible in
// this listing; presumably each one skips the row, so that Cross is only generated for
// C == 3 and OpDot / OpVectorTimesScalar only for C >= 2 -- confirm against the full file.
18774 if ((C != 3) && funcNameString == "Cross")
18777 if ((C < 2) && funcNameString == "OpDot")
18780 if ((C < 2) && funcNameString == "OpVectorTimesScalar")
18783 createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), C, testFunc);
// Hand the raw group pointer to the caller.
18786 return testGroup.release();
// Builds the "arithmetic" test group ("Float 16 arithmetic and related tests") holding
// the rows whose type columns use named codes (VEC*, MAT*X*, SCALAR, NONE): OpTranspose,
// OpMatrixTimes*, OpVectorTimesMatrix, OpOuterProduct, Determinant and MatrixInverse.
// Every row is registered through createFloat16ArithmeticFuncTest<SpecResource>() with 0
// as the third argument. The group is released from its MovePtr on return, so the caller
// receives the raw pointer.
18789 template<class SpecResource>
18790 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
18792 const std::string testGroupName ("arithmetic");
18793 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
// Each row aggregate-initializes a Math16TestFunc (declared outside this listing).
// NOTE(review): the columns appear to be: function name, name suffix, argument count,
// result type, arg0/arg1/arg2 types, input-data generator, result verifier -- confirm
// against the Math16TestFunc declaration.
// NOTE(review): the numeric template arguments of compareFP16ArithmeticFunc (2, 3, 4, 8,
// 16, ...) look like per-item half-float counts of the result and argument types
// (e.g. 16 wherever a MAT3X3 or MAT4X4 appears) -- confirm against the comparator's
// declaration, which is not visible here.
18794 const Math16TestFunc testFuncs[] =
18796 { "OpTranspose", "2x2", 1, MAT2X2, MAT2X2, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 4, 4, 0, 0, fp16Transpose<2,2> > },
18797 { "OpTranspose", "3x2", 1, MAT2X3, MAT3X2, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 8, 8, 0, 0, fp16Transpose<3,2> > },
18798 { "OpTranspose", "4x2", 1, MAT2X4, MAT4X2, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 8, 8, 0, 0, fp16Transpose<4,2> > },
18799 { "OpTranspose", "2x3", 1, MAT3X2, MAT2X3, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 8, 8, 0, 0, fp16Transpose<2,3> > },
18800 { "OpTranspose", "3x3", 1, MAT3X3, MAT3X3, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 16, 16, 0, 0, fp16Transpose<3,3> > },
18801 { "OpTranspose", "4x3", 1, MAT3X4, MAT4X3, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 16, 16, 0, 0, fp16Transpose<4,3> > },
18802 { "OpTranspose", "2x4", 1, MAT4X2, MAT2X4, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 8, 8, 0, 0, fp16Transpose<2,4> > },
18803 { "OpTranspose", "3x4", 1, MAT4X3, MAT3X4, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 16, 16, 0, 0, fp16Transpose<3,4> > },
18804 { "OpTranspose", "4x4", 1, MAT4X4, MAT4X4, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 16, 16, 0, 0, fp16Transpose<4,4> > },
18805 { "OpMatrixTimesScalar", "2x2", 2, MAT2X2, MAT2X2, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 4, 1, 0, fp16MatrixTimesScalar<2,2> > },
18806 { "OpMatrixTimesScalar", "2x3", 2, MAT2X3, MAT2X3, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 1, 0, fp16MatrixTimesScalar<2,3> > },
18807 { "OpMatrixTimesScalar", "2x4", 2, MAT2X4, MAT2X4, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 1, 0, fp16MatrixTimesScalar<2,4> > },
18808 { "OpMatrixTimesScalar", "3x2", 2, MAT3X2, MAT3X2, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 1, 0, fp16MatrixTimesScalar<3,2> > },
18809 { "OpMatrixTimesScalar", "3x3", 2, MAT3X3, MAT3X3, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 1, 0, fp16MatrixTimesScalar<3,3> > },
18810 { "OpMatrixTimesScalar", "3x4", 2, MAT3X4, MAT3X4, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 1, 0, fp16MatrixTimesScalar<3,4> > },
18811 { "OpMatrixTimesScalar", "4x2", 2, MAT4X2, MAT4X2, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 1, 0, fp16MatrixTimesScalar<4,2> > },
18812 { "OpMatrixTimesScalar", "4x3", 2, MAT4X3, MAT4X3, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 1, 0, fp16MatrixTimesScalar<4,3> > },
18813 { "OpMatrixTimesScalar", "4x4", 2, MAT4X4, MAT4X4, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 1, 0, fp16MatrixTimesScalar<4,4> > },
18814 { "OpVectorTimesMatrix", "2x2", 2, VEC2, VEC2, MAT2X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 2, 4, 0, fp16VectorTimesMatrix<2,2> > },
18815 { "OpVectorTimesMatrix", "2x3", 2, VEC2, VEC3, MAT2X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 3, 8, 0, fp16VectorTimesMatrix<2,3> > },
18816 { "OpVectorTimesMatrix", "2x4", 2, VEC2, VEC4, MAT2X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 4, 8, 0, fp16VectorTimesMatrix<2,4> > },
18817 { "OpVectorTimesMatrix", "3x2", 2, VEC3, VEC2, MAT3X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 2, 8, 0, fp16VectorTimesMatrix<3,2> > },
18818 { "OpVectorTimesMatrix", "3x3", 2, VEC3, VEC3, MAT3X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 3, 16, 0, fp16VectorTimesMatrix<3,3> > },
18819 { "OpVectorTimesMatrix", "3x4", 2, VEC3, VEC4, MAT3X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 4, 16, 0, fp16VectorTimesMatrix<3,4> > },
18820 { "OpVectorTimesMatrix", "4x2", 2, VEC4, VEC2, MAT4X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 2, 8, 0, fp16VectorTimesMatrix<4,2> > },
18821 { "OpVectorTimesMatrix", "4x3", 2, VEC4, VEC3, MAT4X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 3, 16, 0, fp16VectorTimesMatrix<4,3> > },
18822 { "OpVectorTimesMatrix", "4x4", 2, VEC4, VEC4, MAT4X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 4, 16, 0, fp16VectorTimesMatrix<4,4> > },
18823 { "OpMatrixTimesVector", "2x2", 2, VEC2, MAT2X2, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 4, 2, 0, fp16MatrixTimesVector<2,2> > },
18824 { "OpMatrixTimesVector", "2x3", 2, VEC3, MAT2X3, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 8, 2, 0, fp16MatrixTimesVector<2,3> > },
18825 { "OpMatrixTimesVector", "2x4", 2, VEC4, MAT2X4, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 8, 2, 0, fp16MatrixTimesVector<2,4> > },
18826 { "OpMatrixTimesVector", "3x2", 2, VEC2, MAT3X2, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 8, 3, 0, fp16MatrixTimesVector<3,2> > },
18827 { "OpMatrixTimesVector", "3x3", 2, VEC3, MAT3X3, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 16, 3, 0, fp16MatrixTimesVector<3,3> > },
18828 { "OpMatrixTimesVector", "3x4", 2, VEC4, MAT3X4, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 16, 3, 0, fp16MatrixTimesVector<3,4> > },
18829 { "OpMatrixTimesVector", "4x2", 2, VEC2, MAT4X2, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 8, 4, 0, fp16MatrixTimesVector<4,2> > },
18830 { "OpMatrixTimesVector", "4x3", 2, VEC3, MAT4X3, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 16, 4, 0, fp16MatrixTimesVector<4,3> > },
18831 { "OpMatrixTimesVector", "4x4", 2, VEC4, MAT4X4, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 16, 4, 0, fp16MatrixTimesVector<4,4> > },
// OpMatrixTimesMatrix suffixes name both operands ("<left>_<right>"); the first type
// column is the product's type.
18832 { "OpMatrixTimesMatrix", "2x2_2x2", 2, MAT2X2, MAT2X2, MAT2X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 4, 4, 0, fp16MatrixTimesMatrix<2,2,2,2> > },
18833 { "OpMatrixTimesMatrix", "2x2_3x2", 2, MAT3X2, MAT2X2, MAT3X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 4, 8, 0, fp16MatrixTimesMatrix<2,2,3,2> > },
18834 { "OpMatrixTimesMatrix", "2x2_4x2", 2, MAT4X2, MAT2X2, MAT4X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 4, 8, 0, fp16MatrixTimesMatrix<2,2,4,2> > },
18835 { "OpMatrixTimesMatrix", "2x3_2x2", 2, MAT2X3, MAT2X3, MAT2X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 4, 0, fp16MatrixTimesMatrix<2,3,2,2> > },
18836 { "OpMatrixTimesMatrix", "2x3_3x2", 2, MAT3X3, MAT2X3, MAT3X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 8, 8, 0, fp16MatrixTimesMatrix<2,3,3,2> > },
18837 { "OpMatrixTimesMatrix", "2x3_4x2", 2, MAT4X3, MAT2X3, MAT4X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 8, 8, 0, fp16MatrixTimesMatrix<2,3,4,2> > },
18838 { "OpMatrixTimesMatrix", "2x4_2x2", 2, MAT2X4, MAT2X4, MAT2X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 4, 0, fp16MatrixTimesMatrix<2,4,2,2> > },
18839 { "OpMatrixTimesMatrix", "2x4_3x2", 2, MAT3X4, MAT2X4, MAT3X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 8, 8, 0, fp16MatrixTimesMatrix<2,4,3,2> > },
18840 { "OpMatrixTimesMatrix", "2x4_4x2", 2, MAT4X4, MAT2X4, MAT4X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 8, 8, 0, fp16MatrixTimesMatrix<2,4,4,2> > },
18841 { "OpMatrixTimesMatrix", "3x2_2x3", 2, MAT2X2, MAT3X2, MAT2X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 8, 8, 0, fp16MatrixTimesMatrix<3,2,2,3> > },
18842 { "OpMatrixTimesMatrix", "3x2_3x3", 2, MAT3X2, MAT3X2, MAT3X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 16, 0, fp16MatrixTimesMatrix<3,2,3,3> > },
18843 { "OpMatrixTimesMatrix", "3x2_4x3", 2, MAT4X2, MAT3X2, MAT4X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 16, 0, fp16MatrixTimesMatrix<3,2,4,3> > },
18844 { "OpMatrixTimesMatrix", "3x3_2x3", 2, MAT2X3, MAT3X3, MAT2X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 16, 8, 0, fp16MatrixTimesMatrix<3,3,2,3> > },
18845 { "OpMatrixTimesMatrix", "3x3_3x3", 2, MAT3X3, MAT3X3, MAT3X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<3,3,3,3> > },
18846 { "OpMatrixTimesMatrix", "3x3_4x3", 2, MAT4X3, MAT3X3, MAT4X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<3,3,4,3> > },
18847 { "OpMatrixTimesMatrix", "3x4_2x3", 2, MAT2X4, MAT3X4, MAT2X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 16, 8, 0, fp16MatrixTimesMatrix<3,4,2,3> > },
18848 { "OpMatrixTimesMatrix", "3x4_3x3", 2, MAT3X4, MAT3X4, MAT3X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<3,4,3,3> > },
18849 { "OpMatrixTimesMatrix", "3x4_4x3", 2, MAT4X4, MAT3X4, MAT4X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<3,4,4,3> > },
18850 { "OpMatrixTimesMatrix", "4x2_2x4", 2, MAT2X2, MAT4X2, MAT2X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 8, 8, 0, fp16MatrixTimesMatrix<4,2,2,4> > },
18851 { "OpMatrixTimesMatrix", "4x2_3x4", 2, MAT3X2, MAT4X2, MAT3X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 16, 0, fp16MatrixTimesMatrix<4,2,3,4> > },
18852 { "OpMatrixTimesMatrix", "4x2_4x4", 2, MAT4X2, MAT4X2, MAT4X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 16, 0, fp16MatrixTimesMatrix<4,2,4,4> > },
18853 { "OpMatrixTimesMatrix", "4x3_2x4", 2, MAT2X3, MAT4X3, MAT2X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 16, 8, 0, fp16MatrixTimesMatrix<4,3,2,4> > },
18854 { "OpMatrixTimesMatrix", "4x3_3x4", 2, MAT3X3, MAT4X3, MAT3X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<4,3,3,4> > },
18855 { "OpMatrixTimesMatrix", "4x3_4x4", 2, MAT4X3, MAT4X3, MAT4X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<4,3,4,4> > },
18856 { "OpMatrixTimesMatrix", "4x4_2x4", 2, MAT2X4, MAT4X4, MAT2X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 16, 8, 0, fp16MatrixTimesMatrix<4,4,2,4> > },
18857 { "OpMatrixTimesMatrix", "4x4_3x4", 2, MAT3X4, MAT4X4, MAT3X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<4,4,3,4> > },
18858 { "OpMatrixTimesMatrix", "4x4_4x4", 2, MAT4X4, MAT4X4, MAT4X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<4,4,4,4> > },
18859 { "OpOuterProduct", "2x2", 2, MAT2X2, VEC2, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 2, 2, 0, fp16OuterProduct<2,2> > },
18860 { "OpOuterProduct", "2x3", 2, MAT2X3, VEC3, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 3, 2, 0, fp16OuterProduct<2,3> > },
18861 { "OpOuterProduct", "2x4", 2, MAT2X4, VEC4, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 4, 2, 0, fp16OuterProduct<2,4> > },
18862 { "OpOuterProduct", "3x2", 2, MAT3X2, VEC2, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 2, 3, 0, fp16OuterProduct<3,2> > },
18863 { "OpOuterProduct", "3x3", 2, MAT3X3, VEC3, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 3, 3, 0, fp16OuterProduct<3,3> > },
18864 { "OpOuterProduct", "3x4", 2, MAT3X4, VEC4, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 4, 3, 0, fp16OuterProduct<3,4> > },
18865 { "OpOuterProduct", "4x2", 2, MAT4X2, VEC2, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 2, 4, 0, fp16OuterProduct<4,2> > },
18866 { "OpOuterProduct", "4x3", 2, MAT4X3, VEC3, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 3, 4, 0, fp16OuterProduct<4,3> > },
18867 { "OpOuterProduct", "4x4", 2, MAT4X4, VEC4, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 4, 4, 0, fp16OuterProduct<4,4> > },
// Determinant and MatrixInverse do not start with "Op", so they are emitted as extended
// instructions. Only a 2x2 MatrixInverse row is present in this table.
18868 { "Determinant", "2x2", 1, SCALAR, MAT2X2, NONE, 0, &getInputDataC, compareFP16ArithmeticFunc< 1, 4, 0, 0, fp16Determinant<2> > },
18869 { "Determinant", "3x3", 1, SCALAR, MAT3X3, NONE, 0, &getInputDataC, compareFP16ArithmeticFunc< 1, 16, 0, 0, fp16Determinant<3> > },
18870 { "Determinant", "4x4", 1, SCALAR, MAT4X4, NONE, 0, &getInputDataC, compareFP16ArithmeticFunc< 1, 16, 0, 0, fp16Determinant<4> > },
18871 { "MatrixInverse", "2x2", 1, MAT2X2, MAT2X2, NONE, 0, &getInputDataC, compareFP16ArithmeticFunc< 4, 4, 0, 0, fp16Inverse<2> > },
// Register one test per table row; no rows are filtered in this variant.
18874 for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
18876 const Math16TestFunc& testFunc = testFuncs[testFuncIdx];
// NOTE(review): the literal 0 fills the slot where the vector variant passes C;
// presumably it is the test-type index -- confirm against createFloat16ArithmeticFuncTest.
18878 createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), 0, testFunc);
// Hand the raw group pointer to the caller.
18881 return testGroup.release();
// Describes one Amber-script comparison test. The users in this file initialize it with
// two string literals and read them back as .name and .desc.
// NOTE(review): the member declarations are not visible in this listing -- confirm
// their types against the full file.
18884 struct ComparisonCase
// Builds the "comparison_<C>" group ("Float 32 comparison tests") for compute shaders.
// NOTE(review): C is used as a value in the group and file names; its template
// declaration line is not visible in this listing.
// The group is released from its MovePtr on return, so the caller receives the raw pointer.
18891 tcu::TestCaseGroup* createFloat32ComparisonComputeSet (tcu::TestContext& testCtx)
18893 const string testGroupName ("comparison_" + de::toString(C));
18894 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 32 comparison tests"));
// The Amber-backed cases below are compiled only when CTS_USES_VULKANSC is not defined.
// NOTE(review): the closing preprocessor directive is not visible in this listing.
18895 #ifndef CTS_USES_VULKANSC
18896 const char* dataDir = "spirv_assembly/instruction/float32/comparison";
// { name, description } pairs; the name becomes the prefix of the script file name.
18898 const ComparisonCase amberTests[] =
18900 { "modfstruct", "modf and modfStruct" },
18901 { "frexpstruct", "frexp and frexpStruct" }
18904 for (ComparisonCase test : amberTests)
18906 const string caseDesc ("Compare output of " + test.desc);
// Script file name pattern: "<name>_<C>_comp.amber".
18907 const string fileName (test.name + "_" + de::toString(C) + "_comp.amber");
// NOTE(review): the remaining createAmberTestCase arguments are not visible in this
// listing; presumably they include the case description, dataDir and fileName.
18909 testGroup->addChild(cts_amber::createAmberTestCase(testCtx,
18916 return testGroup.release();
// Requirement strings attached to a shader stage (e.g. "Features.tessellationShader"); an empty
// vector is used for stages that need nothing extra. Forwarded to createAmberTestCase below.
18922 vector<string> requirement;
// Builds the "comparison_<C>" test group holding the Amber-based float32 comparison cases for the
// graphics stages (vert, tesc, tese, geom, frag) and returns ownership of it to the caller.
// NOTE(review): `C` is not declared in the visible text of this function; presumably a template
// parameter - confirm.
18926 tcu::TestCaseGroup* createFloat32ComparisonGraphicsSet (tcu::TestContext& testCtx)
18928 const string testGroupName ("comparison_" + de::toString(C));
18929 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 32 comparison tests"));
// The Amber cases are only compiled in when CTS_USES_VULKANSC is not defined.
18930 #ifndef CTS_USES_VULKANSC
18931 const char* dataDir = "spirv_assembly/instruction/float32/comparison";
// Stage suffix plus the requirement strings for that stage; tessellation and geometry stages
// list the corresponding feature, vertex and fragment list none.
18933 const ShaderStage stages[] =
18935 { "vert", vector<string>(0) },
18936 { "tesc", vector<string>(1, "Features.tessellationShader") },
18937 { "tese", vector<string>(1, "Features.tessellationShader") },
18938 { "geom", vector<string>(1, "Features.geometryShader") },
18939 { "frag", vector<string>(0) }
// One entry per Amber script family: { name, description suffix }.
18942 const ComparisonCase amberTests[] =
18944 { "modfstruct", "modf and modfStruct" },
18945 { "frexpstruct", "frexp and frexpStruct" }
// Every script family is instantiated once per shader stage.
18948 for (ComparisonCase test : amberTests)
18949 for (ShaderStage stage : stages)
18951 const string caseName (test.name + "_" + stage.name);
18952 const string caseDesc ("Compare output of " + test.desc);
// Script file name pattern: "<name>_<C>_<stage>.amber".
18953 const string fileName (test.name + "_" + de::toString(C) + "_" + stage.name + ".amber");
18955 testGroup->addChild(cts_amber::createAmberTestCase(testCtx,
18960 stage.requirement));
// Ownership of the group is handed to the caller.
18964 return testGroup.release();
// Maps a 32-bit NumberType (int32, uint32 or float32) to a name string. The group builders below
// use the result as the sub-group name and inside the sub-group description.
18967 const string getNumberTypeName (const NumberType type)
18969 if (type == NUMBERTYPE_INT32)
18973 else if (type == NUMBERTYPE_UINT32)
18977 else if (type == NUMBERTYPE_FLOAT32)
18988 deInt32 getInt(de::Random& rnd)
18990 return rnd.getInt(std::numeric_limits<int>::min(), std::numeric_limits<int>::max());
// Returns `str` concatenated with itself `times` times.
// A non-positive `times` (or an empty `str`) yields an empty string.
const string repeatString (const string& str, int times)
{
	string repeated;

	if (times <= 0 || str.empty())
		return repeated;

	// Size the result once instead of growing it on every append.
	repeated.reserve(str.size() * static_cast<size_t>(times));

	for (int i = 0; i < times; ++i)
		repeated += str;

	return repeated;
}
19003 const string getRandomConstantString (const NumberType type, de::Random& rnd)
19005 if (type == NUMBERTYPE_INT32)
19007 return numberToString<deInt32>(getInt(rnd));
19009 else if (type == NUMBERTYPE_UINT32)
19011 return numberToString<deUint32>(rnd.getUint32());
19013 else if (type == NUMBERTYPE_FLOAT32)
19015 return numberToString<float>(rnd.getFloat());
19024 void createVectorCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19026 map<string, string> params;
19029 for (int width = 2; width <= 4; ++width)
19031 const string randomConst = numberToString(getInt(rnd));
19032 const string widthStr = numberToString(width);
19033 const string composite_type = "${customType}vec" + widthStr;
19034 const int index = rnd.getInt(0, width-1);
19036 params["type"] = "vec";
19037 params["name"] = params["type"] + "_" + widthStr;
19038 params["compositeDecl"] = composite_type + " = OpTypeVector ${customType} " + widthStr +"\n";
19039 params["compositeType"] = composite_type;
19040 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19041 params["compositeConstruct"] = "%instance = OpCompositeConstruct " + composite_type + repeatString(" %filler", width) + "\n";
19042 params["indexes"] = numberToString(index);
19043 testCases.push_back(params);
19047 void createArrayCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19049 const int limit = 10;
19050 map<string, string> params;
19052 for (int width = 2; width <= limit; ++width)
19054 string randomConst = numberToString(getInt(rnd));
19055 string widthStr = numberToString(width);
19056 int index = rnd.getInt(0, width-1);
19058 params["type"] = "array";
19059 params["name"] = params["type"] + "_" + widthStr;
19060 params["compositeDecl"] = string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
19061 + "%composite = OpTypeArray ${customType} %arraywidth\n";
19062 params["compositeType"] = "%composite";
19063 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19064 params["compositeConstruct"] = "%instance = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19065 params["indexes"] = numberToString(index);
19066 testCases.push_back(params);
19070 void createStructCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19072 const int limit = 10;
19073 map<string, string> params;
19075 for (int width = 2; width <= limit; ++width)
19077 string randomConst = numberToString(getInt(rnd));
19078 int index = rnd.getInt(0, width-1);
19080 params["type"] = "struct";
19081 params["name"] = params["type"] + "_" + numberToString(width);
19082 params["compositeDecl"] = "%composite = OpTypeStruct" + repeatString(" ${customType}", width) + "\n";
19083 params["compositeType"] = "%composite";
19084 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19085 params["compositeConstruct"] = "%instance = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19086 params["indexes"] = numberToString(index);
19087 testCases.push_back(params);
19091 void createMatrixCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19093 map<string, string> params;
19096 for (int width = 2; width <= 4; ++width)
19098 string widthStr = numberToString(width);
19100 for (int column = 2 ; column <= 4; ++column)
19102 int index_0 = rnd.getInt(0, column-1);
19103 int index_1 = rnd.getInt(0, width-1);
19104 string columnStr = numberToString(column);
19106 params["type"] = "matrix";
19107 params["name"] = params["type"] + "_" + widthStr + "x" + columnStr;
19108 params["compositeDecl"] = string("%vectype = OpTypeVector ${customType} " + widthStr + "\n")
19109 + "%composite = OpTypeMatrix %vectype " + columnStr + "\n";
19110 params["compositeType"] = "%composite";
19112 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n"
19113 + "%fillerVec = OpConstantComposite %vectype" + repeatString(" %filler", width) + "\n";
19115 params["compositeConstruct"] = "%instance = OpCompositeConstruct %composite" + repeatString(" %fillerVec", column) + "\n";
19116 params["indexes"] = numberToString(index_0) + " " + numberToString(index_1);
19117 testCases.push_back(params);
19122 void createCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19124 createVectorCompositeCases(testCases, rnd, type);
19125 createArrayCompositeCases(testCases, rnd, type);
19126 createStructCompositeCases(testCases, rnd, type);
19127 // Matrix only supports float types
19128 if (type == NUMBERTYPE_FLOAT32)
19130 createMatrixCompositeCases(testCases, rnd, type);
19134 const string getAssemblyTypeDeclaration (const NumberType type)
19138 case NUMBERTYPE_INT32: return "OpTypeInt 32 1";
19139 case NUMBERTYPE_UINT32: return "OpTypeInt 32 0";
19140 case NUMBERTYPE_FLOAT32: return "OpTypeFloat 32";
19141 default: DE_ASSERT(false); return "";
19145 const string getAssemblyTypeName (const NumberType type)
19149 case NUMBERTYPE_INT32: return "%i32";
19150 case NUMBERTYPE_UINT32: return "%u32";
19151 case NUMBERTYPE_FLOAT32: return "%f32";
19152 default: DE_ASSERT(false); return "";
// Produces the compute shader assembly for one OpCompositeInsert case.
// `type` selects the scalar type id substituted for ${customType}; `params` is one parameter set
// produced by the create*CompositeCases helpers (it must contain "compositeDecl",
// "compositeType", "compositeConstruct" and "filler", which are read with at()).
// Returns the fully specialized assembly text.
19156 const string specializeCompositeInsertShaderTemplate (const NumberType type, const map<string, string>& params)
// Work on a copy so the caller's parameter set is left untouched.
19158 map<string, string> parameters(params);
19160 const string customType = getAssemblyTypeName(type);
// First pass: resolve ${customType} inside the per-case snippets themselves.
19161 map<string, string> substCustomType;
19162 substCustomType["customType"] = customType;
19163 parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19164 parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19165 parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19166 parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19167 parameters["customType"] = customType;
// Only array composites get an ArrayStride decoration.
19168 parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
// %u32vec3 is needed for the invocation id; it is declared here unless the composite type is
// itself %u32vec3.
19170 if (parameters.at("compositeType") != "%u32vec3")
19172 parameters["u32vec3Decl"] = "%u32vec3 = OpTypeVector %u32 3\n";
// Second pass: specialize the shader template with the prepared parameters.
19175 return StringTemplate(
19176 "OpCapability Shader\n"
19177 "OpCapability Matrix\n"
19178 "OpMemoryModel Logical GLSL450\n"
19179 "OpEntryPoint GLCompute %main \"main\" %id\n"
19180 "OpExecutionMode %main LocalSize 1 1 1\n"
19182 "OpSource GLSL 430\n"
19183 "OpName %main \"main\"\n"
19184 "OpName %id \"gl_GlobalInvocationID\"\n"
19187 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19188 "OpDecorate %buf BufferBlock\n"
19189 "OpDecorate %indata DescriptorSet 0\n"
19190 "OpDecorate %indata Binding 0\n"
19191 "OpDecorate %outdata DescriptorSet 0\n"
19192 "OpDecorate %outdata Binding 1\n"
19193 "OpDecorate %customarr ArrayStride 4\n"
19194 "${compositeDecorator}"
19195 "OpMemberDecorate %buf 0 Offset 0\n"
19198 "%void = OpTypeVoid\n"
19199 "%voidf = OpTypeFunction %void\n"
19200 "%u32 = OpTypeInt 32 0\n"
19201 "%i32 = OpTypeInt 32 1\n"
19202 "%f32 = OpTypeFloat 32\n"
19204 // Composite declaration
19210 "${u32vec3Decl:opt}"
19211 "%uvec3ptr = OpTypePointer Input %u32vec3\n"
19213 // Inherited from custom
19214 "%customptr = OpTypePointer Uniform ${customType}\n"
19215 "%customarr = OpTypeRuntimeArray ${customType}\n"
19216 "%buf = OpTypeStruct %customarr\n"
19217 "%bufptr = OpTypePointer Uniform %buf\n"
19219 "%indata = OpVariable %bufptr Uniform\n"
19220 "%outdata = OpVariable %bufptr Uniform\n"
19222 "%id = OpVariable %uvec3ptr Input\n"
19223 "%zero = OpConstant %i32 0\n"
19225 "%main = OpFunction %void None %voidf\n"
19226 "%label = OpLabel\n"
19227 "%idval = OpLoad %u32vec3 %id\n"
19228 "%x = OpCompositeExtract %u32 %idval 0\n"
19230 "%inloc = OpAccessChain %customptr %indata %zero %x\n"
19231 "%outloc = OpAccessChain %customptr %outdata %zero %x\n"
19232 // Read the input value
19233 "%inval = OpLoad ${customType} %inloc\n"
19234 // Create the composite and fill it
19235 "${compositeConstruct}"
19236 // Insert the input value to a place
19237 "%instance2 = OpCompositeInsert ${compositeType} %inval %instance ${indexes}\n"
19238 // Read back the value from the position
19239 "%out_val = OpCompositeExtract ${customType} %instance2 ${indexes}\n"
19240 // Store it in the output position
19241 " OpStore %outloc %out_val\n"
19244 ).specialize(parameters);
19247 template<typename T>
19248 BufferSp createCompositeBuffer(T number)
19250 return BufferSp(new Buffer<T>(vector<T>(1, number)));
// Builds the "opcompositeinsert" group: one sub-group per 32-bit number type, each holding one
// compute case per composite parameter set. Returns ownership of the group to the caller.
19253 tcu::TestCaseGroup* createOpCompositeInsertGroup (tcu::TestContext& testCtx)
19255 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opcompositeinsert", "Test the OpCompositeInsert instruction"));
// The random generator is seeded from the group name, so the generated cases are reproducible.
19256 de::Random rnd (deStringHash(group->getName()));
// Iterate over the number types from NUMBERTYPE_INT32 up to (not including) NUMBERTYPE_END32.
19258 for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19260 NumberType numberType = NumberType(type);
19261 const string typeName = getNumberTypeName(numberType);
19262 const string description = "Test the OpCompositeInsert instruction with " + typeName + "s";
19263 de::MovePtr<tcu::TestCaseGroup> subGroup (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19264 vector<map<string, string> > testCases;
19266 createCompositeCases(testCases, rnd, numberType);
19268 for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19270 ComputeShaderSpec spec;
19272 spec.assembly = specializeCompositeInsertShaderTemplate(numberType, *test);
// Input and expected output hold the same random value: the shader inserts the input into the
// composite and extracts it again from the same position.
19274 switch (numberType)
19276 case NUMBERTYPE_INT32:
19278 deInt32 number = getInt(rnd);
19279 spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19280 spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19283 case NUMBERTYPE_UINT32:
19285 deUint32 number = rnd.getUint32();
19286 spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19287 spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19290 case NUMBERTYPE_FLOAT32:
19292 float number = rnd.getFloat();
19293 spec.inputs.push_back(createCompositeBuffer<float>(number));
19294 spec.outputs.push_back(createCompositeBuffer<float>(number));
// A single workgroup is enough: each buffer holds exactly one element.
19301 spec.numWorkGroups = IVec3(1, 1, 1);
19302 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpCompositeInsert test", spec));
19304 group->addChild(subGroup.release());
19306 return group.release();
// Describes the wrapper struct used by the OpInBoundsAccessChain shader template: how many
// members it has (`components`) and, per the template code that reads `structInfo.index`,
// which member is addressed.
19309 struct AssemblyStructInfo
// comp: number of struct members; idx: index of the addressed member.
19311 AssemblyStructInfo (const deUint32 comp, const deUint32 idx)
19312 : components (comp)
// Number of members in the wrapper struct.
19316 deUint32 components;
// Produces the compute shader assembly for one OpInBoundsAccessChain case.
// `type` selects the scalar type id substituted for ${customType}; `structInfo` gives the size
// of the wrapper struct and the member to address; `params` is one parameter set produced by
// the create*CompositeCases helpers (read with at(): "indexes", "compositeDecl",
// "compositeType", "compositeConstruct", "filler").
// Returns the fully specialized assembly text.
19320 const string specializeInBoundsShaderTemplate (const NumberType type, const AssemblyStructInfo& structInfo, const map<string, string>& params)
19322 // Create the full index string
19323 string fullIndex = numberToString(structInfo.index) + " " + params.at("indexes");
19324 // Convert it to list of indexes
19325 vector<string> indexes = de::splitString(fullIndex, ' ');
// Work on a copy so the caller's parameter set is left untouched.
19327 map<string, string> parameters (params);
// The wrapper struct consists of `components` copies of the composite type, each constructed
// from the same %instance value.
19328 parameters["structType"] = repeatString(" ${compositeType}", structInfo.components);
19329 parameters["structConstruct"] = repeatString(" %instance", structInfo.components);
19330 parameters["insertIndexes"] = fullIndex;
19332 // In matrix cases the last two index is the CompositeExtract indexes
19333 const deUint32 extractIndexes = (parameters["type"] == "matrix") ? 2 : 1;
19335 // Construct the extractIndex
19336 for (vector<string>::const_iterator index = indexes.end() - extractIndexes; index != indexes.end(); ++index)
19338 parameters["extractIndexes"] += " " + *index;
19341 // Remove the last 1 or 2 element depends on matrix case or not
19342 indexes.erase(indexes.end() - extractIndexes, indexes.end());
19345 // Generate AccessChain index expressions (except for the last one, because we use ptr to the composite)
19346 for (vector<string>::const_iterator index = indexes.begin(); index != indexes.end(); ++index)
// Each remaining index becomes a %u32 constant named %index_<n> that is appended to the access chain.
19348 string indexId = "%index_" + numberToString(id++);
19349 parameters["accessChainConstDeclaration"] += indexId + " = OpConstant %u32 " + *index + "\n";
19350 parameters["accessChainIndexes"] += " " + indexId;
// Only array composites get an ArrayStride decoration.
19353 parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
// Resolve ${customType} inside the per-case snippets.
19355 const string customType = getAssemblyTypeName(type);
19356 map<string, string> substCustomType;
19357 substCustomType["customType"] = customType;
19358 parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19359 parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19360 parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19361 parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19362 parameters["customType"] = customType;
// Resolve ${compositeType} inside the struct member list now that the composite type is final.
19364 const string compositeType = parameters.at("compositeType");
19365 map<string, string> substCompositeType;
19366 substCompositeType["compositeType"] = compositeType;
19367 parameters["structType"] = StringTemplate(parameters.at("structType")).specialize(substCompositeType);
// %u32vec3 is needed for the invocation id; it is declared here unless the composite type is
// itself %u32vec3.
19368 if (compositeType != "%u32vec3")
19370 parameters["u32vec3Decl"] = "%u32vec3 = OpTypeVector %u32 3\n";
19373 return StringTemplate(
19374 "OpCapability Shader\n"
19375 "OpCapability Matrix\n"
19376 "OpMemoryModel Logical GLSL450\n"
19377 "OpEntryPoint GLCompute %main \"main\" %id\n"
19378 "OpExecutionMode %main LocalSize 1 1 1\n"
19380 "OpSource GLSL 430\n"
19381 "OpName %main \"main\"\n"
19382 "OpName %id \"gl_GlobalInvocationID\"\n"
19384 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19385 "OpDecorate %buf BufferBlock\n"
19386 "OpDecorate %indata DescriptorSet 0\n"
19387 "OpDecorate %indata Binding 0\n"
19388 "OpDecorate %outdata DescriptorSet 0\n"
19389 "OpDecorate %outdata Binding 1\n"
19390 "OpDecorate %customarr ArrayStride 4\n"
19391 "${compositeDecorator}"
19392 "OpMemberDecorate %buf 0 Offset 0\n"
19394 "%void = OpTypeVoid\n"
19395 "%voidf = OpTypeFunction %void\n"
19396 "%i32 = OpTypeInt 32 1\n"
19397 "%u32 = OpTypeInt 32 0\n"
19398 "%f32 = OpTypeFloat 32\n"
19401 // %u32vec3 if not already declared in ${compositeDecl}
19402 "${u32vec3Decl:opt}"
19403 "%uvec3ptr = OpTypePointer Input %u32vec3\n"
19404 // Inherited from composite
19405 "%composite_p = OpTypePointer Function ${compositeType}\n"
19406 "%struct_t = OpTypeStruct${structType}\n"
19407 "%struct_p = OpTypePointer Function %struct_t\n"
19410 "${accessChainConstDeclaration}"
19411 // Inherited from custom
19412 "%customptr = OpTypePointer Uniform ${customType}\n"
19413 "%customarr = OpTypeRuntimeArray ${customType}\n"
19414 "%buf = OpTypeStruct %customarr\n"
19415 "%bufptr = OpTypePointer Uniform %buf\n"
19416 "%indata = OpVariable %bufptr Uniform\n"
19417 "%outdata = OpVariable %bufptr Uniform\n"
19419 "%id = OpVariable %uvec3ptr Input\n"
19420 "%zero = OpConstant %u32 0\n"
19421 "%main = OpFunction %void None %voidf\n"
19422 "%label = OpLabel\n"
19423 "%struct_v = OpVariable %struct_p Function\n"
19424 "%idval = OpLoad %u32vec3 %id\n"
19425 "%x = OpCompositeExtract %u32 %idval 0\n"
19426 // Create the input/output type
19427 "%inloc = OpInBoundsAccessChain %customptr %indata %zero %x\n"
19428 "%outloc = OpInBoundsAccessChain %customptr %outdata %zero %x\n"
19429 // Read the input value
19430 "%inval = OpLoad ${customType} %inloc\n"
19431 // Create the composite and fill it
19432 "${compositeConstruct}"
19433 // Create the struct and fill it with the composite
19434 "%struct = OpCompositeConstruct %struct_t${structConstruct}\n"
19435 // Insert the value
19436 "%comp_obj = OpCompositeInsert %struct_t %inval %struct ${insertIndexes}\n"
19437 // Store the object
19438 " OpStore %struct_v %comp_obj\n"
19439 // Get deepest possible composite pointer
19440 "%inner_ptr = OpInBoundsAccessChain %composite_p %struct_v${accessChainIndexes}\n"
19441 "%read_obj = OpLoad ${compositeType} %inner_ptr\n"
19442 // Read back the stored value
19443 "%read_val = OpCompositeExtract ${customType} %read_obj${extractIndexes}\n"
19444 " OpStore %outloc %read_val\n"
19447 ).specialize(parameters);
// Builds the "opinboundsaccesschain" group: one sub-group per 32-bit number type, each holding
// one compute case per composite parameter set. Returns ownership of the group to the caller.
19450 tcu::TestCaseGroup* createOpInBoundsAccessChainGroup (tcu::TestContext& testCtx)
19452 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opinboundsaccesschain", "Test the OpInBoundsAccessChain instruction"));
// The random generator is seeded from the group name, so the generated cases are reproducible.
19453 de::Random rnd (deStringHash(group->getName()));
// Iterate over the number types from NUMBERTYPE_INT32 up to (not including) NUMBERTYPE_END32.
19455 for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19457 NumberType numberType = NumberType(type);
19458 const string typeName = getNumberTypeName(numberType);
19459 const string description = "Test the OpInBoundsAccessChain instruction with " + typeName + "s";
19460 de::MovePtr<tcu::TestCaseGroup> subGroup (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19462 vector<map<string, string> > testCases;
19463 createCompositeCases(testCases, rnd, numberType);
19465 for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19467 ComputeShaderSpec spec;
19469 // Number of components inside of a struct
19470 deUint32 structComponents = rnd.getInt(2, 8);
19471 // Component index value
19472 deUint32 structIndex = rnd.getInt(0, structComponents - 1);
19473 AssemblyStructInfo structInfo(structComponents, structIndex);
19475 spec.assembly = specializeInBoundsShaderTemplate(numberType, structInfo, *test);
// Input and expected output hold the same random value: the shader stores the input inside
// the nested composite and reads it back through the access chain.
19477 switch (numberType)
19479 case NUMBERTYPE_INT32:
19481 deInt32 number = getInt(rnd);
19482 spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19483 spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19486 case NUMBERTYPE_UINT32:
19488 deUint32 number = rnd.getUint32();
19489 spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19490 spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19493 case NUMBERTYPE_FLOAT32:
19495 float number = rnd.getFloat();
19496 spec.inputs.push_back(createCompositeBuffer<float>(number));
19497 spec.outputs.push_back(createCompositeBuffer<float>(number));
// A single workgroup is enough: each buffer holds exactly one element.
19503 spec.numWorkGroups = IVec3(1, 1, 1);
19504 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpInBoundsAccessChain test", spec));
19506 group->addChild(subGroup.release());
19508 return group.release();
// Produces the compute shader assembly for the OpVariable default-output tests.
// `type` selects the scalar type id substituted for ${customType}. If `params` contains
// "constValue", the function-local variable is initialized with a constant of that value;
// if `params` is omitted (or has no "constValue"), the uninitialized variant is generated.
// Returns the fully specialized assembly text.
19511 // If the params missing, uninitialized case
19512 const string specializeDefaultOutputShaderTemplate (const NumberType type, const map<string, string>& params = map<string, string>())
19514 map<string, string> parameters(params);
19516 parameters["customType"] = getAssemblyTypeName(type);
19518 // Declare the const value, and use it in the initializer
19519 if (params.find("constValue") != params.end())
19521 parameters["variableInitializer"] = " %const";
19523 // Uninitialized case
// ";" is placed in front of the "%const = OpConstant ..." line of the template, turning that
// line into an assembly comment since no constant value is available.
19526 parameters["commentDecl"] = ";";
19529 return StringTemplate(
19530 "OpCapability Shader\n"
19531 "OpMemoryModel Logical GLSL450\n"
19532 "OpEntryPoint GLCompute %main \"main\" %id\n"
19533 "OpExecutionMode %main LocalSize 1 1 1\n"
19534 "OpSource GLSL 430\n"
19535 "OpName %main \"main\"\n"
19536 "OpName %id \"gl_GlobalInvocationID\"\n"
19538 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19539 "OpDecorate %indata DescriptorSet 0\n"
19540 "OpDecorate %indata Binding 0\n"
19541 "OpDecorate %outdata DescriptorSet 0\n"
19542 "OpDecorate %outdata Binding 1\n"
19543 "OpDecorate %in_arr ArrayStride 4\n"
19544 "OpDecorate %in_buf BufferBlock\n"
19545 "OpMemberDecorate %in_buf 0 Offset 0\n"
19547 "%void = OpTypeVoid\n"
19548 "%voidf = OpTypeFunction %void\n"
19549 "%u32 = OpTypeInt 32 0\n"
19550 "%i32 = OpTypeInt 32 1\n"
19551 "%f32 = OpTypeFloat 32\n"
19552 "%uvec3 = OpTypeVector %u32 3\n"
19553 "%uvec3ptr = OpTypePointer Input %uvec3\n"
// Optional constant declaration; prefixed by ${commentDecl} in the uninitialized case.
19554 "${commentDecl:opt}%const = OpConstant ${customType} ${constValue:opt}\n"
19556 "%in_ptr = OpTypePointer Uniform ${customType}\n"
19557 "%in_arr = OpTypeRuntimeArray ${customType}\n"
19558 "%in_buf = OpTypeStruct %in_arr\n"
19559 "%in_bufptr = OpTypePointer Uniform %in_buf\n"
19560 "%indata = OpVariable %in_bufptr Uniform\n"
19561 "%outdata = OpVariable %in_bufptr Uniform\n"
19562 "%id = OpVariable %uvec3ptr Input\n"
19563 "%var_ptr = OpTypePointer Function ${customType}\n"
19565 "%zero = OpConstant %i32 0\n"
19567 "%main = OpFunction %void None %voidf\n"
19568 "%label = OpLabel\n"
// The variable under test; it carries the " %const" initializer only in the initialized case.
19569 "%out_var = OpVariable %var_ptr Function${variableInitializer:opt}\n"
19570 "%idval = OpLoad %uvec3 %id\n"
19571 "%x = OpCompositeExtract %u32 %idval 0\n"
19572 "%inloc = OpAccessChain %in_ptr %indata %zero %x\n"
19573 "%outloc = OpAccessChain %in_ptr %outdata %zero %x\n"
// The (possibly uninitialized) variable is loaded and written to the output buffer.
19575 "%outval = OpLoad ${customType} %out_var\n"
19576 " OpStore %outloc %outval\n"
19579 ).specialize(parameters);
19582 bool compareFloats (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
19584 DE_ASSERT(outputAllocs.size() != 0);
19585 DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
19587 // Use custom epsilon because of the float->string conversion
19588 const float epsilon = 0.00001f;
19590 for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
19592 vector<deUint8> expectedBytes;
19596 expectedOutputs[outputNdx].getBytes(expectedBytes);
19597 memcpy(&expected, &expectedBytes.front(), expectedBytes.size());
19598 memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size());
19600 // Test with epsilon
19601 if (fabs(expected - actual) > epsilon)
19603 log << TestLog::Message << "Error: The actual and expected values not matching."
19604 << " Expected: " << expected << " Actual: " << actual << " Epsilon: " << epsilon << TestLog::EndMessage;
19611 // Checks if the driver crash with uninitialized cases
19612 bool passthruVerify (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
19614 DE_ASSERT(outputAllocs.size() != 0);
19615 DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
19617 // Copy and discard the result.
19618 for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
19620 vector<deUint8> expectedBytes;
19621 expectedOutputs[outputNdx].getBytes(expectedBytes);
19623 const size_t width = expectedBytes.size();
19624 vector<char> data (width);
19626 memcpy(&data[0], outputAllocs[outputNdx]->getHostPtr(), width);
// Builds the "shader_default_output" group: one sub-group per 32-bit number type, each with an
// "initialized" and an "uninitialized" OpVariable case. Returns ownership of the group.
19631 tcu::TestCaseGroup* createShaderDefaultOutputGroup (tcu::TestContext& testCtx)
19633 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "shader_default_output", "Test shader default output."));
// The random generator is seeded from the group name, so the generated values are reproducible.
19634 de::Random rnd (deStringHash(group->getName()));
// Iterate over the number types from NUMBERTYPE_INT32 up to (not including) NUMBERTYPE_END32.
19636 for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19638 NumberType numberType = NumberType(type);
19639 const string typeName = getNumberTypeName(numberType);
19640 const string description = "Test the OpVariable initializer with " + typeName + ".";
19641 de::MovePtr<tcu::TestCaseGroup> subGroup (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19643 // 2 similar subcases (initialized and uninitialized)
19644 for (int subCase = 0; subCase < 2; ++subCase)
19646 ComputeShaderSpec spec;
19647 spec.numWorkGroups = IVec3(1, 1, 1);
19649 map<string, string> params;
// The random value is used as input, as expected output, and (as text) as the constant that
// initializes the variable in the shader.
19651 switch (numberType)
19653 case NUMBERTYPE_INT32:
19655 deInt32 number = getInt(rnd);
19656 spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19657 spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19658 params["constValue"] = numberToString(number);
19661 case NUMBERTYPE_UINT32:
19663 deUint32 number = rnd.getUint32();
19664 spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19665 spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19666 params["constValue"] = numberToString(number);
19669 case NUMBERTYPE_FLOAT32:
19671 float number = rnd.getFloat();
19672 spec.inputs.push_back(createCompositeBuffer<float>(number));
19673 spec.outputs.push_back(createCompositeBuffer<float>(number));
// Floats go through a text conversion, so they are verified with an epsilon comparison.
19674 spec.verifyIO = &compareFloats;
19675 params["constValue"] = numberToString(number);
19682 // Initialized subcase
19685 spec.assembly = specializeDefaultOutputShaderTemplate(numberType, params);
19686 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "initialized", "OpVariable initializer tests.", spec));
19688 // Uninitialized subcase
// No params are passed, which selects the uninitialized shader variant; the output is only
// read back by passthruVerify, not compared.
19691 spec.assembly = specializeDefaultOutputShaderTemplate(numberType);
19692 spec.verifyIO = &passthruVerify;
19693 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "uninitialized", "OpVariable initializer tests.", spec));
19696 group->addChild(subGroup.release());
19698 return group.release();
// Builds the "opnop" group by registering the "testfun" fragment below for all shader stages.
// Returns ownership of the group to the caller.
19701 tcu::TestCaseGroup* createOpNopTests (tcu::TestContext& testCtx)
19703 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opnop", "Test OpNop"));
19704 RGBA defaultColors[4];
19705 map<string, string> opNopFragments;
19707 getDefaultColors(defaultColors);
// The test function computes (a + a) - a on component 0 of its parameter and writes the result
// back to the same component.
19709 opNopFragments["testfun"] =
19710 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19711 "%param1 = OpFunctionParameter %v4f32\n"
19712 "%label_testfun = OpLabel\n"
19721 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19722 "%b = OpFAdd %f32 %a %a\n"
19724 "%c = OpFSub %f32 %b %a\n"
19725 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19728 "OpReturnValue %ret\n"
// defaultColors is passed twice (presumably as input and expected output colors - confirm
// against createTestsForAllStages).
19731 createTestsForAllStages("opnop", defaultColors, defaultColors, opNopFragments, testGroup.get());
19733 return testGroup.release();
// Builds the "opname" group by registering the fragments below for all shader stages.
// Returns ownership of the group to the caller.
19736 tcu::TestCaseGroup* createOpNameTests (tcu::TestContext& testCtx)
19738 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opname","Test OpName"));
19739 RGBA defaultColors[4];
19740 map<string, string> opNameFragments;
19742 getDefaultColors(defaultColors);
// The test function computes (a + a) - a on component 0 of its parameter and writes the result
// back to the same component.
19744 opNameFragments["testfun"] =
19745 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19746 "%param1 = OpFunctionParameter %v4f32\n"
19747 "%label_func = OpLabel\n"
19748 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19749 "%b = OpFAdd %f32 %a %a\n"
19750 "%c = OpFSub %f32 %b %a\n"
19751 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19752 "OpReturnValue %ret\n"
// The debug fragment attaches the name "not_main" to %BP_main via OpName.
19755 opNameFragments["debug"] =
19756 "OpName %BP_main \"not_main\"";
// defaultColors is passed twice (presumably as input and expected output colors - confirm
// against createTestsForAllStages).
19758 createTestsForAllStages("opname", defaultColors, defaultColors, opNameFragments, testGroup.get());
19760 return testGroup.release();
19763 tcu::TestCaseGroup* createFloat16Tests (tcu::TestContext& testCtx)
19765 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
19767 testGroup->addChild(createOpConstantFloat16Tests(testCtx));
19768 testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITH_NAN));
19769 testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITHOUT_NAN));
19770 testGroup->addChild(createFloat16FuncSet<GraphicsResources>(testCtx));
19771 testGroup->addChild(createFloat16VectorExtractSet<GraphicsResources>(testCtx));
19772 testGroup->addChild(createFloat16VectorInsertSet<GraphicsResources>(testCtx));
19773 testGroup->addChild(createFloat16VectorShuffleSet<GraphicsResources>(testCtx));
19774 testGroup->addChild(createFloat16CompositeConstructSet<GraphicsResources>(testCtx));
19775 testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeExtract"));
19776 testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeInsert"));
19777 testGroup->addChild(createFloat16ArithmeticSet<GraphicsResources>(testCtx));
19778 testGroup->addChild(createFloat16ArithmeticSet<1, GraphicsResources>(testCtx));
19779 testGroup->addChild(createFloat16ArithmeticSet<2, GraphicsResources>(testCtx));
19780 testGroup->addChild(createFloat16ArithmeticSet<3, GraphicsResources>(testCtx));
19781 testGroup->addChild(createFloat16ArithmeticSet<4, GraphicsResources>(testCtx));
19783 return testGroup.release();
19786 tcu::TestCaseGroup* createFloat32Tests (tcu::TestContext& testCtx)
19788 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "float32", "Float 32 tests"));
19790 testGroup->addChild(createFloat32ComparisonGraphicsSet<1>(testCtx));
19791 testGroup->addChild(createFloat32ComparisonGraphicsSet<2>(testCtx));
19792 testGroup->addChild(createFloat32ComparisonGraphicsSet<3>(testCtx));
19793 testGroup->addChild(createFloat32ComparisonGraphicsSet<4>(testCtx));
19795 return testGroup.release();
19798 tcu::TestCaseGroup* createFloat16Group (tcu::TestContext& testCtx)
19800 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
19802 testGroup->addChild(createFloat16OpConstantCompositeGroup(testCtx));
19803 testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITH_NAN));
19804 testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITHOUT_NAN));
19805 testGroup->addChild(createFloat16FuncSet<ComputeShaderSpec>(testCtx));
19806 testGroup->addChild(createFloat16VectorExtractSet<ComputeShaderSpec>(testCtx));
19807 testGroup->addChild(createFloat16VectorInsertSet<ComputeShaderSpec>(testCtx));
19808 testGroup->addChild(createFloat16VectorShuffleSet<ComputeShaderSpec>(testCtx));
19809 testGroup->addChild(createFloat16CompositeConstructSet<ComputeShaderSpec>(testCtx));
19810 testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeExtract"));
19811 testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeInsert"));
19812 testGroup->addChild(createFloat16ArithmeticSet<ComputeShaderSpec>(testCtx));
19813 testGroup->addChild(createFloat16ArithmeticSet<1, ComputeShaderSpec>(testCtx));
19814 testGroup->addChild(createFloat16ArithmeticSet<2, ComputeShaderSpec>(testCtx));
19815 testGroup->addChild(createFloat16ArithmeticSet<3, ComputeShaderSpec>(testCtx));
19816 testGroup->addChild(createFloat16ArithmeticSet<4, ComputeShaderSpec>(testCtx));
19818 return testGroup.release();
19821 tcu::TestCaseGroup* createFloat32Group (tcu::TestContext& testCtx)
19823 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "float32", "Float 32 tests"));
19825 testGroup->addChild(createFloat32ComparisonComputeSet<1>(testCtx));
19826 testGroup->addChild(createFloat32ComparisonComputeSet<2>(testCtx));
19827 testGroup->addChild(createFloat32ComparisonComputeSet<3>(testCtx));
19828 testGroup->addChild(createFloat32ComparisonComputeSet<4>(testCtx));
19830 return testGroup.release();
// Builds the "mixed_bitsize" compute test group. Every test case evaluates four
// comparisons, each performed on a 32-bit float, 16-bit float or 8-bit integer
// operand, combines the four boolean results with logical instructions and writes
// 1.0 or 0.0 to the output buffer. The expected output is the same boolean
// expression evaluated on the CPU over the float32 input data.
// Returns the group released from its MovePtr.
19833 tcu::TestCaseGroup* createBoolMixedBitSizeGroup (tcu::TestContext& testCtx)
19835 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "mixed_bitsize", "Tests boolean operands produced from instructions of different bit-sizes"));
// The random generator is seeded with a hash of the group name.
19837 de::Random rnd (deStringHash(group->getName()));
19838 const int numElements = 100;
19839 vector<float> inputData (numElements, 0);
19840 vector<float> outputData (numElements, 0);
// NOTE(review): the element count is passed as the literal 100 rather than
// numElements; the two values have to be kept in sync by hand.
19841 fillRandomScalars(rnd, 0.0f, 100.0f, &inputData[0], 100);
// Compute shader skeleton shared by all cases. It is specialized per case with
// the "CAPS", "CONST" and "TEST" entries assigned to `specializations` further down.
19843 const StringTemplate shaderTemplate (
19845 "OpMemoryModel Logical GLSL450\n"
19846 "OpEntryPoint GLCompute %main \"main\" %id\n"
19847 "OpExecutionMode %main LocalSize 1 1 1\n"
19848 "OpSource GLSL 430\n"
19849 "OpName %main \"main\"\n"
19850 "OpName %id \"gl_GlobalInvocationID\"\n"
19852 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19854 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
19856 "%id = OpVariable %uvec3ptr Input\n"
19858 "%main = OpFunction %void None %voidf\n"
19859 "%label = OpLabel\n"
19860 "%idval = OpLoad %uvec3 %id\n"
19861 "%x = OpCompositeExtract %u32 %idval 0\n"
19862 "%inloc = OpAccessChain %f32ptr %indata %c0i32 %x\n"
19866 "%outloc = OpAccessChain %f32ptr %outdata %c0i32 %x\n"
19867 " OpStore %outloc %res\n"
19872 // Each test case produces 4 boolean values, and we want each of these values
19873 // to come from a different combination of the available bit-sizes, so compute
19874 // all possible combinations here.
19875 vector<deUint32> widths;
19876 widths.push_back(32);
19877 widths.push_back(16);
19878 widths.push_back(8);
// Four nested loops over three widths enumerate all 3^4 = 81 ordered combinations.
19880 vector<IVec4> cases;
19881 for (size_t width0 = 0; width0 < widths.size(); width0++)
19883 for (size_t width1 = 0; width1 < widths.size(); width1++)
19885 for (size_t width2 = 0; width2 < widths.size(); width2++)
19887 for (size_t width3 = 0; width3 < widths.size(); width3++)
19889 cases.push_back(IVec4(widths[width0], widths[width1], widths[width2], widths[width3]));
19895 for (size_t caseNdx = 0; caseNdx < cases.size(); caseNdx++)
19897 /// Skip cases where all bitsizes are the same, we are only interested in testing booleans produced from instructions with different native bit-sizes
19898 if (cases[caseNdx][0] == cases[caseNdx][1] && cases[caseNdx][0] == cases[caseNdx][2] && cases[caseNdx][0] == cases[caseNdx][3])
19901 map<string, string> specializations;
19902 ComputeShaderSpec spec;
19904 // Inject appropriate capabilities and reference constants depending
19905 // on the bit-sizes required by this test case
19906 bool hasFloat32 = cases[caseNdx][0] == 32 || cases[caseNdx][1] == 32 || cases[caseNdx][2] == 32 || cases[caseNdx][3] == 32;
19907 bool hasFloat16 = cases[caseNdx][0] == 16 || cases[caseNdx][1] == 16 || cases[caseNdx][2] == 16 || cases[caseNdx][3] == 16;
19908 bool hasInt8 = cases[caseNdx][0] == 8 || cases[caseNdx][1] == 8 || cases[caseNdx][2] == 8 || cases[caseNdx][3] == 8;
19910 string capsStr = "OpCapability Shader\n";
// Constants shared by every case: integer zero for buffer indexing and the
// 1.0 / 0.0 values selected as the final result.
19912 "%c0i32 = OpConstant %i32 0\n"
19913 "%c1f32 = OpConstant %f32 1.0\n"
19914 "%c0f32 = OpConstant %f32 0.0\n";
// Comparison thresholds (10, 25, 50, 90) declared once per operand type.
// NOTE(review): the statements guarding each set of constants are not visible
// here — presumably conditioned on hasFloat32 / hasFloat16 / hasInt8.
19919 "%c10f32 = OpConstant %f32 10.0\n"
19920 "%c25f32 = OpConstant %f32 25.0\n"
19921 "%c50f32 = OpConstant %f32 50.0\n"
19922 "%c90f32 = OpConstant %f32 90.0\n";
19927 capsStr += "OpCapability Float16\n";
19929 "%f16 = OpTypeFloat 16\n"
19930 "%c10f16 = OpConstant %f16 10.0\n"
19931 "%c25f16 = OpConstant %f16 25.0\n"
19932 "%c50f16 = OpConstant %f16 50.0\n"
19933 "%c90f16 = OpConstant %f16 90.0\n";
19938 capsStr += "OpCapability Int8\n";
19940 "%i8 = OpTypeInt 8 1\n"
19941 "%c10i8 = OpConstant %i8 10\n"
19942 "%c25i8 = OpConstant %i8 25\n"
19943 "%c50i8 = OpConstant %i8 50\n"
19944 "%c90i8 = OpConstant %i8 90\n";
19947 // Each invocation reads a different float32 value as input. Depending on
19948 // the bit-sizes required by the particular test case, we also produce
19949 // float16 and/or int8 values by converting from the 32-bit float.
19950 string testStr = "";
19951 testStr += "%inval32 = OpLoad %f32 %inloc\n";
19953 testStr += "%inval16 = OpFConvert %f16 %inval32\n";
19955 testStr += "%inval8 = OpConvertFToS %i8 %inval32\n";
19957 // Because conversions from Float to Int round towards 0 we want our "greater" comparisons to be >=,
19958 // that way a float32/float16 comparison such as 50.6f >= 50.0f will preserve its result
19959 // when converted to int8, since FtoS(50.6f) results in 50. For "less" comparisons, it is the
19960 // other way around, so in this case we want < instead of <=.
// cmp1: value >= 25, at the width selected by component 0 of the case.
19961 if (cases[caseNdx][0] == 32)
19962 testStr += "%cmp1 = OpFOrdGreaterThanEqual %bool %inval32 %c25f32\n";
19963 else if (cases[caseNdx][0] == 16)
19964 testStr += "%cmp1 = OpFOrdGreaterThanEqual %bool %inval16 %c25f16\n";
19966 testStr += "%cmp1 = OpSGreaterThanEqual %bool %inval8 %c25i8\n";
// cmp2: value < 50, at the width selected by component 1.
19968 if (cases[caseNdx][1] == 32)
19969 testStr += "%cmp2 = OpFOrdLessThan %bool %inval32 %c50f32\n";
19970 else if (cases[caseNdx][1] == 16)
19971 testStr += "%cmp2 = OpFOrdLessThan %bool %inval16 %c50f16\n";
19973 testStr += "%cmp2 = OpSLessThan %bool %inval8 %c50i8\n";
// cmp3: value < 10, at the width selected by component 2.
19975 if (cases[caseNdx][2] == 32)
19976 testStr += "%cmp3 = OpFOrdLessThan %bool %inval32 %c10f32\n";
19977 else if (cases[caseNdx][2] == 16)
19978 testStr += "%cmp3 = OpFOrdLessThan %bool %inval16 %c10f16\n";
19980 testStr += "%cmp3 = OpSLessThan %bool %inval8 %c10i8\n";
// cmp4: value >= 90, at the width selected by component 3.
19982 if (cases[caseNdx][3] == 32)
19983 testStr += "%cmp4 = OpFOrdGreaterThanEqual %bool %inval32 %c90f32\n";
19984 else if (cases[caseNdx][3] == 16)
19985 testStr += "%cmp4 = OpFOrdGreaterThanEqual %bool %inval16 %c90f16\n";
19987 testStr += "%cmp4 = OpSGreaterThanEqual %bool %inval8 %c90i8\n";
// res = !((cmp1 && cmp2) || (cmp3 || cmp4)), stored as 1.0 (true) or 0.0 (false).
19989 testStr += "%and1 = OpLogicalAnd %bool %cmp1 %cmp2\n";
19990 testStr += "%or1 = OpLogicalOr %bool %cmp3 %cmp4\n";
19991 testStr += "%or2 = OpLogicalOr %bool %and1 %or1\n";
19992 testStr += "%not1 = OpLogicalNot %bool %or2\n";
19993 testStr += "%res = OpSelect %f32 %not1 %c1f32 %c0f32\n";
19995 specializations["CAPS"] = capsStr;
19996 specializations["CONST"] = constStr;
19997 specializations["TEST"] = testStr;
19999 // Compute expected result by evaluating the boolean expression computed in the shader for each input value
20000 for (size_t ndx = 0; ndx < numElements; ++ndx)
20001 outputData[ndx] = !((inputData[ndx] >= 25.0f && inputData[ndx] < 50.0f) || (inputData[ndx] < 10.0f || inputData[ndx] >= 90.0f));
20003 spec.assembly = shaderTemplate.specialize(specializations);
20004 spec.inputs.push_back(BufferSp(new Float32Buffer(inputData)));
20005 spec.outputs.push_back(BufferSp(new Float32Buffer(outputData)));
// One workgroup per input element; the shader declares LocalSize 1 1 1.
20006 spec.numWorkGroups = IVec3(numElements, 1, 1);
// NOTE(review): the conditions guarding these two feature requests are not
// visible here — presumably hasFloat16 and hasInt8 respectively.
20008 spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
20010 spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
20011 spec.extensions.push_back("VK_KHR_shader_float16_int8");
// The case name lists the four widths in order, e.g. "b32b16b8b32".
20013 string testName = "b" + de::toString(cases[caseNdx][0]) + "b" + de::toString(cases[caseNdx][1]) + "b" + de::toString(cases[caseNdx][2]) + "b" + de::toString(cases[caseNdx][3]);
20014 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", spec));
20017 return group.release();
20020 tcu::TestCaseGroup* createBoolGroup (tcu::TestContext& testCtx)
20022 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "bool", "Boolean tests"));
20024 testGroup->addChild(createBoolMixedBitSizeGroup(testCtx));
20026 return testGroup.release();
20029 tcu::TestCaseGroup* createOpNameAbuseTests (tcu::TestContext& testCtx)
20031 de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opname_abuse", "OpName abuse tests"));
20032 vector<CaseParameter> abuseCases;
20033 RGBA defaultColors[4];
20034 map<string, string> opNameFragments;
20036 getOpNameAbuseCases(abuseCases);
20037 getDefaultColors(defaultColors);
20039 opNameFragments["testfun"] =
20040 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20041 "%param1 = OpFunctionParameter %v4f32\n"
20042 "%label_func = OpLabel\n"
20043 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20044 "%b = OpFAdd %f32 %a %a\n"
20045 "%c = OpFSub %f32 %b %a\n"
20046 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
20047 "OpReturnValue %ret\n"
20050 for (unsigned int i = 0; i < abuseCases.size(); i++)
20053 casename = string("main") + abuseCases[i].name;
20055 opNameFragments["debug"] =
20056 "OpName %BP_main \"" + abuseCases[i].param + "\"";
20058 createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20061 for (unsigned int i = 0; i < abuseCases.size(); i++)
20064 casename = string("b") + abuseCases[i].name;
20066 opNameFragments["debug"] =
20067 "OpName %b \"" + abuseCases[i].param + "\"";
20069 createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20073 opNameFragments["debug"] =
20074 "OpName %test_code \"name1\"\n"
20075 "OpName %param1 \"name2\"\n"
20076 "OpName %a \"name3\"\n"
20077 "OpName %b \"name4\"\n"
20078 "OpName %c \"name5\"\n"
20079 "OpName %ret \"name6\"\n";
20081 createTestsForAllStages("everything_named", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20085 opNameFragments["debug"] =
20086 "OpName %test_code \"the_same\"\n"
20087 "OpName %param1 \"the_same\"\n"
20088 "OpName %a \"the_same\"\n"
20089 "OpName %b \"the_same\"\n"
20090 "OpName %c \"the_same\"\n"
20091 "OpName %ret \"the_same\"\n";
20093 createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20097 opNameFragments["debug"] =
20098 "OpName %BP_main \"to_be\"\n"
20099 "OpName %BP_main \"or_not\"\n"
20100 "OpName %BP_main \"to_be\"\n";
20102 createTestsForAllStages("main_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20106 opNameFragments["debug"] =
20107 "OpName %b \"to_be\"\n"
20108 "OpName %b \"or_not\"\n"
20109 "OpName %b \"to_be\"\n";
20111 createTestsForAllStages("b_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20114 return abuseGroup.release();
20118 tcu::TestCaseGroup* createOpMemberNameAbuseTests (tcu::TestContext& testCtx)
20120 de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opmembername_abuse", "OpName abuse tests"));
20121 vector<CaseParameter> abuseCases;
20122 RGBA defaultColors[4];
20123 map<string, string> opMemberNameFragments;
20125 getOpNameAbuseCases(abuseCases);
20126 getDefaultColors(defaultColors);
20128 opMemberNameFragments["pre_main"] =
20129 "%f3str = OpTypeStruct %f32 %f32 %f32\n";
20131 opMemberNameFragments["testfun"] =
20132 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20133 "%param1 = OpFunctionParameter %v4f32\n"
20134 "%label_func = OpLabel\n"
20135 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20136 "%b = OpFAdd %f32 %a %a\n"
20137 "%c = OpFSub %f32 %b %a\n"
20138 "%cstr = OpCompositeConstruct %f3str %c %c %c\n"
20139 "%d = OpCompositeExtract %f32 %cstr 0\n"
20140 "%ret = OpVectorInsertDynamic %v4f32 %param1 %d %c_i32_0\n"
20141 "OpReturnValue %ret\n"
20144 for (unsigned int i = 0; i < abuseCases.size(); i++)
20147 casename = string("f3str_x") + abuseCases[i].name;
20149 opMemberNameFragments["debug"] =
20150 "OpMemberName %f3str 0 \"" + abuseCases[i].param + "\"";
20152 createTestsForAllStages(casename, defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20156 opMemberNameFragments["debug"] =
20157 "OpMemberName %f3str 0 \"name1\"\n"
20158 "OpMemberName %f3str 1 \"name2\"\n"
20159 "OpMemberName %f3str 2 \"name3\"\n";
20161 createTestsForAllStages("everything_named", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20165 opMemberNameFragments["debug"] =
20166 "OpMemberName %f3str 0 \"the_same\"\n"
20167 "OpMemberName %f3str 1 \"the_same\"\n"
20168 "OpMemberName %f3str 2 \"the_same\"\n";
20170 createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20174 opMemberNameFragments["debug"] =
20175 "OpMemberName %f3str 0 \"to_be\"\n"
20176 "OpMemberName %f3str 1 \"or_not\"\n"
20177 "OpMemberName %f3str 0 \"to_be\"\n"
20178 "OpMemberName %f3str 2 \"makes_no\"\n"
20179 "OpMemberName %f3str 0 \"difference\"\n"
20180 "OpMemberName %f3str 0 \"to_me\"\n";
20183 createTestsForAllStages("f3str_x_has_multiple_names", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20186 return abuseGroup.release();
20189 vector<deUint32> getSparseIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
20191 vector<deUint32> result;
20192 de::Random rnd (seed);
20194 result.reserve(numDataPoints);
20196 for (deUint32 dataPointNdx = 0; dataPointNdx < numDataPoints; ++dataPointNdx)
20197 result.push_back(rnd.getUint32());
// Computes the expected output of the sparse-ids test: the element-wise sum
// of the two inputs, using unsigned 32-bit arithmetic (sums wrap modulo 2^32).
//
// Only indices present in both inputs are summed, so the result has as many
// elements as the shorter input. Indexing inData2 up to inData1.size() would
// read out of bounds whenever inData2 is the shorter one.
vector<deUint32> getSparseIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2)
{
	const size_t		numResults	= (inData1.size() < inData2.size()) ? inData1.size() : inData2.size();
	vector<deUint32>	result;

	result.reserve(numResults);

	for (size_t dataPointNdx = 0; dataPointNdx < numResults; ++dataPointNdx)
		result.push_back(inData1[dataPointNdx] + inData2[dataPointNdx]);

	return result;
}
// Adds the "sparse_ids" test to testGroup. The shader loops over num_data_points
// elements, loads one value from each of two source buffers, adds them and stores
// the sum to a destination buffer. The intermediate results use explicit numeric
// ids with widely spaced values (%127, %128, %4194000, %4194001, %2097151,
// %2097152). The expected output is the element-wise sum computed by
// getSparseIdsAbuseResults().
//
// SpecResource is the resource description type handed to finalizeTestsCreation();
// when it derives from GraphicsResources, the vertex and fragment store/atomic
// features are requested as well.
20214 template<class SpecResource>
20215 void createSparseIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
20217 const deUint32 numDataPoints = 16;
20218 const std::string testName ("sparse_ids");
// Both inputs are seeded from a hash of the test name, offset by 1 and 2 so
// that the two buffers receive different data.
20219 const deUint32 seed (deStringHash(testName.c_str()));
20220 const vector<deUint32> inData1 (getSparseIdsAbuseData(numDataPoints, seed + 1));
20221 const vector<deUint32> inData2 (getSparseIdsAbuseData(numDataPoints, seed + 2));
20222 const vector<deUint32> outData (getSparseIdsAbuseResults(inData1, inData2));
// Types and variables: three buffer-block variables sharing one struct type
// that wraps an array of ${num_data_points} 32-bit unsigned integers.
20223 const StringTemplate preMain
20225 "%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20226 " %up_u32 = OpTypePointer Uniform %u32\n"
20227 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20228 " %SSBO32 = OpTypeStruct %ra_u32\n"
20229 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20230 "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20231 "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20232 " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
// Layout and bindings: sources at bindings 0 and 1, destination at binding 2,
// all in descriptor set 0.
20234 const StringTemplate decoration
20236 "OpDecorate %ra_u32 ArrayStride 4\n"
20237 "OpMemberDecorate %SSBO32 0 Offset 0\n"
20238 "OpDecorate %SSBO32 BufferBlock\n"
20239 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
20240 "OpDecorate %ssbo_src0 Binding 0\n"
20241 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
20242 "OpDecorate %ssbo_src1 Binding 1\n"
20243 "OpDecorate %ssbo_dst DescriptorSet 0\n"
20244 "OpDecorate %ssbo_dst Binding 2\n"
// Test function: a counted loop over [0, num_data_points) that performs
// dst[i] = src0[i] + src1[i] and finally returns its parameter unchanged.
20246 const StringTemplate testFun
20248 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20249 " %param = OpFunctionParameter %v4f32\n"
20251 " %entry = OpLabel\n"
20252 " %i = OpVariable %fp_i32 Function\n"
20253 " OpStore %i %c_i32_0\n"
20254 " OpBranch %loop\n"
20256 " %loop = OpLabel\n"
20257 " %i_cmp = OpLoad %i32 %i\n"
20258 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20259 " OpLoopMerge %merge %next None\n"
20260 " OpBranchConditional %lt %write %merge\n"
20262 " %write = OpLabel\n"
20263 " %ndx = OpLoad %i32 %i\n"
20265 " %127 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20266 " %128 = OpLoad %u32 %127\n"
20268 // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20269 " %4194000 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20270 " %4194001 = OpLoad %u32 %4194000\n"
20272 " %2097151 = OpIAdd %u32 %128 %4194001\n"
20273 " %2097152 = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20274 " OpStore %2097152 %2097151\n"
20275 " OpBranch %next\n"
20277 " %next = OpLabel\n"
20278 " %i_cur = OpLoad %i32 %i\n"
20279 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20280 " OpStore %i %i_new\n"
20281 " OpBranch %loop\n"
20283 " %merge = OpLabel\n"
20284 " OpReturnValue %param\n"
20288 SpecResource specResource;
20289 map<string, string> specs;
20290 VulkanFeatures features;
20291 map<string, string> fragments;
20292 vector<string> extensions;
20294 specs["num_data_points"] = de::toString(numDataPoints);
20296 fragments["decoration"] = decoration.specialize(specs);
20297 fragments["pre_main"] = preMain.specialize(specs);
20298 fragments["testfun"] = testFun.specialize(specs);
// Resources are pushed in the order inData1, inData2, outData.
// NOTE(review): presumably this order maps onto bindings 0, 1 and 2 — confirm
// against finalizeTestsCreation().
20300 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20301 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20302 specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
// Only GraphicsResources-derived specs request the store/atomic features.
20304 if (std::is_base_of<GraphicsResources, SpecResource>::value)
20306 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
20307 features.coreFeatures.fragmentStoresAndAtomics = true;
20310 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
20313 vector<deUint32> getLotsIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
20315 vector<deUint32> result;
20316 de::Random rnd (seed);
20318 result.reserve(numDataPoints);
20321 result.push_back(1u);
20324 for (deUint32 dataPointNdx = 1; dataPointNdx < numDataPoints; ++dataPointNdx)
20325 result.push_back(rnd.getUint8());
// Computes the expected output of the lots-of-ids test: for every index,
// inData1[i] + count * inData2[i] in unsigned 32-bit arithmetic (results wrap
// modulo 2^32).
//
// Only indices present in both inputs are evaluated, so the result has as
// many elements as the shorter input. Indexing inData2 up to inData1.size()
// would read out of bounds whenever inData2 is the shorter one.
vector<deUint32> getLotsIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2, const deUint32 count)
{
	const size_t		numResults	= (inData1.size() < inData2.size()) ? inData1.size() : inData2.size();
	vector<deUint32>	result;

	result.reserve(numResults);

	for (size_t dataPointNdx = 0; dataPointNdx < numResults; ++dataPointNdx)
		result.push_back(inData1[dataPointNdx] + count * inData2[dataPointNdx]);

	return result;
}
// Adds the "lots_ids" test to testGroup. For every element the shader loads one
// value from each source buffer and then runs a generated chain of sequenceCount
// (10000) OpIAdd instructions with consecutive numeric result ids starting at
// firstNdx (100). Each instruction adds the value loaded from %ssbo_src1 (%91) to
// the previous id; the chain starts from the value loaded from %ssbo_src0, which
// is given the id firstNdx - 1. The last id of the chain is stored to the
// destination buffer. The expected output is inData1 + sequenceCount * inData2,
// computed by getLotsIdsAbuseResults().
//
// SpecResource is the resource description type handed to finalizeTestsCreation();
// when it derives from GraphicsResources, the vertex and fragment store/atomic
// features are requested as well.
20342 template<class SpecResource>
20343 void createLotsIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
20345 const deUint32 numDataPoints = 16;
20346 const deUint32 firstNdx = 100u;
20347 const deUint32 sequenceCount = 10000u;
20348 const std::string testName ("lots_ids");
// Both inputs are seeded from a hash of the test name, offset by 1 and 2.
20349 const deUint32 seed (deStringHash(testName.c_str()));
20350 const vector<deUint32> inData1 (getLotsIdsAbuseData(numDataPoints, seed + 1));
20351 const vector<deUint32> inData2 (getLotsIdsAbuseData(numDataPoints, seed + 2));
20352 const vector<deUint32> outData (getLotsIdsAbuseResults(inData1, inData2, sequenceCount));
// Types and variables: three buffer-block variables sharing one struct type
// that wraps an array of ${num_data_points} 32-bit unsigned integers.
20353 const StringTemplate preMain
20355 "%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20356 " %up_u32 = OpTypePointer Uniform %u32\n"
20357 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20358 " %SSBO32 = OpTypeStruct %ra_u32\n"
20359 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20360 "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20361 "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20362 " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
// Layout and bindings: sources at bindings 0 and 1, destination at binding 2,
// all in descriptor set 0.
20364 const StringTemplate decoration
20366 "OpDecorate %ra_u32 ArrayStride 4\n"
20367 "OpMemberDecorate %SSBO32 0 Offset 0\n"
20368 "OpDecorate %SSBO32 BufferBlock\n"
20369 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
20370 "OpDecorate %ssbo_src0 Binding 0\n"
20371 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
20372 "OpDecorate %ssbo_src1 Binding 1\n"
20373 "OpDecorate %ssbo_dst DescriptorSet 0\n"
20374 "OpDecorate %ssbo_dst Binding 2\n"
// Test function: a counted loop over [0, num_data_points). ${zeroth_id} and
// ${last_id} are the ids at the start and end of the generated add chain.
// NOTE(review): the chain itself is supplied through specs["seq"]; the line
// that places it in the template is not visible here.
20376 const StringTemplate testFun
20378 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20379 " %param = OpFunctionParameter %v4f32\n"
20381 " %entry = OpLabel\n"
20382 " %i = OpVariable %fp_i32 Function\n"
20383 " OpStore %i %c_i32_0\n"
20384 " OpBranch %loop\n"
20386 " %loop = OpLabel\n"
20387 " %i_cmp = OpLoad %i32 %i\n"
20388 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20389 " OpLoopMerge %merge %next None\n"
20390 " OpBranchConditional %lt %write %merge\n"
20392 " %write = OpLabel\n"
20393 " %ndx = OpLoad %i32 %i\n"
20395 " %90 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20396 " %91 = OpLoad %u32 %90\n"
20398 " %98 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20399 " %${zeroth_id} = OpLoad %u32 %98\n"
20403 // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20404 " %dst = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20405 " OpStore %dst %${last_id}\n"
20406 " OpBranch %next\n"
20408 " %next = OpLabel\n"
20409 " %i_cur = OpLoad %i32 %i\n"
20410 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20411 " OpStore %i %i_new\n"
20412 " OpBranch %loop\n"
20414 " %merge = OpLabel\n"
20415 " OpReturnValue %param\n"
20419 deUint32 lastId = firstNdx;
20420 SpecResource specResource;
20421 map<string, string> specs;
20422 VulkanFeatures features;
20423 map<string, string> fragments;
20424 vector<string> extensions;
20425 std::string sequence;
// Generate the add chain: "%<id> = OpIAdd %u32 %91 %<id - 1>" for ids
// firstNdx .. firstNdx + sequenceCount - 1. lastId ends up as the final id.
20427 for (deUint32 sequenceNdx = 0; sequenceNdx < sequenceCount; ++sequenceNdx)
20429 const deUint32 sequenceId = sequenceNdx + firstNdx;
20430 const std::string sequenceIdStr = de::toString(sequenceId);
20432 sequence += "%" + sequenceIdStr + " = OpIAdd %u32 %91 %" + de::toString(sequenceId - 1) + "\n";
20433 lastId = sequenceId;
// After the first line is appended, reserve space for the whole chain in one
// go: (length of the first line + 10) per instruction.
20435 if (sequenceNdx == 0)
20436 sequence.reserve((10 + sequence.length()) * sequenceCount);
20439 specs["num_data_points"] = de::toString(numDataPoints);
20440 specs["zeroth_id"] = de::toString(firstNdx - 1);
20441 specs["last_id"] = de::toString(lastId);
20442 specs["seq"] = sequence;
20444 fragments["decoration"] = decoration.specialize(specs);
20445 fragments["pre_main"] = preMain.specialize(specs);
20446 fragments["testfun"] = testFun.specialize(specs);
// Resources are pushed in the order inData1, inData2, outData.
// NOTE(review): presumably this order maps onto bindings 0, 1 and 2 — confirm
// against finalizeTestsCreation().
20448 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20449 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20450 specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
// Only GraphicsResources-derived specs request the store/atomic features.
20452 if (std::is_base_of<GraphicsResources, SpecResource>::value)
20454 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
20455 features.coreFeatures.fragmentStoresAndAtomics = true;
20458 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
20461 tcu::TestCaseGroup* createSpirvIdsAbuseTests (tcu::TestContext& testCtx)
20463 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
20465 createSparseIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20466 createLotsIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20468 return testGroup.release();
20471 tcu::TestCaseGroup* createSpirvIdsAbuseGroup (tcu::TestContext& testCtx)
20473 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
20475 createSparseIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20476 createLotsIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20478 return testGroup.release();
20481 tcu::TestCaseGroup* createFunctionParamsGroup (tcu::TestContext& testCtx)
20483 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "function_params", "Function parameter tests"));
20484 #ifndef CTS_USES_VULKANSC
20485 static const char data_dir[] = "spirv_assembly/instruction/function_params";
20487 static const struct
20489 const std::string name;
20490 const std::string desc;
20493 { "sampler_param", "Test combined image sampler as function parameter" },
20496 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
20498 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20499 cases[i].name.c_str(),
20500 cases[i].desc.c_str(),
20502 cases[i].name + ".amber");
20503 testGroup->addChild(testCase);
20506 return testGroup.release();
// Creates the "early_fragment" group. When CTS_USES_VULKANSC is not defined, one
// Amber test case is added per table entry; the entry name is used both as the
// test name and, with ".amber" appended, as the script file name.
// Returns the group released from its MovePtr.
20509 tcu::TestCaseGroup* createEarlyFragmentTests(tcu::TestContext& testCtx)
20511 de::MovePtr<tcu::TestCaseGroup> earlyFragTests (new tcu::TestCaseGroup(testCtx, "early_fragment", "Early Fragment Tests"));
20513 #ifndef CTS_USES_VULKANSC
20514 static const char dataDir[] = "spirv_assembly/instruction/graphics/early_fragment";
// Table of cases: { name, description }. Each description states the relation of
// the shader-written gl_FragDepth and of the polygon depth to CLEAR_DEPTH.
20516 static const struct Case
20523 // Overwriting the gl_FragDepth should be ignored, when Early Fragment Test Mode is enabled.
20524 { "depth_less", "gl_FragDepth > CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH." },
20525 { "depth_greater", "gl_FragDepth < CLEAR_DEPTH. Polygon depth > CLEAR_DEPTH." },
20526 { "depth_less_or_equal", "gl_FragDepth > CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20527 { "depth_greater_or_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20528 { "depth_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20529 { "depth_not_equal", "gl_FragDepth == CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH." }
// One Amber test case per table entry, added in table order.
20532 for (const auto& tCase : cases)
20534 cts_amber::AmberTestCase* testCase = cts_amber::createAmberTestCase(testCtx,
20535 tCase.name.c_str(),
20536 tCase.desc.c_str(),
20538 tCase.name + ".amber");
20540 earlyFragTests->addChild(testCase);
20542 #endif // CTS_USES_VULKANSC
20544 return earlyFragTests.release();
// Creates the "early_and_late_fragment" group. When CTS_USES_VULKANSC is not
// defined, one Amber test case is added per table entry; the entry name is used
// both as the test name and, with ".amber" appended, as the script file name.
// Every case is created with "VK_AMD_shader_early_and_late_fragment_tests" as an
// additional argument.
// Returns the group released from its MovePtr.
20547 tcu::TestCaseGroup* createEarlyAndLateFragmentTests(tcu::TestContext& testCtx)
20549 de::MovePtr<tcu::TestCaseGroup> earlyLateFragTests(new tcu::TestCaseGroup(testCtx, "early_and_late_fragment", "Early And Late Fragment Tests"));
20550 #ifndef CTS_USES_VULKANSC
20551 static const char dataDir[] = "spirv_assembly/instruction/graphics/early_and_late_fragment";
// Table of cases: { name, description }. Each description states the relation of
// the shader-written gl_FragDepth and of the polygon depth to CLEAR_DEPTH.
20553 static const struct Case
20559 { "depth_less", "gl_FragDepth < CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH." },
20560 { "depth_greater", "gl_FragDepth > CLEAR_DEPTH. Polygon depth > CLEAR_DEPTH." },
20561 { "depth_less_or_equal", "gl_FragDepth > CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20562 { "depth_greater_or_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20563 { "depth_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20564 { "depth_not_equal", "gl_FragDepth == CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH." }
// One Amber test case per table entry, added in table order.
20567 for (const auto& tCase : cases)
20569 cts_amber::AmberTestCase* testCase = cts_amber::createAmberTestCase(testCtx,
20570 tCase.name.c_str(),
20571 tCase.desc.c_str(),
20573 tCase.name + ".amber",
// NOTE(review): presumably the list of requirements the Amber test case checks
// before running — confirm against createAmberTestCase().
20574 { "VK_AMD_shader_early_and_late_fragment_tests" });
20576 earlyLateFragTests->addChild(testCase);
20580 return earlyLateFragTests.release();
// Creates the "execution_mode" group. When CTS_USES_VULKANSC is not defined, one
// Amber test case is added per table entry; the entry name is used both as the
// test name and, with ".amber" appended, as the script file name.
// Returns the group released from its MovePtr.
20583 tcu::TestCaseGroup* createOpExecutionModeTests (tcu::TestContext& testCtx)
20585 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "execution_mode", "Execution mode tests"));
20587 #ifndef CTS_USES_VULKANSC
20588 static const char dataDir[] = "spirv_assembly/instruction/graphics/execution_mode";
// Table of cases: { name, description }. Descriptions state how the written
// FragDepth relates to the polygon depth and the expected depth test outcome.
// NOTE(review): the "depthgreater_3" description says "FragDepth > Polygon depth"
// while also saying the greater-than promise is violated; by analogy with
// "depthgreater_1" the relation was presumably meant to be "<" — confirm against
// depthgreater_3.amber before changing the string.
20590 static const struct Case
20596 { "depthless_0", "FragDepth < Polygon depth: depth test should pass." },
20597 { "depthless_1", "FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit depth, but the depth test should pass." },
20598 { "depthless_2", "FragDepth < Polygon depth: depth test should fail." },
20599 { "depthless_3", "FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit depth, the depth test should fail." },
20600 { "depthless_4", "FragDepth < Polygon depth: depth test should pass." },
20601 { "depthgreater_0", "FragDepth > Polygon depth: depth test should pass." },
20602 { "depthgreater_1", "FragDepth < Polygon depth: violates the promise that FragDepth is greater than the implicit depth, but the depth test should pass." },
20603 { "depthgreater_2", "FragDepth > Polygon depth: depth test should fail." },
20604 { "depthgreater_3", "FragDepth > Polygon depth: violates the promise that FragDepth is greater than the implicit depth, the depth test should fail." },
20605 { "depthgreater_4", "FragDepth > Polygon depth: depth test should pass." },
20606 { "depthunchanged_0", "FragDepth == Polygon depth: depth test should pass." },
20607 { "depthunchanged_1", "FragDepth == Polygon depth: depth test should fail." },
20608 { "depthunchanged_2", "FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit depth, the depth test should pass." },
20609 { "depthunchanged_3", "FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit depth, the depth test should fail." },
// One Amber test case per table entry, added in table order.
20612 for (const auto& case_ : cases)
20614 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20615 case_.name.c_str(),
20616 case_.desc.c_str(),
20618 case_.name + ".amber");
20619 testGroup->addChild(testCase);
20621 #endif // CTS_USES_VULKANSC
20623 return testGroup.release();
// Creates the "mul_extended" group for the Op[S/U]MulExtended instructions. When
// CTS_USES_VULKANSC is not defined, one Amber test case is added per table entry;
// the script file name is the entry name with ".amber" appended.
// Returns the group released from its MovePtr.
20626 tcu::TestCaseGroup* createOpMulExtendedGroup (tcu::TestContext& testCtx)
20628 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "mul_extended", "Op[S/U]MulExtended tests"));
20630 #ifndef CTS_USES_VULKANSC
20631 static const char dataDir[] = "spirv_assembly/instruction/compute/mul_extended";
// Table of cases: a name plus a list of feature strings. The 32-bit variants
// list no features; the 8-, 16- and 64-bit variants list integer and storage
// features for their width.
// NOTE(review): presumably `features` is forwarded to createAmberTestCase() as
// the requirements list — the remaining call arguments are not visible here.
20633 static const struct Case
20636 const vector<string> features;
20639 { "signed_16bit", {"Features.shaderInt16", "Storage16BitFeatures.storageBuffer16BitAccess"} },
20640 { "signed_32bit", {} },
20641 { "signed_64bit", {"Features.shaderInt64"} },
20642 { "signed_8bit", {"Float16Int8Features.shaderInt8", "Storage8BitFeatures.storageBuffer8BitAccess"} },
20643 { "unsigned_16bit", {"Features.shaderInt16", "Storage16BitFeatures.storageBuffer16BitAccess"} },
20644 { "unsigned_32bit", {} },
20645 { "unsigned_64bit", {"Features.shaderInt64"} },
20646 { "unsigned_8bit", {"Float16Int8Features.shaderInt8", "Storage8BitFeatures.storageBuffer8BitAccess"} }
// One Amber test case per table entry, added in table order.
20649 for (const auto& test : cases)
20651 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20655 test.name + ".amber",
20657 testGroup->addChild(testCase);
20659 #endif // CTS_USES_VULKANSC
20661 return testGroup.release();
20664 tcu::TestCaseGroup* createQueryGroup (tcu::TestContext& testCtx)
20666 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "image_query", "image query tests"));
20668 #ifndef CTS_USES_VULKANSC
20669 static const char data_dir[] = "spirv_assembly/instruction/image_query";
20671 static const struct
20673 const std::string name;
20674 const std::string desc;
20677 { "samples_storage", "Test samples query can be used on storage images" },
20680 vector<string> requirements(1, "Features.shaderStorageImageMultisample");
20682 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
20684 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20685 cases[i].name.c_str(),
20686 cases[i].desc.c_str(),
20688 cases[i].name + ".amber",
20690 testGroup->addChild(testCase);
20692 #endif // CTS_USES_VULKANSC
20694 return testGroup.release();
20697 tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
20699 const bool testComputePipeline = true;
20701 de::MovePtr<tcu::TestCaseGroup> instructionTests (new tcu::TestCaseGroup(testCtx, "instruction", "Instructions with special opcodes/operands"));
20702 de::MovePtr<tcu::TestCaseGroup> computeTests (new tcu::TestCaseGroup(testCtx, "compute", "Compute Instructions with special opcodes/operands"));
20703 de::MovePtr<tcu::TestCaseGroup> graphicsTests (new tcu::TestCaseGroup(testCtx, "graphics", "Graphics Instructions with special opcodes/operands"));
20705 computeTests->addChild(createSpivVersionCheckTests(testCtx, testComputePipeline));
20706 computeTests->addChild(createLocalSizeGroup(testCtx, false));
20707 computeTests->addChild(createLocalSizeGroup(testCtx, true));
20708 computeTests->addChild(createNonSemanticInfoGroup(testCtx));
20709 computeTests->addChild(createOpNopGroup(testCtx));
20710 computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITHOUT_NAN));
20711 computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITH_NAN));
20712 computeTests->addChild(createOpAtomicGroup(testCtx, false));
20713 computeTests->addChild(createOpAtomicGroup(testCtx, true)); // Using new StorageBuffer decoration
20714 computeTests->addChild(createOpAtomicGroup(testCtx, false, 1024, true)); // Return value validation
20715 computeTests->addChild(createOpAtomicGroup(testCtx, true, 65535, false, true)); // volatile atomics
20716 computeTests->addChild(createOpLineGroup(testCtx));
20717 computeTests->addChild(createOpModuleProcessedGroup(testCtx));
20718 computeTests->addChild(createOpNoLineGroup(testCtx));
20719 computeTests->addChild(createOpConstantNullGroup(testCtx));
20720 computeTests->addChild(createOpConstantCompositeGroup(testCtx));
20721 computeTests->addChild(createOpConstantUsageGroup(testCtx));
20722 computeTests->addChild(createSpecConstantGroup(testCtx));
20723 computeTests->addChild(createOpSourceGroup(testCtx));
20724 computeTests->addChild(createOpSourceExtensionGroup(testCtx));
20725 computeTests->addChild(createDecorationGroupGroup(testCtx));
20726 computeTests->addChild(createOpPhiGroup(testCtx));
20727 computeTests->addChild(createLoopControlGroup(testCtx));
20728 computeTests->addChild(createFunctionControlGroup(testCtx));
20729 computeTests->addChild(createSelectionControlGroup(testCtx));
20730 computeTests->addChild(createBlockOrderGroup(testCtx));
20731 computeTests->addChild(createMultipleShaderGroup(testCtx));
20732 computeTests->addChild(createMemoryAccessGroup(testCtx));
20733 computeTests->addChild(createOpCopyMemoryGroup(testCtx));
20734 computeTests->addChild(createOpCopyObjectGroup(testCtx));
20735 computeTests->addChild(createNoContractionGroup(testCtx));
20736 computeTests->addChild(createOpUndefGroup(testCtx));
20737 computeTests->addChild(createOpUnreachableGroup(testCtx));
20738 computeTests->addChild(createOpQuantizeToF16Group(testCtx));
20739 computeTests->addChild(createOpFRemGroup(testCtx));
20740 computeTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_PASS));
20741 computeTests->addChild(createOpSRemComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
20742 computeTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_PASS));
20743 computeTests->addChild(createOpSModComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
20744 #ifndef CTS_USES_VULKANSC
20745 computeTests->addChild(createOpSDotKHRComputeGroup(testCtx));
20746 computeTests->addChild(createOpUDotKHRComputeGroup(testCtx));
20747 computeTests->addChild(createOpSUDotKHRComputeGroup(testCtx));
20748 computeTests->addChild(createOpSDotAccSatKHRComputeGroup(testCtx));
20749 computeTests->addChild(createOpUDotAccSatKHRComputeGroup(testCtx));
20750 computeTests->addChild(createOpSUDotAccSatKHRComputeGroup(testCtx));
20751 #endif // CTS_USES_VULKANSC
20752 computeTests->addChild(createConvertComputeTests(testCtx, "OpSConvert", "sconvert"));
20753 computeTests->addChild(createConvertComputeTests(testCtx, "OpUConvert", "uconvert"));
20754 computeTests->addChild(createConvertComputeTests(testCtx, "OpFConvert", "fconvert"));
20755 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertSToF", "convertstof"));
20756 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToS", "convertftos"));
20757 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertUToF", "convertutof"));
20758 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToU", "convertftou"));
20759 computeTests->addChild(createOpCompositeInsertGroup(testCtx));
20760 computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
20761 computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
20762 computeTests->addChild(createOpNMinGroup(testCtx));
20763 computeTests->addChild(createOpNMaxGroup(testCtx));
20764 computeTests->addChild(createOpNClampGroup(testCtx));
20765 computeTests->addChild(createFloatControlsExtensionlessGroup(testCtx));
20767 de::MovePtr<tcu::TestCaseGroup> computeAndroidTests (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
20769 computeAndroidTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20770 computeAndroidTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20772 computeTests->addChild(computeAndroidTests.release());
20775 computeTests->addChild(create8BitStorageComputeGroup(testCtx));
20776 computeTests->addChild(create16BitStorageComputeGroup(testCtx));
20777 computeTests->addChild(createFloatControlsComputeGroup(testCtx));
20778 computeTests->addChild(createUboMatrixPaddingComputeGroup(testCtx));
20779 computeTests->addChild(createCompositeInsertComputeGroup(testCtx));
20780 computeTests->addChild(createVariableInitComputeGroup(testCtx));
20781 computeTests->addChild(createConditionalBranchComputeGroup(testCtx));
20782 computeTests->addChild(createIndexingComputeGroup(testCtx));
20783 computeTests->addChild(createVariablePointersComputeGroup(testCtx));
20784 computeTests->addChild(createPhysicalPointersComputeGroup(testCtx));
20785 computeTests->addChild(createImageSamplerComputeGroup(testCtx));
20786 computeTests->addChild(createOpNameGroup(testCtx));
20787 computeTests->addChild(createOpMemberNameGroup(testCtx));
20788 computeTests->addChild(createPointerParameterComputeGroup(testCtx));
20789 computeTests->addChild(createFloat16Group(testCtx));
20790 #ifndef CTS_USES_VULKANSC
20791 computeTests->addChild(createFloat32Group(testCtx));
20792 #endif // CTS_USES_VULKANSC
20793 computeTests->addChild(createBoolGroup(testCtx));
20794 computeTests->addChild(createWorkgroupMemoryComputeGroup(testCtx));
20795 computeTests->addChild(createSpirvIdsAbuseGroup(testCtx));
20796 #ifndef CTS_USES_VULKANSC
20797 computeTests->addChild(createSignedIntCompareGroup(testCtx));
20798 computeTests->addChild(createSignedOpTestsGroup(testCtx));
20799 #endif // CTS_USES_VULKANSC
20800 computeTests->addChild(createUnusedVariableComputeTests(testCtx));
20801 #ifndef CTS_USES_VULKANSC
20802 computeTests->addChild(createPtrAccessChainGroup(testCtx));
20803 computeTests->addChild(createVectorShuffleGroup(testCtx));
20804 #endif // CTS_USES_VULKANSC
20805 computeTests->addChild(createHlslComputeGroup(testCtx));
20806 computeTests->addChild(createEmptyStructComputeGroup(testCtx));
20807 computeTests->addChild(create64bitCompareComputeGroup(testCtx));
20808 #ifndef CTS_USES_VULKANSC
20809 computeTests->addChild(createOpArrayLengthComputeGroup(testCtx));
20810 #endif // CTS_USES_VULKANSC
20811 computeTests->addChild(createPhysicalStorageBufferTestGroup(testCtx));
20812 computeTests->addChild(createOpMulExtendedGroup(testCtx));
20814 graphicsTests->addChild(createCrossStageInterfaceTests(testCtx));
20815 graphicsTests->addChild(createSpivVersionCheckTests(testCtx, !testComputePipeline));
20816 graphicsTests->addChild(createOpNopTests(testCtx));
20817 graphicsTests->addChild(createOpSourceTests(testCtx));
20818 graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
20819 graphicsTests->addChild(createOpModuleProcessedTests(testCtx));
20820 graphicsTests->addChild(createOpLineTests(testCtx));
20821 graphicsTests->addChild(createOpNoLineTests(testCtx));
20822 graphicsTests->addChild(createOpConstantNullTests(testCtx));
20823 graphicsTests->addChild(createOpConstantCompositeTests(testCtx));
20824 graphicsTests->addChild(createMemoryAccessTests(testCtx));
20825 graphicsTests->addChild(createOpUndefTests(testCtx));
20826 graphicsTests->addChild(createSelectionBlockOrderTests(testCtx));
20827 graphicsTests->addChild(createModuleTests(testCtx));
20828 graphicsTests->addChild(createUnusedVariableTests(testCtx));
20829 graphicsTests->addChild(createSwitchBlockOrderTests(testCtx));
20830 graphicsTests->addChild(createOpPhiTests(testCtx));
20831 graphicsTests->addChild(createNoContractionTests(testCtx));
20832 graphicsTests->addChild(createOpQuantizeTests(testCtx));
20833 graphicsTests->addChild(createLoopTests(testCtx));
20834 graphicsTests->addChild(createSpecConstantTests(testCtx));
20835 graphicsTests->addChild(createSpecConstantOpQuantizeToF16Group(testCtx));
20836 graphicsTests->addChild(createBarrierTests(testCtx));
20837 graphicsTests->addChild(createDecorationGroupTests(testCtx));
20838 graphicsTests->addChild(createFRemTests(testCtx));
20839 graphicsTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
20840 graphicsTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
20843 de::MovePtr<tcu::TestCaseGroup> graphicsAndroidTests (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
20845 graphicsAndroidTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20846 graphicsAndroidTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20848 graphicsTests->addChild(graphicsAndroidTests.release());
20851 graphicsTests->addChild(createOpNameTests(testCtx));
20852 graphicsTests->addChild(createOpNameAbuseTests(testCtx));
20853 graphicsTests->addChild(createOpMemberNameAbuseTests(testCtx));
20855 graphicsTests->addChild(create8BitStorageGraphicsGroup(testCtx));
20856 graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
20857 graphicsTests->addChild(createFloatControlsGraphicsGroup(testCtx));
20858 graphicsTests->addChild(createUboMatrixPaddingGraphicsGroup(testCtx));
20859 graphicsTests->addChild(createCompositeInsertGraphicsGroup(testCtx));
20860 graphicsTests->addChild(createVariableInitGraphicsGroup(testCtx));
20861 graphicsTests->addChild(createConditionalBranchGraphicsGroup(testCtx));
20862 graphicsTests->addChild(createIndexingGraphicsGroup(testCtx));
20863 graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));
20864 graphicsTests->addChild(createImageSamplerGraphicsGroup(testCtx));
20865 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpSConvert", "sconvert"));
20866 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpUConvert", "uconvert"));
20867 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpFConvert", "fconvert"));
20868 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertSToF", "convertstof"));
20869 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToS", "convertftos"));
20870 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertUToF", "convertutof"));
20871 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToU", "convertftou"));
20872 graphicsTests->addChild(createPointerParameterGraphicsGroup(testCtx));
20873 graphicsTests->addChild(createVaryingNameGraphicsGroup(testCtx));
20874 graphicsTests->addChild(createFloat16Tests(testCtx));
20875 #ifndef CTS_USES_VULKANSC
20876 graphicsTests->addChild(createFloat32Tests(testCtx));
20877 #endif // CTS_USES_VULKANSC
20878 graphicsTests->addChild(createSpirvIdsAbuseTests(testCtx));
20879 graphicsTests->addChild(create64bitCompareGraphicsGroup(testCtx));
20880 graphicsTests->addChild(createEarlyFragmentTests(testCtx));
20881 graphicsTests->addChild(createEarlyAndLateFragmentTests(testCtx));
20882 graphicsTests->addChild(createOpExecutionModeTests(testCtx));
20884 instructionTests->addChild(computeTests.release());
20885 instructionTests->addChild(graphicsTests.release());
20886 #ifndef CTS_USES_VULKANSC
20887 instructionTests->addChild(createSpirvVersion1p4Group(testCtx));
20888 instructionTests->addChild(createFunctionParamsGroup(testCtx));
20889 #endif // CTS_USES_VULKANSC
20890 instructionTests->addChild(createQueryGroup(testCtx));
20891 instructionTests->addChild(createTrinaryMinMaxGroup(testCtx));
20892 instructionTests->addChild(createTerminateInvocationGroup(testCtx));
20894 return instructionTests.release();