1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2017 The Khronos Group Inc.
6 * Copyright (c) 2017 Codeplay Software Ltd.
7 * Copyright (c) 2018 NVIDIA Corporation
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
23 * \brief Subgroups Tests
24 */ /*--------------------------------------------------------------------*/
26 #include "vktSubgroupsPartitionedTests.hpp"
27 #include "vktSubgroupsTestsUtils.hpp"
// Result checker for vertex-pipeline (framebuffer) test variants: scans a buffer
// of `width` 32-bit values, one per invocation/pixel.
// NOTE(review): the loop body and the pass/fail criterion are elided from this
// listing — presumably each `val` is compared against the expected tempResult
// bitmask written by the shader; confirm against the full source.
65 static bool checkVertexPipelineStages(std::vector<const void*> datas,
66 deUint32 width, deUint32)
68 const deUint32* data =
69 reinterpret_cast<const deUint32*>(datas[0]);
70 for (deUint32 x = 0; x < width; ++x)
72 deUint32 val = data[x];
// Result checker for the compute-shader test variant: walks every global
// invocation (workgroup count x local size in all three dimensions), computes
// the same linear `offset` the shader used to index its output SSBO, and
// verifies the stored result.
// NOTE(review): lines computing `offset` from the global invocation ID and the
// failure-reporting path are elided; the visible check expects the value
// 0xFFFFFF, i.e. all tempResult bits (0x1|0x2|0x4|0x8..0x20000<<N ranges) set.
83 static bool checkCompute(std::vector<const void*> datas,
84 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
87 const deUint32* data =
88 reinterpret_cast<const deUint32*>(datas[0]);
90 for (deUint32 nX = 0; nX < numWorkgroups[0]; ++nX)
92 for (deUint32 nY = 0; nY < numWorkgroups[1]; ++nY)
94 for (deUint32 nZ = 0; nZ < numWorkgroups[2]; ++nZ)
96 for (deUint32 lX = 0; lX < localSize[0]; ++lX)
98 for (deUint32 lY = 0; lY < localSize[1]; ++lY)
100 for (deUint32 lZ = 0; lZ < localSize[2];
103 const deUint32 globalInvocationX =
104 nX * localSize[0] + lX;
105 const deUint32 globalInvocationY =
106 nY * localSize[1] + lY;
107 const deUint32 globalInvocationZ =
108 nZ * localSize[2] + lZ;
110 const deUint32 globalSizeX =
111 numWorkgroups[0] * localSize[0];
112 const deUint32 globalSizeY =
113 numWorkgroups[1] * localSize[1];
115 const deUint32 offset =
// Shader writes 0xFFFFFF on full success; any other value is a failure.
122 if (0xFFFFFF != data[offset])
// Maps an OPTYPE_* enum value to the corresponding GLSL
// KHR_shader_subgroup_arithmetic builtin name (reduce, inclusive-scan and
// exclusive-scan variants). Used to build the reference (non-partitioned)
// computation in the generated shader. Unknown values hit DE_FATAL.
// NOTE(review): the switch keyword, default label, and several reduce-op case
// labels (ADD..XOR) are elided from this listing.
136 std::string getOpTypeName(int opType)
141 DE_FATAL("Unsupported op type");
144 return "subgroupAdd";
146 return "subgroupMul";
148 return "subgroupMin";
150 return "subgroupMax";
152 return "subgroupAnd";
156 return "subgroupXor";
157 case OPTYPE_INCLUSIVE_ADD:
158 return "subgroupInclusiveAdd";
159 case OPTYPE_INCLUSIVE_MUL:
160 return "subgroupInclusiveMul";
161 case OPTYPE_INCLUSIVE_MIN:
162 return "subgroupInclusiveMin";
163 case OPTYPE_INCLUSIVE_MAX:
164 return "subgroupInclusiveMax";
165 case OPTYPE_INCLUSIVE_AND:
166 return "subgroupInclusiveAnd";
167 case OPTYPE_INCLUSIVE_OR:
168 return "subgroupInclusiveOr";
169 case OPTYPE_INCLUSIVE_XOR:
170 return "subgroupInclusiveXor";
171 case OPTYPE_EXCLUSIVE_ADD:
172 return "subgroupExclusiveAdd";
173 case OPTYPE_EXCLUSIVE_MUL:
174 return "subgroupExclusiveMul";
175 case OPTYPE_EXCLUSIVE_MIN:
176 return "subgroupExclusiveMin";
177 case OPTYPE_EXCLUSIVE_MAX:
178 return "subgroupExclusiveMax";
179 case OPTYPE_EXCLUSIVE_AND:
180 return "subgroupExclusiveAnd";
181 case OPTYPE_EXCLUSIVE_OR:
182 return "subgroupExclusiveOr";
183 case OPTYPE_EXCLUSIVE_XOR:
184 return "subgroupExclusiveXor";
// Maps an OPTYPE_* enum value to the corresponding GLSL
// NV_shader_subgroup_partitioned builtin name (the ...NV variants that take an
// explicit uvec4 partition ballot). Mirrors getOpTypeName() one-to-one so the
// generated shader can compare partitioned vs. reference results.
// NOTE(review): the switch keyword, default label, and reduce-op case labels
// are elided from this listing.
188 std::string getOpTypeNamePartitioned(int opType)
193 DE_FATAL("Unsupported op type");
196 return "subgroupPartitionedAddNV";
198 return "subgroupPartitionedMulNV";
200 return "subgroupPartitionedMinNV";
202 return "subgroupPartitionedMaxNV";
204 return "subgroupPartitionedAndNV";
206 return "subgroupPartitionedOrNV";
208 return "subgroupPartitionedXorNV";
209 case OPTYPE_INCLUSIVE_ADD:
210 return "subgroupPartitionedInclusiveAddNV";
211 case OPTYPE_INCLUSIVE_MUL:
212 return "subgroupPartitionedInclusiveMulNV";
213 case OPTYPE_INCLUSIVE_MIN:
214 return "subgroupPartitionedInclusiveMinNV";
215 case OPTYPE_INCLUSIVE_MAX:
216 return "subgroupPartitionedInclusiveMaxNV";
217 case OPTYPE_INCLUSIVE_AND:
218 return "subgroupPartitionedInclusiveAndNV";
219 case OPTYPE_INCLUSIVE_OR:
220 return "subgroupPartitionedInclusiveOrNV";
221 case OPTYPE_INCLUSIVE_XOR:
222 return "subgroupPartitionedInclusiveXorNV";
223 case OPTYPE_EXCLUSIVE_ADD:
224 return "subgroupPartitionedExclusiveAddNV";
225 case OPTYPE_EXCLUSIVE_MUL:
226 return "subgroupPartitionedExclusiveMulNV";
227 case OPTYPE_EXCLUSIVE_MIN:
228 return "subgroupPartitionedExclusiveMinNV";
229 case OPTYPE_EXCLUSIVE_MAX:
230 return "subgroupPartitionedExclusiveMaxNV";
231 case OPTYPE_EXCLUSIVE_AND:
232 return "subgroupPartitionedExclusiveAndNV";
233 case OPTYPE_EXCLUSIVE_OR:
234 return "subgroupPartitionedExclusiveOrNV";
235 case OPTYPE_EXCLUSIVE_XOR:
236 return "subgroupPartitionedExclusiveXorNV";
// Returns a GLSL constructor expression for the identity element of the given
// op/format pair (e.g. 0 for add, 1 for mul, +inf / INT_MAX / UINT_MAX for min,
// ~0 for and, 0 for or/xor). Used as the expected result of exclusive scans
// over single-invocation partitions.
// NOTE(review): the two format-classifying switch statements and the flag
// assignments (isFloat/isUnsigned/isBool = true) are partially elided from
// this listing; the min/max branches below presumably dispatch on those flags.
240 std::string getIdentity(int opType, vk::VkFormat format)
242 bool isFloat = false;
244 bool isUnsigned = false;
249 DE_FATAL("Unhandled format!");
251 case VK_FORMAT_R32_SINT:
252 case VK_FORMAT_R32G32_SINT:
253 case VK_FORMAT_R32G32B32_SINT:
254 case VK_FORMAT_R32G32B32A32_SINT:
257 case VK_FORMAT_R32_UINT:
258 case VK_FORMAT_R32G32_UINT:
259 case VK_FORMAT_R32G32B32_UINT:
260 case VK_FORMAT_R32G32B32A32_UINT:
263 case VK_FORMAT_R32_SFLOAT:
264 case VK_FORMAT_R32G32_SFLOAT:
265 case VK_FORMAT_R32G32B32_SFLOAT:
266 case VK_FORMAT_R32G32B32A32_SFLOAT:
267 case VK_FORMAT_R64_SFLOAT:
268 case VK_FORMAT_R64G64_SFLOAT:
269 case VK_FORMAT_R64G64B64_SFLOAT:
270 case VK_FORMAT_R64G64B64A64_SFLOAT:
273 case VK_FORMAT_R8_USCALED:
274 case VK_FORMAT_R8G8_USCALED:
275 case VK_FORMAT_R8G8B8_USCALED:
276 case VK_FORMAT_R8G8B8A8_USCALED:
277 break; // bool types are not anything
283 DE_FATAL("Unsupported op type");
286 case OPTYPE_INCLUSIVE_ADD:
287 case OPTYPE_EXCLUSIVE_ADD:
288 return subgroups::getFormatNameForGLSL(format) + "(0)";
290 case OPTYPE_INCLUSIVE_MUL:
291 case OPTYPE_EXCLUSIVE_MUL:
292 return subgroups::getFormatNameForGLSL(format) + "(1)";
294 case OPTYPE_INCLUSIVE_MIN:
295 case OPTYPE_EXCLUSIVE_MIN:
// Min identity: +infinity for floats, INT_MAX for signed, UINT_MAX for unsigned.
298 return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0x7f800000))";
302 return subgroups::getFormatNameForGLSL(format) + "(0x7fffffff)";
306 return subgroups::getFormatNameForGLSL(format) + "(0xffffffffu)";
310 DE_FATAL("Unhandled case");
314 case OPTYPE_INCLUSIVE_MAX:
315 case OPTYPE_EXCLUSIVE_MAX:
// Max identity: -infinity for floats, INT_MIN for signed, 0 for unsigned.
318 return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0xff800000))";
322 return subgroups::getFormatNameForGLSL(format) + "(0x80000000)";
326 return subgroups::getFormatNameForGLSL(format) + "(0)";
330 DE_FATAL("Unhandled case");
334 case OPTYPE_INCLUSIVE_AND:
335 case OPTYPE_EXCLUSIVE_AND:
336 return subgroups::getFormatNameForGLSL(format) + "(~0)";
338 case OPTYPE_INCLUSIVE_OR:
339 case OPTYPE_EXCLUSIVE_OR:
340 return subgroups::getFormatNameForGLSL(format) + "(0)";
342 case OPTYPE_INCLUSIVE_XOR:
343 case OPTYPE_EXCLUSIVE_XOR:
344 return subgroups::getFormatNameForGLSL(format) + "(0)";
// Builds a GLSL boolean expression comparing two values of the given format.
// Integer/bool formats use exact (==) comparison; float formats use an
// epsilon (0.00001) tolerance for accumulating ops, but exact equality for
// min/max ops (which merely select one of the inputs and introduce no
// rounding error). Vector formats wrap the comparison in all(...).
// NOTE(review): the format switch keyword, several integer-vector case labels,
// and the inner op-type switch headers are elided from this listing.
348 std::string getCompare(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
350 std::string formatName = subgroups::getFormatNameForGLSL(format);
354 return "all(equal(" + lhs + ", " + rhs + "))";
355 case VK_FORMAT_R8_USCALED:
356 case VK_FORMAT_R32_UINT:
357 case VK_FORMAT_R32_SINT:
358 return "(" + lhs + " == " + rhs + ")";
359 case VK_FORMAT_R32_SFLOAT:
360 case VK_FORMAT_R64_SFLOAT:
// Scalar float: epsilon compare by default...
364 return "(abs(" + lhs + " - " + rhs + ") < 0.00001)";
366 case OPTYPE_INCLUSIVE_MIN:
367 case OPTYPE_EXCLUSIVE_MIN:
369 case OPTYPE_INCLUSIVE_MAX:
370 case OPTYPE_EXCLUSIVE_MAX:
// ...but min/max results are exact copies of an input, so compare exactly.
371 return "(" + lhs + " == " + rhs + ")";
373 case VK_FORMAT_R32G32_SFLOAT:
374 case VK_FORMAT_R32G32B32_SFLOAT:
375 case VK_FORMAT_R32G32B32A32_SFLOAT:
376 case VK_FORMAT_R64G64_SFLOAT:
377 case VK_FORMAT_R64G64B64_SFLOAT:
378 case VK_FORMAT_R64G64B64A64_SFLOAT:
382 return "all(lessThan(abs(" + lhs + " - " + rhs + "), " + formatName + "(0.00001)))";
384 case OPTYPE_INCLUSIVE_MIN:
385 case OPTYPE_EXCLUSIVE_MIN:
387 case OPTYPE_INCLUSIVE_MAX:
388 case OPTYPE_EXCLUSIVE_MAX:
389 return "all(equal(" + lhs + ", " + rhs + "))";
// Parameters identifying one generated test case.
// NOTE(review): only shaderStage is visible in this listing; the opType and
// format members (referenced throughout as caseDef.opType / caseDef.format,
// and by the three-element brace initializers in createSubgroupsPartitionedTests)
// are elided here.
394 struct CaseDefinition
397 VkShaderStageFlags shaderStage;
// Generates the GLSL body shared by all shader stages. It accumulates pass
// bits into `tempResult` across several sub-tests:
//   0x1        — a single all-invocations partition matches the plain
//                (non-partitioned) subgroup op,
//   0x2        — bits for inactive invocations in the ballot are ignored,
//   0x4        — per-invocation singleton partitions yield the identity
//                (exclusive ops) or the original value (reduce/inclusive),
//   0x8..      — "random" hash-based partitions, uniform control flow,
//   0x40000..  — same hash-based partitions inside divergent control flow,
//                with 0xFC0000 granted to the branch not taken.
// A fully passing invocation ends with tempResult == 0xFFFFFF (checked by
// checkCompute / checkVertexPipelineStages).
// NOTE(review): several string-concatenation lines (closing braces, the
// else-branches, and the final return) are elided from this listing.
401 string getTestString(const CaseDefinition &caseDef)
403 // NOTE: tempResult can't have anything in bits 31:24 to avoid int->float
404 // conversion overflow in framebuffer tests.
405 string fmt = subgroups::getFormatNameForGLSL(caseDef.format);
407 " uint tempResult = 0;\n"
408 " uint id = gl_SubgroupInvocationID;\n";
410 // Test the case where the partition has a single subset with all invocations in it.
411 // This should generate the same result as the non-partitioned function.
413 " uvec4 allBallot = mask;\n"
414 " " + fmt + " allResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], allBallot);\n"
415 " " + fmt + " refResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
416 " if (" + getCompare(caseDef.opType, caseDef.format, "allResult", "refResult") + ") {\n"
417 " tempResult |= 0x1;\n"
420 // The definition of a partition doesn't forbid bits corresponding to inactive
421 // invocations being in the subset with active invocations. In other words, test that
422 // bits corresponding to inactive invocations are ignored.
424 " if (0 == (gl_SubgroupInvocationID % 2)) {\n"
425 " " + fmt + " allResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], allBallot);\n"
426 " " + fmt + " refResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
427 " if (" + getCompare(caseDef.opType, caseDef.format, "allResult", "refResult") + ") {\n"
428 " tempResult |= 0x2;\n"
// Odd invocations (the inactive branch) get bit 0x2 unconditionally.
431 " tempResult |= 0x2;\n"
434 // Test the case where the partition has each invocation in a unique subset. For
435 // exclusive ops, the result is identity. For reduce/inclusive, it's the original value.
436 string expectedSelfResult = "data[gl_SubgroupInvocationID]";
437 if (caseDef.opType >= OPTYPE_EXCLUSIVE_ADD &&
438 caseDef.opType <= OPTYPE_EXCLUSIVE_XOR) {
439 expectedSelfResult = getIdentity(caseDef.opType, caseDef.format);
443 " uvec4 selfBallot = subgroupPartitionNV(gl_SubgroupInvocationID);\n"
444 " " + fmt + " selfResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], selfBallot);\n"
445 " if (" + getCompare(caseDef.opType, caseDef.format, "selfResult", expectedSelfResult) + ") {\n"
446 " tempResult |= 0x4;\n"
449 // Test "random" partitions based on a hash of the invocation id.
450 // This "hash" function produces interesting/randomish partitions.
451 static const char *idhash = "((id%N)+(id%(N+1))-(id%2)+(id/2))%((N+1)/2)";
454 " for (uint N = 1; N < 16; ++N) {\n"
455 " " + fmt + " idhashFmt = " + fmt + "(" + idhash + ");\n"
456 " uvec4 partitionBallot = subgroupPartitionNV(idhashFmt) & mask;\n"
457 " " + fmt + " partitionedResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], partitionBallot);\n"
458 " for (uint i = 0; i < N; ++i) {\n"
459 " " + fmt + " iFmt = " + fmt + "(i);\n"
460 " if (" + getCompare(caseDef.opType, caseDef.format, "idhashFmt", "iFmt") + ") {\n"
461 " " + fmt + " subsetResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
462 " tempResult |= " + getCompare(caseDef.opType, caseDef.format, "partitionedResult", "subsetResult") + " ? (0x4 << N) : 0;\n"
466 // tests in flow control:
467 " if (1 == (gl_SubgroupInvocationID % 2)) {\n"
468 " for (uint N = 1; N < 7; ++N) {\n"
469 " " + fmt + " idhashFmt = " + fmt + "(" + idhash + ");\n"
470 " uvec4 partitionBallot = subgroupPartitionNV(idhashFmt) & mask;\n"
471 " " + fmt + " partitionedResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], partitionBallot);\n"
472 " for (uint i = 0; i < N; ++i) {\n"
473 " " + fmt + " iFmt = " + fmt + "(i);\n"
474 " if (" + getCompare(caseDef.opType, caseDef.format, "idhashFmt", "iFmt") + ") {\n"
475 " " + fmt + " subsetResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
476 " tempResult |= " + getCompare(caseDef.opType, caseDef.format, "partitionedResult", "subsetResult") + " ? (0x20000 << N) : 0;\n"
// Even invocations (the branch not taken) are granted those bits wholesale.
481 " tempResult |= 0xFC0000;\n"
// Builds the GLSL programs for the framebuffer (no-SSBO) test variants. The
// stage under test (vertex, geometry, tess control, or tess eval) embeds the
// generated test body and writes float(tempResult) to out_color; the other
// pipeline stages come from the shared pass-through helpers. All shaders are
// built for SPIR-V 1.3 (required for subgroup ops).
// NOTE(review): braces, closing stream statements, and the insertion of `bdy`
// into each shader are elided from this listing.
488 void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
490 const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
491 std::ostringstream bdy;
493 subgroups::setFragmentShaderFrameBuffer(programCollection);
// Any stage other than vertex needs the default pass-through vertex shader.
495 if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
496 subgroups::setVertexShaderFrameBuffer(programCollection);
498 bdy << getTestString(caseDef);
500 if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
502 std::ostringstream vertexSrc;
503 vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
504 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
505 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
506 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
507 << "layout(location = 0) in highp vec4 in_position;\n"
508 << "layout(location = 0) out float out_color;\n"
509 << "layout(set = 0, binding = 0) uniform Buffer1\n"
511 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
514 << "void main (void)\n"
516 << " uvec4 mask = subgroupBallot(true);\n"
518 << " out_color = float(tempResult);\n"
519 << " gl_Position = in_position;\n"
520 << " gl_PointSize = 1.0f;\n"
522 programCollection.glslSources.add("vert")
523 << glu::VertexSource(vertexSrc.str()) << buildOptions;
525 else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
527 std::ostringstream geometry;
529 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
530 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
531 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
532 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
533 << "layout(points) in;\n"
534 << "layout(points, max_vertices = 1) out;\n"
535 << "layout(location = 0) out float out_color;\n"
536 << "layout(set = 0, binding = 0) uniform Buffer\n"
538 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
541 << "void main (void)\n"
543 << " uvec4 mask = subgroupBallot(true);\n"
545 << " out_color = float(tempResult);\n"
546 << " gl_Position = gl_in[0].gl_Position;\n"
547 << " EmitVertex();\n"
548 << " EndPrimitive();\n"
551 programCollection.glslSources.add("geometry")
552 << glu::GeometrySource(geometry.str()) << buildOptions;
554 else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
556 std::ostringstream controlSource;
557 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
558 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
559 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
560 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
561 << "layout(vertices = 2) out;\n"
562 << "layout(location = 0) out float out_color[];\n"
563 << "layout(set = 0, binding = 0) uniform Buffer1\n"
565 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
568 << "void main (void)\n"
570 << " if (gl_InvocationID == 0)\n"
572 << " gl_TessLevelOuter[0] = 1.0f;\n"
573 << " gl_TessLevelOuter[1] = 1.0f;\n"
575 << " uvec4 mask = subgroupBallot(true);\n"
577 << " out_color[gl_InvocationID] = float(tempResult);"
578 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
582 programCollection.glslSources.add("tesc")
583 << glu::TessellationControlSource(controlSource.str()) << buildOptions;
// Tess control under test needs a pass-through tess eval stage (and vice versa).
584 subgroups::setTesEvalShaderFrameBuffer(programCollection);
586 else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
589 std::ostringstream evaluationSource;
590 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
591 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
592 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
593 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
594 << "layout(isolines, equal_spacing, ccw ) in;\n"
595 << "layout(location = 0) out float out_color;\n"
596 << "layout(set = 0, binding = 0) uniform Buffer1\n"
598 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
601 << "void main (void)\n"
603 << " uvec4 mask = subgroupBallot(true);\n"
605 << " out_color = float(tempResult);\n"
606 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
609 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
610 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
614 DE_FATAL("Unsupported shader stage");
// Builds the GLSL programs for the SSBO test variants. Compute gets a single
// shader writing one result per global invocation; otherwise every graphics
// stage (vert/tesc/tese/geom/frag) is generated, each writing tempResult to a
// stage-specific result SSBO (bindings 0-3 for outputs, binding 4 for the
// shared read-only input data). All shaders target SPIR-V 1.3.
// NOTE(review): braces, the "#version 450" lines of the graphics shaders,
// `result[]` declarations, and insertion of `bdy` are elided from this listing.
618 void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
620 const string bdy = getTestString(caseDef);
622 if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
624 std::ostringstream src;
626 src << "#version 450\n"
627 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
628 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
629 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
// Local size comes from specialization constants so the test can vary it.
630 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
631 "local_size_z_id = 2) in;\n"
632 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
634 << " uint result[];\n"
636 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
638 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[];\n"
641 << "void main (void)\n"
643 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
// Linearized global-invocation index; checkCompute recomputes the same offset.
644 << " highp uint offset = globalSize.x * ((globalSize.y * "
645 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
646 "gl_GlobalInvocationID.x;\n"
647 << " uvec4 mask = subgroupBallot(true);\n"
649 << " result[offset] = tempResult;\n"
652 programCollection.glslSources.add("comp")
653 << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
658 const std::string vertex =
660 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
661 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
662 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
663 "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
667 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
669 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
674 " uvec4 mask = subgroupBallot(true);\n"
676 " result[gl_VertexIndex] = tempResult;\n"
// Place each vertex at a distinct pixel center of the 1024-wide render target.
677 " float pixelSize = 2.0f/1024.0f;\n"
678 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
679 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
680 " gl_PointSize = 1.0f;\n"
682 programCollection.glslSources.add("vert")
683 << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
687 const std::string tesc =
689 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
690 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
691 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
692 "layout(vertices=1) out;\n"
693 "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
697 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
699 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
704 " uvec4 mask = subgroupBallot(true);\n"
706 " result[gl_PrimitiveID] = tempResult;\n"
707 " if (gl_InvocationID == 0)\n"
709 " gl_TessLevelOuter[0] = 1.0f;\n"
710 " gl_TessLevelOuter[1] = 1.0f;\n"
712 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
714 programCollection.glslSources.add("tesc")
715 << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
719 const std::string tese =
721 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
722 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
723 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
724 "layout(isolines) in;\n"
725 "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
729 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
731 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
736 " uvec4 mask = subgroupBallot(true);\n"
// Two tess-eval invocations per primitive (gl_TessCoord.x is 0 or 1 here).
738 " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
739 " float pixelSize = 2.0f/1024.0f;\n"
740 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
742 programCollection.glslSources.add("tese")
743 << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
747 const std::string geometry =
749 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
750 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
751 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
// ${TOPOLOGY} is substituted by addGeometryShadersFromTemplate below.
752 "layout(${TOPOLOGY}) in;\n"
753 "layout(points, max_vertices = 1) out;\n"
754 "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
758 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
760 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
765 " uvec4 mask = subgroupBallot(true);\n"
767 " result[gl_PrimitiveIDIn] = tempResult;\n"
768 " gl_Position = gl_in[0].gl_Position;\n"
772 subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
773 programCollection.glslSources);
777 const std::string fragment =
779 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
780 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
781 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
782 "layout(location = 0) out uint result;\n"
783 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
785 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
789 " uvec4 mask = subgroupBallot(true);\n"
791 " result = tempResult;\n"
793 programCollection.glslSources.add("fragment")
794 << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
// Fallback shaders for devices without subgroup support in some stages.
796 subgroups::addNoSubgroupShader(programCollection);
// Capability gate run before each test case: requires basic subgroup support,
// the VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV feature, and (for double-typed
// formats) shader-double support. Throws NotSupportedError otherwise.
800 void supportedCheck (Context& context, CaseDefinition caseDef)
802 if (!subgroups::isSubgroupSupported(context))
803 TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
805 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV))
807 TCU_THROW(NotSupportedError, "Device does not support subgroup partitioned operations");
810 if (subgroups::isDoubleFormat(caseDef.format) &&
811 !subgroups::isDoubleSupportedForDevice(context))
813 TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
// Test entry point for the framebuffer (no-SSBO) variants. Verifies the stage
// supports subgroup ops (fail if the spec requires it, NotSupported otherwise),
// then dispatches to the per-stage framebuffer test helper with non-zero-
// initialized input data and the shared checkVertexPipelineStages verifier.
817 tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
819 if (!subgroups::areSubgroupOperationsSupportedForStage(
820 context, caseDef.shaderStage))
// Mandatory stage lacking support is a conformance failure, not a skip.
822 if (subgroups::areSubgroupOperationsRequiredForStage(
823 caseDef.shaderStage))
825 return tcu::TestStatus::fail(
827 subgroups::getShaderStageName(caseDef.shaderStage) +
828 " is required to support subgroup operations!");
832 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
836 subgroups::SSBOData inputData;
837 inputData.format = caseDef.format;
838 inputData.numElements = subgroups::maxSupportedSubgroupSize();
839 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
841 if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
842 return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
843 else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
844 return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
845 else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
846 return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
847 else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
848 return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
850 TCU_THROW(InternalError, "Unhandled shader stage");
// Helper used by test(): returns false when a stage that is *required* to
// support subgroup operations does not (caller then reports a failure);
// throws NotSupportedError for optional stages without support.
// NOTE(review): the `return false;`/`return true;` lines are elided from this
// listing — presumably false on the required-but-unsupported path, true otherwise.
853 bool checkShaderStages (Context& context, const CaseDefinition& caseDef)
855 if (!subgroups::areSubgroupOperationsSupportedForStage(
856 context, caseDef.shaderStage))
858 if (subgroups::areSubgroupOperationsRequiredForStage(
859 caseDef.shaderStage))
865 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
// Test entry point for the SSBO variants. Compute cases run makeComputeTest
// directly; graphics cases query the device's supported subgroup stages,
// restrict the requested stage mask accordingly (falling back to fragment-only
// when vertex-stage SSBO writes are unavailable), and run allStages.
871 tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
873 if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
875 if(!checkShaderStages(context,caseDef))
877 return tcu::TestStatus::fail(
879 subgroups::getShaderStageName(caseDef.shaderStage) +
880 " is required to support subgroup operations!");
882 subgroups::SSBOData inputData;
883 inputData.format = caseDef.format;
884 inputData.numElements = subgroups::maxSupportedSubgroupSize();
885 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
887 return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
// Graphics path: query which stages actually support subgroup operations.
891 VkPhysicalDeviceSubgroupProperties subgroupProperties;
892 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
893 subgroupProperties.pNext = DE_NULL;
895 VkPhysicalDeviceProperties2 properties;
896 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
897 properties.pNext = &subgroupProperties;
899 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
901 VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProperties.supportedStages);
// Without vertex-stage SSBO writes we can only test the fragment stage.
903 if ( VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
905 if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
906 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
908 stages = VK_SHADER_STAGE_FRAGMENT_BIT;
911 if ((VkShaderStageFlagBits)0u == stages)
912 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
914 subgroups::SSBOData inputData;
915 inputData.format = caseDef.format;
916 inputData.numElements = subgroups::maxSupportedSubgroupSize();
917 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
// binding 4 matches the readonly Buffer2 declared in every generated shader.
918 inputData.binding = 4u;
919 inputData.stages = stages;
921 return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData,
922 1, checkVertexPipelineStages, stages);
// Builds the "partitioned" test group: for every format x op-type combination
// (skipping float+bitwise and bool+non-bitwise pairs) it registers a compute
// case, an all-graphics case, and one framebuffer case per single stage.
// NOTE(review): the switch headers and flag assignments (isFloat/isBool/
// isBitwiseOp = true), several case labels, and closing braces are elided
// from this listing.
931 tcu::TestCaseGroup* createSubgroupsPartitionedTests(tcu::TestContext& testCtx)
933 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
934 testCtx, "partitioned", "NV_shader_subgroup_partitioned category tests"));
// Single stages exercised by the framebuffer variants (compute and
// all-graphics are added separately below).
936 const VkShaderStageFlags stages[] =
938 VK_SHADER_STAGE_VERTEX_BIT,
939 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
940 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
941 VK_SHADER_STAGE_GEOMETRY_BIT,
944 const VkFormat formats[] =
946 VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
947 VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
948 VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
949 VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
950 VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
951 VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
952 VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
// USCALED 8-bit formats stand in for booleans in these tests.
953 VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
954 VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
957 for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
959 const VkFormat format = formats[formatIndex];
961 for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
964 bool isFloat = false;
970 case VK_FORMAT_R32_SFLOAT:
971 case VK_FORMAT_R32G32_SFLOAT:
972 case VK_FORMAT_R32G32B32_SFLOAT:
973 case VK_FORMAT_R32G32B32A32_SFLOAT:
974 case VK_FORMAT_R64_SFLOAT:
975 case VK_FORMAT_R64G64_SFLOAT:
976 case VK_FORMAT_R64G64B64_SFLOAT:
977 case VK_FORMAT_R64G64B64A64_SFLOAT:
980 case VK_FORMAT_R8_USCALED:
981 case VK_FORMAT_R8G8_USCALED:
982 case VK_FORMAT_R8G8B8_USCALED:
983 case VK_FORMAT_R8G8B8A8_USCALED:
988 bool isBitwiseOp = false;
995 case OPTYPE_INCLUSIVE_AND:
996 case OPTYPE_EXCLUSIVE_AND:
998 case OPTYPE_INCLUSIVE_OR:
999 case OPTYPE_EXCLUSIVE_OR:
1001 case OPTYPE_INCLUSIVE_XOR:
1002 case OPTYPE_EXCLUSIVE_XOR:
1007 if (isFloat && isBitwiseOp)
1009 // Skip float with bitwise category.
1013 if (isBool && !isBitwiseOp)
1015 // Skip bool when its not the bitwise category.
1018 std::string op = getOpTypeName(opTypeIndex);
1021 const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format};
1022 addFunctionCaseWithPrograms(group.get(),
1023 de::toLower(op) + "_" +
1024 subgroups::getFormatNameForGLSL(format) +
1025 "_" + getShaderStageName(caseDef.shaderStage),
1026 "", supportedCheck, initPrograms, test, caseDef);
1030 const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format};
1031 addFunctionCaseWithPrograms(group.get(),
1032 de::toLower(op) + "_" +
1033 subgroups::getFormatNameForGLSL(format) +
1035 "", supportedCheck, initPrograms, test, caseDef);
1038 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
1040 const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
1041 addFunctionCaseWithPrograms(group.get(), de::toLower(op) + "_" + subgroups::getFormatNameForGLSL(format) +
1042 "_" + getShaderStageName(caseDef.shaderStage) + "_framebuffer", "",
1043 supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
1048 return group.release();