1 /*------------------------------------------------------------------------
2 * OpenGL Conformance Tests
3 * ------------------------
5 * Copyright (c) 2017-2019 The Khronos Group Inc.
6 * Copyright (c) 2017 Codeplay Software Ltd.
7 * Copyright (c) 2019 NVIDIA Corporation.
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
23 * \brief Subgroups Tests
24 */ /*--------------------------------------------------------------------*/
26 #include "glcSubgroupsBallotOtherTests.hpp"
27 #include "glcSubgroupsTestsUtils.hpp"
// Ballot operations exercised by this file. Values start at 0 and are consecutive,
// which lets the test-group builder walk them with a plain integer index.
43 OPTYPE_INVERSE_BALLOT = 0,
44 OPTYPE_BALLOT_BIT_EXTRACT,
45 OPTYPE_BALLOT_BIT_COUNT,
46 OPTYPE_BALLOT_INCLUSIVE_BIT_COUNT,
47 OPTYPE_BALLOT_EXCLUSIVE_BIT_COUNT,
48 OPTYPE_BALLOT_FIND_LSB,
49 OPTYPE_BALLOT_FIND_MSB,
// Result-check callback handed to the framebuffer and all-stages test runners below.
// Forwards the result buffers and width to glc::subgroups::check together with 0xf;
// the trailing unnamed deUint32 argument is ignored.
// NOTE(review): 0xf presumably is the expected per-invocation value, i.e. all four
// sub-check bits (0x1 | 0x2 | 0x4 | 0x8) that the generated shader body ORs into
// tempResult -- confirm against the implementation of check().
53 static bool checkVertexPipelineStages(std::vector<const void*> datas,
54 deUint32 width, deUint32)
56 return glc::subgroups::check(datas, width, 0xf);
// Result-check callback handed to the compute test runner below.
// Forwards the result buffers and the workgroup count / local size triples to
// glc::subgroups::checkCompute together with 0xf.
// NOTE(review): 0xf presumably is the expected per-invocation value (all four
// sub-check bits set by the generated shader body) -- confirm against checkCompute().
59 static bool checkComputeStage(std::vector<const void*> datas,
60 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
63 return glc::subgroups::checkCompute(datas, numWorkgroups, localSize, 0xf);
// Maps an OPTYPE_* value to the name of the GLSL subgroup built-in it tests.
// The returned name is lower-cased by the caller to form the test case name.
// A value that matches none of the case labels is reported through
// DE_FATAL("Unsupported op type").
66 std::string getOpTypeName(int opType)
71 DE_FATAL("Unsupported op type");
73 case OPTYPE_INVERSE_BALLOT:
74 return "subgroupInverseBallot";
75 case OPTYPE_BALLOT_BIT_EXTRACT:
76 return "subgroupBallotBitExtract";
77 case OPTYPE_BALLOT_BIT_COUNT:
78 return "subgroupBallotBitCount";
79 case OPTYPE_BALLOT_INCLUSIVE_BIT_COUNT:
80 return "subgroupBallotInclusiveBitCount";
81 case OPTYPE_BALLOT_EXCLUSIVE_BIT_COUNT:
82 return "subgroupBallotExclusiveBitCount";
83 case OPTYPE_BALLOT_FIND_LSB:
84 return "subgroupBallotFindLSB";
85 case OPTYPE_BALLOT_FIND_MSB:
86 return "subgroupBallotFindMSB";
// Stage (or combination of stage bits) the case targets; compared against the
// SHADER_STAGE_* constants when shaders are generated and when the test is run.
93 ShaderStageFlags shaderStage;
96 std::string getBodySource(CaseDefinition caseDef)
98 std::ostringstream bdy;
100 bdy << " uvec4 allOnes = uvec4(0xFFFFFFFF);\n"
101 << " uvec4 allZeros = uvec4(0);\n"
102 << " uint tempResult = 0;\n"
103 << "#define MAKE_HIGH_BALLOT_RESULT(i) uvec4("
104 << "i >= 32 ? 0 : (0xFFFFFFFF << i), "
105 << "i >= 64 ? 0 : (0xFFFFFFFF << ((i < 32) ? 0 : (i - 32))), "
106 << "i >= 96 ? 0 : (0xFFFFFFFF << ((i < 64) ? 0 : (i - 64))), "
107 << " 0xFFFFFFFF << ((i < 96) ? 0 : (i - 96)))\n"
108 << "#define MAKE_SINGLE_BIT_BALLOT_RESULT(i) uvec4("
109 << "i >= 32 ? 0 : 0x1 << i, "
110 << "i < 32 || i >= 64 ? 0 : 0x1 << (i - 32), "
111 << "i < 64 || i >= 96 ? 0 : 0x1 << (i - 64), "
112 << "i < 96 ? 0 : 0x1 << (i - 96))\n";
114 switch (caseDef.opType)
117 DE_FATAL("Unknown op type!");
119 case OPTYPE_INVERSE_BALLOT:
120 bdy << " tempResult |= subgroupInverseBallot(allOnes) ? 0x1 : 0;\n"
121 << " tempResult |= subgroupInverseBallot(allZeros) ? 0 : 0x2;\n"
122 << " tempResult |= subgroupInverseBallot(subgroupBallot(true)) ? 0x4 : 0;\n"
123 << " tempResult |= 0x8;\n";
125 case OPTYPE_BALLOT_BIT_EXTRACT:
126 bdy << " tempResult |= subgroupBallotBitExtract(allOnes, gl_SubgroupInvocationID) ? 0x1 : 0;\n"
127 << " tempResult |= subgroupBallotBitExtract(allZeros, gl_SubgroupInvocationID) ? 0 : 0x2;\n"
128 << " tempResult |= subgroupBallotBitExtract(subgroupBallot(true), gl_SubgroupInvocationID) ? 0x4 : 0;\n"
129 << " tempResult |= 0x8;\n"
130 << " for (uint i = 0; i < gl_SubgroupSize; i++)\n"
132 << " if (!subgroupBallotBitExtract(allOnes, gl_SubgroupInvocationID))\n"
134 << " tempResult &= ~0x8;\n"
138 case OPTYPE_BALLOT_BIT_COUNT:
139 bdy << " tempResult |= gl_SubgroupSize == subgroupBallotBitCount(allOnes) ? 0x1 : 0;\n"
140 << " tempResult |= 0 == subgroupBallotBitCount(allZeros) ? 0x2 : 0;\n"
141 << " tempResult |= 0 < subgroupBallotBitCount(subgroupBallot(true)) ? 0x4 : 0;\n"
142 << " tempResult |= 0 == subgroupBallotBitCount(MAKE_HIGH_BALLOT_RESULT(gl_SubgroupSize)) ? 0x8 : 0;\n";
144 case OPTYPE_BALLOT_INCLUSIVE_BIT_COUNT:
145 bdy << " uint inclusiveOffset = gl_SubgroupInvocationID + 1;\n"
146 << " tempResult |= inclusiveOffset == subgroupBallotInclusiveBitCount(allOnes) ? 0x1 : 0;\n"
147 << " tempResult |= 0 == subgroupBallotInclusiveBitCount(allZeros) ? 0x2 : 0;\n"
148 << " tempResult |= 0 < subgroupBallotInclusiveBitCount(subgroupBallot(true)) ? 0x4 : 0;\n"
149 << " tempResult |= 0x8;\n"
150 << " uvec4 inclusiveUndef = MAKE_HIGH_BALLOT_RESULT(inclusiveOffset);\n"
151 << " bool undefTerritory = false;\n"
152 << " for (uint i = 0; i <= 128; i++)\n"
154 << " uvec4 iUndef = MAKE_HIGH_BALLOT_RESULT(i);\n"
155 << " if (iUndef == inclusiveUndef)"
157 << " undefTerritory = true;\n"
159 << " uint inclusiveBitCount = subgroupBallotInclusiveBitCount(iUndef);\n"
160 << " if (undefTerritory && (0 != inclusiveBitCount))\n"
162 << " tempResult &= ~0x8;\n"
164 << " else if (!undefTerritory && (0 == inclusiveBitCount))\n"
166 << " tempResult &= ~0x8;\n"
170 case OPTYPE_BALLOT_EXCLUSIVE_BIT_COUNT:
171 bdy << " uint exclusiveOffset = gl_SubgroupInvocationID;\n"
172 << " tempResult |= exclusiveOffset == subgroupBallotExclusiveBitCount(allOnes) ? 0x1 : 0;\n"
173 << " tempResult |= 0 == subgroupBallotExclusiveBitCount(allZeros) ? 0x2 : 0;\n"
174 << " tempResult |= 0x4;\n"
175 << " tempResult |= 0x8;\n"
176 << " uvec4 exclusiveUndef = MAKE_HIGH_BALLOT_RESULT(exclusiveOffset);\n"
177 << " bool undefTerritory = false;\n"
178 << " for (uint i = 0; i <= 128; i++)\n"
180 << " uvec4 iUndef = MAKE_HIGH_BALLOT_RESULT(i);\n"
181 << " if (iUndef == exclusiveUndef)"
183 << " undefTerritory = true;\n"
185 << " uint exclusiveBitCount = subgroupBallotExclusiveBitCount(iUndef);\n"
186 << " if (undefTerritory && (0 != exclusiveBitCount))\n"
188 << " tempResult &= ~0x4;\n"
190 << " else if (!undefTerritory && (0 == exclusiveBitCount))\n"
192 << " tempResult &= ~0x8;\n"
196 case OPTYPE_BALLOT_FIND_LSB:
197 bdy << " tempResult |= 0 == subgroupBallotFindLSB(allOnes) ? 0x1 : 0;\n"
198 << " if (subgroupElect())\n"
200 << " tempResult |= 0x2;\n"
204 << " tempResult |= 0 < subgroupBallotFindLSB(subgroupBallot(true)) ? 0x2 : 0;\n"
206 << " tempResult |= gl_SubgroupSize > subgroupBallotFindLSB(subgroupBallot(true)) ? 0x4 : 0;\n"
207 << " tempResult |= 0x8;\n"
208 << " for (uint i = 0; i < gl_SubgroupSize; i++)\n"
210 << " if (i != subgroupBallotFindLSB(MAKE_HIGH_BALLOT_RESULT(i)))\n"
212 << " tempResult &= ~0x8;\n"
216 case OPTYPE_BALLOT_FIND_MSB:
217 bdy << " tempResult |= (gl_SubgroupSize - 1) == subgroupBallotFindMSB(allOnes) ? 0x1 : 0;\n"
218 << " if (subgroupElect())\n"
220 << " tempResult |= 0x2;\n"
224 << " tempResult |= 0 < subgroupBallotFindMSB(subgroupBallot(true)) ? 0x2 : 0;\n"
226 << " tempResult |= gl_SubgroupSize > subgroupBallotFindMSB(subgroupBallot(true)) ? 0x4 : 0;\n"
227 << " tempResult |= 0x8;\n"
228 << " for (uint i = 0; i < gl_SubgroupSize; i++)\n"
230 << " if (i != subgroupBallotFindMSB(MAKE_SINGLE_BIT_BALLOT_RESULT(i)))\n"
232 << " tempResult &= ~0x8;\n"
// Registers the shader sources for the framebuffer variant of a case.
// The stage named by caseDef.shaderStage gets a GLSL 4.50 shader that enables
// GL_KHR_shader_subgroup_ballot and writes float(tempResult) to out_color; the
// remaining stages come from the stock subgroups::set*ShaderFrameBuffer helpers.
// An unhandled stage is reported through DE_FATAL("Unsupported shader stage").
240 void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
242 subgroups::setFragmentShaderFrameBuffer(programCollection);
// The stock vertex shader is only needed when the vertex stage is not the one under test.
244 if (SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
245 subgroups::setVertexShaderFrameBuffer(programCollection);
// GLSL statements for the selected operation; they leave their verdict in tempResult.
247 std::string bdyStr = getBodySource(caseDef);
249 if (SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
251 std::ostringstream vertex;
252 vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
253 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
254 << "layout(location = 0) in highp vec4 in_position;\n"
255 << "layout(location = 0) out float out_color;\n"
257 << "void main (void)\n"
260 << " out_color = float(tempResult);\n"
261 << " gl_Position = in_position;\n"
262 << " gl_PointSize = 1.0f;\n"
264 programCollection.add("vert") << glu::VertexSource(vertex.str());
266 else if (SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
268 std::ostringstream geometry;
270 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
271 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
272 << "layout(points) in;\n"
273 << "layout(points, max_vertices = 1) out;\n"
274 << "layout(location = 0) out float out_color;\n"
275 << "void main (void)\n"
278 << " out_color = float(tempResult);\n"
279 << " gl_Position = gl_in[0].gl_Position;\n"
280 << " EmitVertex();\n"
281 << " EndPrimitive();\n"
284 programCollection.add("geometry") << glu::GeometrySource(geometry.str());
286 else if (SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
288 std::ostringstream controlSource;
290 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
291 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
292 << "layout(vertices = 2) out;\n"
293 << "layout(location = 0) out float out_color[];\n"
295 << "void main (void)\n"
297 << " if (gl_InvocationID == 0)\n"
299 << " gl_TessLevelOuter[0] = 1.0f;\n"
300 << " gl_TessLevelOuter[1] = 1.0f;\n"
303 << " out_color[gl_InvocationID ] = float(tempResult);\n"
304 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
307 programCollection.add("tesc") << glu::TessellationControlSource(controlSource.str());
// The tessellation evaluation stage is the stock one when control is under test.
308 subgroups::setTesEvalShaderFrameBuffer(programCollection);
310 else if (SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
312 std::ostringstream evaluationSource;
313 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
314 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
315 << "layout(isolines, equal_spacing, ccw ) in;\n"
316 << "layout(location = 0) out float out_color;\n"
317 << "void main (void)\n"
320 << " out_color = float(tempResult);\n"
321 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
// The tessellation control stage is the stock one when evaluation is under test.
324 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
325 programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str());
329 DE_FATAL("Unsupported shader stage");
// Registers the shader sources for the SSBO-based variants of a case.
// For the compute stage a single "comp" shader is added, with ${LOCAL_SIZE_X/Y/Z}
// placeholders left in the source. For every other stage value a full set of
// graphics shaders ("vert", "tesc", "tese", geometry variants, "fragment") is added.
// Each shader stores tempResult, the verdict computed by the generated test body.
333 void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
// GLSL statements for the selected operation; they leave their verdict in tempResult.
335 std::string bdyStr = getBodySource(caseDef);
337 if (SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
339 std::ostringstream src;
// The shader linearises gl_GlobalInvocationID over the global size and uses that
// as the index into result[].
341 src << "#version 450\n"
342 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
343 << "layout (${LOCAL_SIZE_X}, ${LOCAL_SIZE_Y}, ${LOCAL_SIZE_Z}) in;\n"
344 << "layout(binding = 0, std430) buffer Buffer0\n"
346 << " uint result[];\n"
349 << "void main (void)\n"
351 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
352 << " highp uint offset = globalSize.x * ((globalSize.y * "
353 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
354 "gl_GlobalInvocationID.x;\n"
356 << " result[offset] = tempResult;\n"
359 programCollection.add("comp") << glu::ComputeSource(src.str());
// Vertex stage: result goes to buffer binding 0, indexed by gl_VertexID. Each vertex
// is positioned at its own pixel centre, assuming a 1024-pixel-wide target.
363 const string vertex =
365 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
366 "layout(binding = 0, std430) buffer Buffer0\n"
374 " b0.result[gl_VertexID] = tempResult;\n"
375 " float pixelSize = 2.0f/1024.0f;\n"
376 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
377 " gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
378 " gl_PointSize = 1.0f;\n"
// Tessellation control stage: result goes to buffer binding 1, indexed by gl_PrimitiveID.
383 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
384 "layout(vertices=1) out;\n"
385 "layout(binding = 1, std430) buffer Buffer1\n"
393 " b1.result[gl_PrimitiveID] = tempResult;\n"
394 " if (gl_InvocationID == 0)\n"
396 " gl_TessLevelOuter[0] = 1.0f;\n"
397 " gl_TessLevelOuter[1] = 1.0f;\n"
399 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
// Tessellation evaluation stage: result goes to buffer binding 2, two slots per
// primitive, selected by rounding gl_TessCoord.x.
404 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
405 "layout(isolines) in;\n"
406 "layout(binding = 2, std430) buffer Buffer2\n"
414 " b2.result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
415 " float pixelSize = 2.0f/1024.0f;\n"
416 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
// Geometry stage: a template with a ${TOPOLOGY} placeholder, expanded by
// addGeometryShadersFromTemplate below; result goes to buffer binding 3.
419 const string geometry =
421 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
422 "layout(${TOPOLOGY}) in;\n"
423 "layout(points, max_vertices = 1) out;\n"
424 "layout(binding = 3, std430) buffer Buffer3\n"
432 " b3.result[gl_PrimitiveIDIn] = tempResult;\n"
433 " gl_Position = gl_in[0].gl_Position;\n"
// Fragment stage: result is written to the colour output at location 0.
438 const string fragment =
440 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
441 "layout(location = 0) out uint result;\n"
445 " result = tempResult;\n"
// NOTE(review): presumably adds fallback shaders that do not use subgroup
// operations -- confirm against addNoSubgroupShader().
448 subgroups::addNoSubgroupShader(programCollection);
450 programCollection.add("vert") << glu::VertexSource(vertex);
451 programCollection.add("tesc") << glu::TessellationControlSource(tesc);
452 programCollection.add("tese") << glu::TessellationEvaluationSource(tese);
453 subgroups::addGeometryShadersFromTemplate(geometry, programCollection);
454 programCollection.add("fragment") << glu::FragmentSource(fragment);
// Support gate for every case in this file.
// Throws NotSupportedError when subgroup operations as a whole, or the ballot
// feature (SUBGROUP_FEATURE_BALLOT_BIT) in particular, are not supported.
// caseDef is not consulted by the checks below.
458 void supportedCheck (Context& context, CaseDefinition caseDef)
461 if (!subgroups::isSubgroupSupported(context))
462 TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
464 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_BALLOT_BIT))
466 TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
// Runs the framebuffer variant of a case for the single stage in caseDef.shaderStage.
// Returns the status produced by the matching subgroups::make*FrameBufferTest helper.
// Throws NotSupportedError when the stage lacks subgroup support and is not required
// to have it; throws InternalError for a stage that none of the branches handle.
470 tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
472 if (!subgroups::areSubgroupOperationsSupportedForStage(
473 context, caseDef.shaderStage))
// Missing support is a failure only for stages that are required to provide it.
475 if (subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
477 return tcu::TestStatus::fail(
479 subgroups::getShaderStageName(caseDef.shaderStage) +
480 " is required to support subgroup operations!");
484 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
// All stages share the same result check; results are read back as R32_UINT.
488 if (SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
489 return subgroups::makeVertexFrameBufferTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
490 else if (SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
491 return subgroups::makeGeometryFrameBufferTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
// Both tessellation stages are driven through the same helper.
492 else if ((SHADER_STAGE_TESS_CONTROL_BIT | SHADER_STAGE_TESS_EVALUATION_BIT) & caseDef.shaderStage)
493 return subgroups::makeTessellationEvaluationFrameBufferTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
495 TCU_THROW(InternalError, "Unhandled shader stage");
// Runs the SSBO-based variant of a case.
// Compute: fails if the compute stage lacks subgroup support, otherwise returns the
// status of subgroups::makeComputeTest.
// Graphics: restricts the requested stages to those reported by
// GL_SUBGROUP_SUPPORTED_STAGES_KHR and returns the status of subgroups::allStages.
// Throws NotSupportedError when no usable stage remains.
498 tcu::TestStatus test (Context& context, const CaseDefinition caseDef)
500 if (SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
502 if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
504 return tcu::TestStatus::fail(
506 subgroups::getShaderStageName(caseDef.shaderStage) +
507 " is required to support subgroup operations!");
509 return subgroups::makeComputeTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkComputeStage);
513 int supportedStages = context.getDeqpContext().getContextInfo().getInt(GL_SUBGROUP_SUPPORTED_STAGES_KHR);
// Keep only the requested stages that the implementation reports as supported.
515 ShaderStageFlags stages = (ShaderStageFlags)(caseDef.shaderStage & supportedStages);
// Without vertex-stage SSBO writes only the fragment stage can be tested; if the
// fragment stage is not among the remaining stages the case cannot run at all.
517 if ( SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
519 if ( (stages & SHADER_STAGE_FRAGMENT_BIT) == 0)
520 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
522 stages = SHADER_STAGE_FRAGMENT_BIT;
525 if ((ShaderStageFlags)0u == stages)
526 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
528 return subgroups::allStages(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, stages);
// NOTE(review): appears unreachable, since both paths above end in a return or a
// throw -- presumably kept to satisfy compilers that warn about a missing return; confirm.
530 return tcu::TestStatus::pass("OK");
// Builds the "ballot_other" test group and returns it with ownership released to the caller.
// The group has three children -- "graphics", "compute" and "framebuffer" -- and every
// operation index below OPTYPE_LAST contributes:
//   - one compute case (SHADER_STAGE_COMPUTE_BIT),
//   - one graphics case covering SHADER_STAGE_ALL_GRAPHICS,
//   - one framebuffer case per entry of stages[].
// Case names are the lower-cased built-in name, with "_<stage name>" appended for
// framebuffer cases.
534 deqp::TestCaseGroup* createSubgroupsBallotOtherTests(deqp::Context& testCtx)
536 de::MovePtr<deqp::TestCaseGroup> graphicGroup(new deqp::TestCaseGroup(
537 testCtx, "graphics", "Subgroup ballot other category tests: graphics"));
538 de::MovePtr<deqp::TestCaseGroup> computeGroup(new deqp::TestCaseGroup(
539 testCtx, "compute", "Subgroup ballot other category tests: compute"));
540 de::MovePtr<deqp::TestCaseGroup> framebufferGroup(new deqp::TestCaseGroup(
541 testCtx, "framebuffer", "Subgroup ballot other category tests: framebuffer"));
// Stages that get an individual framebuffer case.
543 const ShaderStageFlags stages[] =
545 SHADER_STAGE_VERTEX_BIT,
546 SHADER_STAGE_TESS_EVALUATION_BIT,
547 SHADER_STAGE_TESS_CONTROL_BIT,
548 SHADER_STAGE_GEOMETRY_BIT,
551 for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
553 const string op = de::toLower(getOpTypeName(opTypeIndex));
// Compute and graphics cases use the SSBO-based program builder and runner.
555 const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_COMPUTE_BIT};
556 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(computeGroup.get(), op, "", supportedCheck, initPrograms, test, caseDef);
560 const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_ALL_GRAPHICS};
561 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(graphicGroup.get(), op, "", supportedCheck, initPrograms, test, caseDef);
// Framebuffer cases use the framebuffer program builder and runner instead.
564 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
566 const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex]};
567 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(framebufferGroup.get(), op + "_" + getShaderStageName(caseDef.shaderStage), "", supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
571 de::MovePtr<deqp::TestCaseGroup> group(new deqp::TestCaseGroup(
572 testCtx, "ballot_other", "Subgroup ballot other category tests"));
// release() hands each child over to the parent group.
574 group->addChild(graphicGroup.release());
575 group->addChild(computeGroup.release());
576 group->addChild(framebufferGroup.release());
578 return group.release();