From f906b895ecb710a018731aebc7fbd484e707a49b Mon Sep 17 00:00:00 2001 From: Ryp Date: Thu, 21 Apr 2022 22:05:17 +0300 Subject: [PATCH] Fix WavePrefixCountBits() being off by one. It was counting bits up to the current lane included, whereas the documentation says it should be excluded. This now matches dxc's behavior as well. Fix #2929 --- Test/baseResults/hlsl.waveprefix.comp.out | 6 +++--- glslang/HLSL/hlslParseHelper.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Test/baseResults/hlsl.waveprefix.comp.out b/Test/baseResults/hlsl.waveprefix.comp.out index cc4737e..e4e942c 100644 --- a/Test/baseResults/hlsl.waveprefix.comp.out +++ b/Test/baseResults/hlsl.waveprefix.comp.out @@ -1126,7 +1126,7 @@ local_size = (32, 16, 1) 0:54 0 (const int) 0:54 Constant: 0:54 0 (const int) -0:54 subgroupBallotInclusiveBitCount ( temp uint) +0:54 subgroupBallotExclusiveBitCount ( temp uint) 0:54 subgroupBallot ( temp 4-component vector of uint) 0:54 Compare Equal ( temp bool) 0:54 direct index ( temp uint) @@ -2289,7 +2289,7 @@ local_size = (32, 16, 1) 0:54 0 (const int) 0:54 Constant: 0:54 0 (const int) -0:54 subgroupBallotInclusiveBitCount ( temp uint) +0:54 subgroupBallotExclusiveBitCount ( temp uint) 0:54 subgroupBallot ( temp 4-component vector of uint) 0:54 Compare Equal ( temp bool) 0:54 direct index ( temp uint) @@ -2818,7 +2818,7 @@ local_size = (32, 16, 1) 390: 6(int) Load 389 392: 391(bool) IEqual 390 26 393: 13(ivec4) GroupNonUniformBallot 35 392 - 394: 6(int) GroupNonUniformBallotBitCount 35 InclusiveScan 393 + 394: 6(int) GroupNonUniformBallotBitCount 35 ExclusiveScan 393 395: 42(ptr) AccessChain 24(data) 25 386 25 26 Store 395 394 Return diff --git a/glslang/HLSL/hlslParseHelper.cpp b/glslang/HLSL/hlslParseHelper.cpp index 2d0a8e9..e9369a0 100644 --- a/glslang/HLSL/hlslParseHelper.cpp +++ b/glslang/HLSL/hlslParseHelper.cpp @@ -5430,7 +5430,7 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*& } case EOpWavePrefixCountBits: { - // Mapped to subgroupBallotInclusiveBitCount(subgroupBallot()) + // Mapped to subgroupBallotExclusiveBitCount(subgroupBallot()) // builtin // uvec4 type. @@ -5444,7 +5444,7 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*& TType uintType(EbtUint, EvqTemporary); node = intermediate.addBuiltInFunctionCall(loc, - EOpSubgroupBallotInclusiveBitCount, true, res, uintType); + EOpSubgroupBallotExclusiveBitCount, true, res, uintType); break; } -- 2.7.4