Fix WavePrefixCountBits() being off by one.
author Ryp <thibault.schueller@protonmail.com>
Thu, 21 Apr 2022 19:05:17 +0000 (22:05 +0300)
committer Ryp <thibault.schueller@protonmail.com>
Fri, 22 Apr 2022 17:59:10 +0000 (20:59 +0300)
It was counting bits up to and including the current lane, whereas the
documentation says the current lane should be excluded. This now matches
dxc's behavior as well.

Fix #2929

Test/baseResults/hlsl.waveprefix.comp.out
glslang/HLSL/hlslParseHelper.cpp

index cc4737e..e4e942c 100644 (file)
@@ -1126,7 +1126,7 @@ local_size = (32, 16, 1)
 0:54              0 (const int)
 0:54          Constant:
 0:54            0 (const int)
-0:54        subgroupBallotInclusiveBitCount ( temp uint)
+0:54        subgroupBallotExclusiveBitCount ( temp uint)
 0:54          subgroupBallot ( temp 4-component vector of uint)
 0:54            Compare Equal ( temp bool)
 0:54              direct index ( temp uint)
@@ -2289,7 +2289,7 @@ local_size = (32, 16, 1)
 0:54              0 (const int)
 0:54          Constant:
 0:54            0 (const int)
-0:54        subgroupBallotInclusiveBitCount ( temp uint)
+0:54        subgroupBallotExclusiveBitCount ( temp uint)
 0:54          subgroupBallot ( temp 4-component vector of uint)
 0:54            Compare Equal ( temp bool)
 0:54              direct index ( temp uint)
@@ -2818,7 +2818,7 @@ local_size = (32, 16, 1)
              390:      6(int) Load 389
              392:   391(bool) IEqual 390 26
              393:   13(ivec4) GroupNonUniformBallot 35 392
-             394:      6(int) GroupNonUniformBallotBitCount 35 InclusiveScan 393
+             394:      6(int) GroupNonUniformBallotBitCount 35 ExclusiveScan 393
              395:     42(ptr) AccessChain 24(data) 25 386 25 26
                               Store 395 394
                               Return
index 2d0a8e9..e9369a0 100644 (file)
@@ -5430,7 +5430,7 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*&
         }
     case EOpWavePrefixCountBits:
         {
-            // Mapped to subgroupBallotInclusiveBitCount(subgroupBallot())
+            // Mapped to subgroupBallotExclusiveBitCount(subgroupBallot())
             // builtin
 
             // uvec4 type.
@@ -5444,7 +5444,7 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*&
             TType uintType(EbtUint, EvqTemporary);
 
             node = intermediate.addBuiltInFunctionCall(loc,
-                EOpSubgroupBallotInclusiveBitCount, true, res, uintType);
+                EOpSubgroupBallotExclusiveBitCount, true, res, uintType);
 
             break;
         }