ir3: Prevent reordering movmsk with kill
authorDanylo Piliaiev <dpiliaiev@igalia.com>
Mon, 5 Sep 2022 09:18:55 +0000 (12:18 +0300)
committerMarge Bot <emma+marge@anholt.net>
Wed, 14 Sep 2022 11:56:28 +0000 (11:56 +0000)
`kill` changes which fibers are active, so reordering instructions
that depend on the set of active fibers across a `kill` is wrong.

The issue was hidden because only `ballot(true)` is translated to movmsk
immediately, while the other ballots are emitted as a MACRO and don't
properly take part in ir3_sched (which does the reordering).

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7162

Fixes CTS test (on gen3+):
 dEQP-VK.spirv_assembly.instruction.terminate_invocation.terminate.subgroup_ballot

Fixes: b1b80c06a78e62b2d8477b07f12b0153435b66a8
("ir3: Implement nir subgroup intrinsics")

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18413>

src/freedreno/ir3/ir3.h
src/freedreno/ir3/ir3_compiler_nir.c

index 222eb61..667dca8 100644 (file)
@@ -483,6 +483,8 @@ struct ir3_instruction {
       IR3_BARRIER_PRIVATE_R = 1 << 9,
       IR3_BARRIER_PRIVATE_W = 1 << 10,
       IR3_BARRIER_CONST_W = 1 << 11,
+      IR3_BARRIER_ACTIVE_FIBERS_R = 1 << 12,
+      IR3_BARRIER_ACTIVE_FIBERS_W = 1 << 13,
    } barrier_class,
       barrier_conflict;
 
index 45f6c80..0272da3 100644 (file)
@@ -2471,9 +2471,13 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
          kill = ir3_KILL(b, cond, 0);
       }
 
-      /* Side-effects should not be moved on a different side of the kill */
-      kill->barrier_class = IR3_BARRIER_IMAGE_W | IR3_BARRIER_BUFFER_W;
-      kill->barrier_conflict = IR3_BARRIER_IMAGE_W | IR3_BARRIER_BUFFER_W;
+      /* - Side-effects should not be moved on a different side of the kill
+       * - Instructions that depend on active fibers should not be reordered
+       */
+      kill->barrier_class = IR3_BARRIER_IMAGE_W | IR3_BARRIER_BUFFER_W |
+                            IR3_BARRIER_ACTIVE_FIBERS_W;
+      kill->barrier_conflict = IR3_BARRIER_IMAGE_W | IR3_BARRIER_BUFFER_W |
+                               IR3_BARRIER_ACTIVE_FIBERS_R;
       kill->srcs[0]->num = regid(REG_P0, 0);
       array_insert(ctx->ir, ctx->ir->predicates, kill);
 
@@ -2566,6 +2570,10 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
          array_insert(ctx->ir, ctx->ir->predicates, ballot);
          ctx->max_stack = MAX2(ctx->max_stack, ctx->stack + 1);
       }
+
+      ballot->barrier_class = IR3_BARRIER_ACTIVE_FIBERS_R;
+      ballot->barrier_conflict = IR3_BARRIER_ACTIVE_FIBERS_W;
+
       ir3_split_dest(ctx->block, dst, ballot, 0, components);
       break;
    }