gallivm: Return 0 for first active invocation when no invocations are active.
author Emma Anholt <emma@anholt.net>
Tue, 28 Feb 2023 20:06:11 +0000 (12:06 -0800)
committer Marge Bot <emma+marge@anholt.net>
Thu, 2 Mar 2023 04:47:35 +0000 (04:47 +0000)
gallivm doesn't actually jump across branches where no invocations are
active, so my previous assertion about the exec mask being nonzero was
incorrect.  With this change, we'll always use a defined invocation for the
various LLVMBuildExtractElements using the result value, which is an
improvement over even the code before my cttz change, which would use
undefined values for the element to be extracted.

Fixes: 8c2493d041c4 ("gallivm: Use cttz instead of a loop for first_active_invocation().")
Reviewed-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21142>

src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c

index 7bbaccf..7daabd0 100644 (file)
@@ -94,7 +94,16 @@ invocation_0_must_be_active(struct lp_build_nir_context *bld_base)
    return true;
 }
 
-/** Returns a scalar value of the first active invocation in the exec_mask. */
+/**
+ * Returns a scalar value of the first active invocation in the exec_mask.
+ *
+ * Note that gallivm doesn't generally jump when exec_mask is 0 (such as if/else
+ * branches that are all false, or portions of a loop after a break/continue
+ * has ended the last invocation that had been active in the loop).  In that
+ * case, we return a 0 value so that unconditional LLVMBuildExtractElement of
+ * the first_active_invocation (such as in memory loads, texture unit index
+ * lookups, etc) will use a valid index.
+ */
 static LLVMValueRef first_active_invocation(struct lp_build_nir_context *bld_base)
 {
    struct gallivm_state *gallivm = bld_base->base.gallivm;
@@ -114,11 +123,12 @@ static LLVMValueRef first_active_invocation(struct lp_build_nir_context *bld_bas
    bitmask = LLVMBuildBitCast(builder, bitmask, LLVMIntTypeInContext(gallivm->context, uint_bld->type.length), "exec_bitmask");
    bitmask = LLVMBuildZExt(builder, bitmask, bld_base->int_bld.elem_type, "");
 
-   /* We know that exec mask always has a set bit (otherwise we would have
-    * jumped), so we can set is_zero_poison to true.
-    */
-   return lp_build_intrinsic_binary(builder, "llvm.cttz.i32", bld_base->int_bld.elem_type, bitmask,
-                                    LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), true, false));
+   LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, lp_build_const_int32(gallivm, 0), "any_active");
+
+   LLVMValueRef first_active = lp_build_intrinsic_binary(builder, "llvm.cttz.i32", bld_base->int_bld.elem_type, bitmask,
+                                                         LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), false, false));
+
+   return LLVMBuildSelect(builder, any_active, first_active, lp_build_const_int32(gallivm, 0), "first_active_or_0");
 }
 
 static LLVMValueRef