ac/llvm: add missing optimization barrier for 64-bit readlanes
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 27 Jan 2020 12:42:11 +0000 (13:42 +0100)
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 12 Mar 2020 07:46:42 +0000 (08:46 +0100)
Otherwise, LLVM optimizes it but it's actually incorrect.

Fixes: 0f45d4dc2b1 ("ac: add ac_build_readlane without optimization barrier")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3585>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3585>

src/amd/llvm/ac_llvm_build.c

index 760d912..93e2e28 100644 (file)
@@ -3611,11 +3611,15 @@ void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
 }
 
 static LLVMValueRef
-_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src,
+                 LLVMValueRef lane, bool with_opt_barrier)
 {
        LLVMTypeRef type = LLVMTypeOf(src);
        LLVMValueRef result;
 
+       if (with_opt_barrier)
+               ac_build_optimization_barrier(ctx, &src);
+
        src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
        if (lane)
                lane = LLVMBuildZExt(ctx->builder, lane, ctx->i32, "");
@@ -3630,20 +3634,13 @@ _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef l
        return LLVMBuildTrunc(ctx->builder, result, type, "");
 }
 
-/**
- * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
- *
- * The optimization barrier is not needed if the value is the same in all lanes
- * or if this is called in the outermost block.
- *
- * @param ctx
- * @param src
- * @param lane - id of the lane or NULL for the first active lane
- * @return value of the lane
- */
-LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
-                                             LLVMValueRef src, LLVMValueRef lane)
+static LLVMValueRef
+ac_build_readlane_common(struct ac_llvm_context *ctx,
+                        LLVMValueRef src, LLVMValueRef lane,
+                        bool with_opt_barrier)
 {
+       LLVMTypeRef src_type = LLVMTypeOf(src);
+       src = ac_to_integer(ctx, src);
        unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(src));
        LLVMValueRef ret;
 
@@ -3654,32 +3651,48 @@ LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
                        LLVMBuildBitCast(ctx->builder, src, vec_type, "");
                ret = LLVMGetUndef(vec_type);
                for (unsigned i = 0; i < bits / 32; i++) {
+                       LLVMValueRef ret_comp;
+
                        src = LLVMBuildExtractElement(ctx->builder, src_vector,
                                                LLVMConstInt(ctx->i32, i, 0), "");
-                       LLVMValueRef ret_comp = _ac_build_readlane(ctx, src, lane);
+
+                       ret_comp = _ac_build_readlane(ctx, src, lane,
+                                                     with_opt_barrier);
+
                        ret = LLVMBuildInsertElement(ctx->builder, ret, ret_comp,
                                                LLVMConstInt(ctx->i32, i, 0), "");
                }
        } else {
-               ret = _ac_build_readlane(ctx, src, lane);
+               ret = _ac_build_readlane(ctx, src, lane, with_opt_barrier);
        }
 
-       return ret;
+       if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
+               return LLVMBuildIntToPtr(ctx->builder, ret, src_type, "");
+       return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
 }
 
-LLVMValueRef
-ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+/**
+ * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
+ *
+ * The optimization barrier is not needed if the value is the same in all lanes
+ * or if this is called in the outermost block.
+ *
+ * @param ctx
+ * @param src
+ * @param lane - id of the lane or NULL for the first active lane
+ * @return value of the lane
+ */
+LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx,
+                                             LLVMValueRef src, LLVMValueRef lane)
 {
-       LLVMTypeRef src_type = LLVMTypeOf(src);
-       src = ac_to_integer(ctx, src);
-       LLVMValueRef ret;
+       return ac_build_readlane_common(ctx, src, lane, false);
+}
 
-       ac_build_optimization_barrier(ctx, &src);
 
-       ret = ac_build_readlane_no_opt_barrier(ctx, src, lane);
-       if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
-               return LLVMBuildIntToPtr(ctx->builder, ret, src_type, "");
-       return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
+LLVMValueRef
+ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+{
+       return ac_build_readlane_common(ctx, src, lane, true);
 }
 
 LLVMValueRef