From: Lionel Landwerlin Date: Tue, 21 Nov 2023 08:38:19 +0000 (+0200) Subject: intel/fs: fix incorrect register flag interaction with dynamic interpolator mode X-Git-Tag: upstream/23.3.3~160 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0b7f91be9d1a16433d4a598490542f459bbfa761;p=platform%2Fupstream%2Fmesa.git intel/fs: fix incorrect register flag interaction with dynamic interpolator mode Once NIR code is lowered and a few optimization passes have run, there might be flag register interactions between instructions quite far away from one another. In the following case : f0 = and r0, r1 ... fs_interpolate r2, r3 ... if f0 ... endif If we lower fs_inteporlate while using the f0 register, we completely garble the value meant for the if block. To fix this, emit the predication for fs_interpolate in brw_fs_nir.cpp when doing the NIR translation to the backend IR. This will guarantee that the flag register interactions are visible to the optimization passes, avoiding the problem above. Signed-off-by: Lionel Landwerlin Fixes: 68027bd38e ("intel/fs: implement dynamic interpolation mode for dynamic persample shaders") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9757 Reviewed-by: Ian Romanick Part-of: (cherry picked from commit 83a1657b6c7f117f1226a955b8d2f1e01b22d322) --- diff --git a/.pick_status.json b/.pick_status.json index 375420b..c3a3a68 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1824,7 +1824,7 @@ "description": "intel/fs: fix incorrect register flag interaction with dynamic interpolator mode", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "68027bd38e134f45d1fe8612c0c31e5379ed7435", "notes": null diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index b01c221..0d4b93e 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -986,6 +986,17 @@ enum urb_logical_srcs { URB_LOGICAL_NUM_SRCS }; +enum interpolator_logical_srcs { + /** Interpolation offset */ + INTERP_SRC_OFFSET, + /** Message data */ + INTERP_SRC_MSG_DESC, + /** Flag register for dynamic mode */ + INTERP_SRC_DYNAMIC_MODE, + + INTERP_NUM_SRCS +}; + #ifdef __cplusplus /** diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 34df9a8..1ba548b 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -2092,12 +2092,18 @@ emit_pixel_interpolater_send(const fs_builder &bld, const fs_reg &dst, const fs_reg &src, const fs_reg &desc, + const fs_reg &flag_reg, glsl_interp_mode interpolation) { struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(bld.shader->stage_prog_data); - fs_inst *inst = bld.emit(opcode, dst, src, desc); + fs_reg srcs[INTERP_NUM_SRCS]; + srcs[INTERP_SRC_OFFSET] = src; + srcs[INTERP_SRC_MSG_DESC] = desc; + srcs[INTERP_SRC_DYNAMIC_MODE] = flag_reg; + + fs_inst *inst = bld.emit(opcode, dst, srcs, INTERP_NUM_SRCS); /* 2 floats per slot returned */ inst->size_written = 2 * dst.component_size(inst->exec_size); if (interpolation == INTERP_MODE_NOPERSPECTIVE) { @@ -3578,11 +3584,23 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, bld.exec_all().group(1, 0).SHL(msg_data, sample_id, brw_imm_ud(4u)); } + fs_reg flag_reg; + struct brw_wm_prog_key *wm_prog_key = (struct brw_wm_prog_key *) key; + if (wm_prog_key->multisample_fbo == BRW_SOMETIMES) { + struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data); + + check_dynamic_msaa_flag(bld.exec_all().group(8, 0), + wm_prog_data, + BRW_WM_MSAA_FLAG_MULTISAMPLE_FBO); + flag_reg = brw_flag_reg(0, 0); + } + emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, dest, fs_reg(), /* src */ msg_data, + flag_reg, interpolation); break; } @@ -3603,6 +3621,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, dest, fs_reg(), /* src */ brw_imm_ud(off_x | (off_y << 4)), + fs_reg(), /* flag_reg */ interpolation); } else { fs_reg src = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D); @@ -3612,6 +3631,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, dest, src, brw_imm_ud(0u), + fs_reg(), /* flag_reg */ interpolation); } break; diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 1e7d766..40daed3 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -2727,17 +2727,17 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst, unsigned mode; switch (inst->opcode) { case FS_OPCODE_INTERPOLATE_AT_SAMPLE: - assert(inst->src[0].file == BAD_FILE); + assert(inst->src[INTERP_SRC_OFFSET].file == BAD_FILE); mode = GFX7_PIXEL_INTERPOLATOR_LOC_SAMPLE; break; case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: - assert(inst->src[0].file == BAD_FILE); + assert(inst->src[INTERP_SRC_OFFSET].file == BAD_FILE); mode = GFX7_PIXEL_INTERPOLATOR_LOC_SHARED_OFFSET; break; case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: - payload = inst->src[0]; + payload = inst->src[INTERP_SRC_OFFSET]; mlen = 2 * inst->exec_size / 8; mode = GFX7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET; break; @@ -2747,10 +2747,9 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst, } const bool dynamic_mode = - inst->opcode == FS_OPCODE_INTERPOLATE_AT_SAMPLE && - wm_prog_key->multisample_fbo == BRW_SOMETIMES; + inst->src[INTERP_SRC_DYNAMIC_MODE].file != BAD_FILE; - fs_reg desc = inst->src[1]; + fs_reg desc = inst->src[INTERP_SRC_MSG_DESC]; uint32_t desc_imm = brw_pixel_interp_desc(devinfo, /* Leave the mode at 0 if persample_dispatch is @@ -2799,8 +2798,10 @@ lower_interpolator_logical_send(const fs_builder &bld, fs_inst *inst, const fs_builder &ubld = bld.exec_all().group(8, 0); desc = ubld.vgrf(BRW_REGISTER_TYPE_UD); - check_dynamic_msaa_flag(ubld, wm_prog_data, - BRW_WM_MSAA_FLAG_MULTISAMPLE_FBO); + /* The predicate should have been built in brw_fs_nir.cpp when emitting + * NIR code. This guarantees that we do not have incorrect interactions + * with the flag register holding the predication result. + */ if (orig_desc.file == IMM) { /* Not using SEL here because we would generate an instruction with 2 * immediate sources which is not supported by HW.