From bdab572a86f27b92ba10124f85d278e9c8861fff Mon Sep 17 00:00:00 2001
From: =?utf8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?=
Date: Mon, 13 Jun 2016 08:29:53 +0200
Subject: [PATCH] i965/fs: indirect addressing with doubles is not supported in CHV/BSW/BXT
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

From the Cherryview's PRM, Volume 7, 3D Media GPGPU Engine, Register Region
Restrictions, page 844:

"When source or destination datatype is 64b or operation is integer DWord
multiply, indirect addressing must not be used."

v2:
- Fix it for Broxton too.

v3:
- Simplify code by using subscript() and not creating a new num_components
variable (Kenneth).

Signed-off-by: Samuel Iglesias Gonsálvez
Cc: "12.0"
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95462
Reviewed-by: Kenneth Graunke
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index d72b37b..ad9b421 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -3611,10 +3611,32 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       unsigned read_size = instr->const_index[1] -
          (instr->num_components - 1) * type_sz(dest.type);
 
+      fs_reg indirect_chv_high_32bit;
+      bool is_chv_bxt_64bit =
+         (devinfo->is_cherryview || devinfo->is_broxton) &&
+         type_sz(dest.type) == 8;
+      if (is_chv_bxt_64bit) {
+         indirect_chv_high_32bit = vgrf(glsl_type::uint_type);
+         /* Calculate indirect address to read high 32 bits */
+         bld.ADD(indirect_chv_high_32bit, indirect, brw_imm_ud(4));
+      }
+
       for (unsigned j = 0; j < instr->num_components; j++) {
-         bld.emit(SHADER_OPCODE_MOV_INDIRECT,
-                  offset(dest, bld, j), offset(src, bld, j),
-                  indirect, brw_imm_ud(read_size));
+         if (!is_chv_bxt_64bit) {
+            bld.emit(SHADER_OPCODE_MOV_INDIRECT,
+                     offset(dest, bld, j), offset(src, bld, j),
+                     indirect, brw_imm_ud(read_size));
+         } else {
+            bld.emit(SHADER_OPCODE_MOV_INDIRECT,
+                     subscript(offset(dest, bld, j), BRW_REGISTER_TYPE_UD, 0),
+                     offset(src, bld, j),
+                     indirect, brw_imm_ud(read_size));
+
+            bld.emit(SHADER_OPCODE_MOV_INDIRECT,
+                     subscript(offset(dest, bld, j), BRW_REGISTER_TYPE_UD, 1),
+                     offset(src, bld, j),
+                     indirect_chv_high_32bit, brw_imm_ud(read_size));
+         }
       }
    }
    break;
-- 
2.7.4