From cbea91eb57a501bebb1ca256a4ba7d15cb91c511 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 7 Dec 2018 15:40:43 -0800 Subject: [PATCH] intel/fs: Remove nasty open-coded CHV/BXT 64-bit workarounds. Reviewed-by: Iago Toral Quiroga --- src/intel/compiler/brw_fs_builder.h | 68 ++-------------------------- src/intel/compiler/brw_fs_nir.cpp | 89 ++++--------------------------------- 2 files changed, 12 insertions(+), 145 deletions(-) diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 4846820..c50af4c 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -451,43 +451,13 @@ namespace brw { if (cluster_size > 1) { const fs_builder ubld = exec_all().group(dispatch_width() / 2, 0); - dst_reg left = horiz_stride(tmp, 2); - dst_reg right = horiz_stride(horiz_offset(tmp, 1), 2); - - /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn": - * - * "When source or destination datatype is 64b or operation is - * integer DWord multiply, regioning in Align1 must follow - * these rules: - * - * [...] - * - * 3. Source and Destination offset must be the same, except - * the case of scalar source." - * - * In order to work around this, we create a temporary register - * and shift left over to match right. If we have a 64-bit type, - * we have to use two integer MOVs instead of a 64-bit MOV. - */ - if (need_matching_subreg_offset(opcode, tmp.type)) { - dst_reg tmp2 = vgrf(tmp.type); - dst_reg new_left = horiz_stride(horiz_offset(tmp2, 1), 2); - if (type_sz(tmp.type) > 4) { - ubld.MOV(subscript(new_left, BRW_REGISTER_TYPE_D, 0), - subscript(left, BRW_REGISTER_TYPE_D, 0)); - ubld.MOV(subscript(new_left, BRW_REGISTER_TYPE_D, 1), - subscript(left, BRW_REGISTER_TYPE_D, 1)); - } else { - ubld.MOV(new_left, left); - } - left = new_left; - } + const dst_reg left = horiz_stride(tmp, 2); + const dst_reg right = horiz_stride(horiz_offset(tmp, 1), 2); set_condmod(mod, ubld.emit(opcode, right, left, right)); } if (cluster_size > 2) { - if (type_sz(tmp.type) <= 4 && - !need_matching_subreg_offset(opcode, tmp.type)) { + if (type_sz(tmp.type) <= 4) { const fs_builder ubld = exec_all().group(dispatch_width() / 4, 0); src_reg left = horiz_stride(horiz_offset(tmp, 1), 4); @@ -787,38 +757,6 @@ namespace brw { } } - - /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn": - * - * "When source or destination datatype is 64b or operation is - * integer DWord multiply, regioning in Align1 must follow - * these rules: - * - * [...] - * - * 3. Source and Destination offset must be the same, except - * the case of scalar source." - * - * This helper just detects when we're in this case. - */ - bool - need_matching_subreg_offset(enum opcode opcode, - enum brw_reg_type type) const - { - if (!shader->devinfo->is_cherryview && - !gen_device_info_is_9lp(shader->devinfo)) - return false; - - if (type_sz(type) > 4) - return true; - - if (opcode == BRW_OPCODE_MUL && - !brw_reg_type_is_floating_point(type)) - return true; - - return false; - } - bblock_t *block; exec_node *cursor; diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 92ec85a..312cd22 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -805,30 +805,6 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_i2i64: case nir_op_u2f64: case nir_op_u2u64: - /* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions: - * - * "When source or destination is 64b (...), regioning in Align1 - * must follow these rules: - * - * 1. Source and destination horizontal stride must be aligned to - * the same qword. - * (...)" - * - * This means that conversions from bit-sizes smaller than 64-bit to - * 64-bit need to have the source data elements aligned to 64-bit. - * This restriction does not apply to BDW and later. - */ - if (nir_dest_bit_size(instr->dest.dest) == 64 && - nir_src_bit_size(instr->src[0].src) < 64 && - (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { - fs_reg tmp = bld.vgrf(result.type, 1); - tmp = subscript(tmp, op[0].type, 0); - inst = bld.MOV(tmp, op[0]); - inst = bld.MOV(result, tmp); - inst->saturate = instr->dest.saturate; - break; - } - /* fallthrough */ case nir_op_f2f32: case nir_op_f2i32: case nir_op_f2u32: @@ -1463,36 +1439,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) unreachable("not reached: should have been lowered"); case nir_op_ishl: + bld.SHL(result, op[0], op[1]); + break; case nir_op_ishr: - case nir_op_ushr: { - fs_reg shift_count = op[1]; - - if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) { - if (op[1].file == VGRF && - (result.type == BRW_REGISTER_TYPE_Q || - result.type == BRW_REGISTER_TYPE_UQ)) { - shift_count = fs_reg(VGRF, alloc.allocate(dispatch_width / 4), - BRW_REGISTER_TYPE_UD); - shift_count.stride = 2; - bld.MOV(shift_count, op[1]); - } - } - - switch (instr->op) { - case nir_op_ishl: - bld.SHL(result, op[0], shift_count); - break; - case nir_op_ishr: - bld.ASR(result, op[0], shift_count); - break; - case nir_op_ushr: - bld.SHR(result, op[0], shift_count); - break; - default: - unreachable("not reached"); - } + bld.ASR(result, op[0], op[1]); + break; + case nir_op_ushr: + bld.SHR(result, op[0], op[1]); break; - } case nir_op_pack_half_2x16_split: bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]); @@ -4414,34 +4368,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr const fs_reg tmp_left = horiz_stride(tmp, 2); const fs_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2); - /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn": - * - * "When source or destination datatype is 64b or operation is - * integer DWord multiply, regioning in Align1 must follow - * these rules: - * - * [...] - * - * 3. Source and Destination offset must be the same, except - * the case of scalar source." - * - * In order to work around this, we have to emit two 32-bit MOVs instead - * of a single 64-bit MOV to do the shuffle. - */ - if (type_sz(value.type) > 4 && - (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { - ubld.MOV(subscript(tmp_left, BRW_REGISTER_TYPE_D, 0), - subscript(src_right, BRW_REGISTER_TYPE_D, 0)); - ubld.MOV(subscript(tmp_left, BRW_REGISTER_TYPE_D, 1), - subscript(src_right, BRW_REGISTER_TYPE_D, 1)); - ubld.MOV(subscript(tmp_right, BRW_REGISTER_TYPE_D, 0), - subscript(src_left, BRW_REGISTER_TYPE_D, 0)); - ubld.MOV(subscript(tmp_right, BRW_REGISTER_TYPE_D, 1), - subscript(src_left, BRW_REGISTER_TYPE_D, 1)); - } else { - ubld.MOV(tmp_left, src_right); - ubld.MOV(tmp_right, src_left); - } + ubld.MOV(tmp_left, src_right); + ubld.MOV(tmp_right, src_left); + bld.MOV(retype(dest, value.type), tmp); break; } -- 2.7.4