From b284d222dbbe7a106dd1e52af7a826dc9855fc3c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 1 Nov 2018 16:04:01 -0500 Subject: [PATCH] intel/fs: Use SHADER_OPCODE_SEND for varying UBO pulls on gen7+ Reviewed-by: Iago Toral Quiroga --- src/intel/compiler/brw_eu_defines.h | 1 - src/intel/compiler/brw_fs.cpp | 31 ++++++++-- src/intel/compiler/brw_fs.h | 4 -- src/intel/compiler/brw_fs_cse.cpp | 1 - src/intel/compiler/brw_fs_generator.cpp | 73 ------------------------ src/intel/compiler/brw_schedule_instructions.cpp | 1 - src/intel/compiler/brw_shader.cpp | 2 - 7 files changed, 25 insertions(+), 88 deletions(-) diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 4445f38..d3dfd6d 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -526,7 +526,6 @@ enum opcode { FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4, - FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL, FS_OPCODE_DISCARD_JUMP, FS_OPCODE_SET_SAMPLE_ID, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index d9c339b..9ecabd5 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -217,7 +217,6 @@ fs_inst::is_send_from_grf() const { switch (opcode) { case SHADER_OPCODE_SEND: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case SHADER_OPCODE_SHADER_TIME_ADD: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: @@ -5196,16 +5195,37 @@ lower_varying_pull_constant_logical_send(const fs_builder &bld, fs_inst *inst) const gen_device_info *devinfo = bld.shader->devinfo; if (devinfo->gen >= 7) { + fs_reg index = inst->src[0]; /* We are switching the instruction from an ALU-like instruction to a * send-from-grf instruction. Since sends can't handle strides or * source modifiers, we have to make a copy of the offset source. */ - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD); - bld.MOV(tmp, inst->src[1]); - inst->src[1] = tmp; + fs_reg offset = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.MOV(offset, inst->src[1]); - inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7; + const unsigned simd_mode = + inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 : + BRW_SAMPLER_SIMD_MODE_SIMD16; + + inst->opcode = SHADER_OPCODE_SEND; inst->mlen = inst->exec_size / 8; + inst->resize_sources(3); + + inst->sfid = BRW_SFID_SAMPLER; + inst->desc = brw_sampler_desc(devinfo, 0, 0, + GEN5_SAMPLER_MESSAGE_SAMPLE_LD, + simd_mode, 0); + if (index.file == IMM) { + inst->desc |= index.ud & 0xff; + inst->src[0] = brw_imm_ud(0); + } else { + const fs_builder ubld = bld.exec_all().group(1, 0); + fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD); + ubld.AND(tmp, index, brw_imm_ud(0xff)); + inst->src[0] = component(tmp, 0); + } + inst->src[1] = brw_imm_ud(0); /* ex_desc */ + inst->src[2] = offset; /* payload */ } else { const fs_reg payload(MRF, FIRST_PULL_LOAD_MRF(devinfo->gen), BRW_REGISTER_TYPE_UD); @@ -5727,7 +5747,6 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, case FS_OPCODE_DDX_FINE: case FS_OPCODE_DDY_COARSE: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case FS_OPCODE_PACK_HALF_2x16_SPLIT: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 4e913eb..6467b4c 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -444,10 +444,6 @@ private: void generate_varying_pull_constant_load_gen4(fs_inst *inst, struct brw_reg dst, struct brw_reg index); - void generate_varying_pull_constant_load_gen7(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset); void generate_mov_dispatch_to_flags(fs_inst *inst); void generate_pixel_interpolator_query(fs_inst *inst, diff --git a/src/intel/compiler/brw_fs_cse.cpp b/src/intel/compiler/brw_fs_cse.cpp index 1908924..9b89780 100644 --- a/src/intel/compiler/brw_fs_cse.cpp +++ b/src/intel/compiler/brw_fs_cse.cpp @@ -74,7 +74,6 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case FS_OPCODE_FB_READ_LOGICAL: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case FS_OPCODE_LINTERP: case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_BROADCAST: diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 35762b4..544d198 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1515,75 +1515,6 @@ fs_generator::generate_varying_pull_constant_load_gen4(fs_inst *inst, } void -fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset) -{ - assert(devinfo->gen >= 7); - /* Varying-offset pull constant loads are treated as a normal expression on - * gen7, so the fact that it's a send message is hidden at the IR level. - */ - assert(inst->header_size == 0); - assert(inst->mlen); - assert(index.type == BRW_REGISTER_TYPE_UD); - - uint32_t simd_mode, rlen; - if (inst->exec_size == 16) { - rlen = 8; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - } else { - assert(inst->exec_size == 8); - rlen = 4; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; - } - - if (index.file == BRW_IMMEDIATE_VALUE) { - - uint32_t surf_index = index.ud; - - brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); - brw_inst_set_sfid(devinfo, send, BRW_SFID_SAMPLER); - brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW)); - brw_set_src0(p, send, offset); - brw_set_desc(p, send, - brw_message_desc(devinfo, inst->mlen, rlen, false) | - brw_sampler_desc(devinfo, surf_index, - 0, /* LD message ignores sampler unit */ - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - simd_mode, 0)); - - } else { - - struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - - /* a0.0 = surf_index & 0xff */ - brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND); - brw_inst_set_exec_size(p->devinfo, insn_and, BRW_EXECUTE_1); - brw_set_dest(p, insn_and, addr); - brw_set_src0(p, insn_and, vec1(retype(index, BRW_REGISTER_TYPE_UD))); - brw_set_src1(p, insn_and, brw_imm_ud(0x0ff)); - - brw_pop_insn_state(p); - - /* dst = send(offset, a0.0 | ) */ - brw_send_indirect_message( - p, BRW_SFID_SAMPLER, retype(dst, BRW_REGISTER_TYPE_UW), - offset, addr, - brw_message_desc(devinfo, inst->mlen, rlen, false) | - brw_sampler_desc(devinfo, - 0 /* surface */, - 0 /* sampler */, - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - simd_mode, - 0)); - } -} - -void fs_generator::generate_pixel_interpolator_query(fs_inst *inst, struct brw_reg dst, struct brw_reg src, @@ -2133,10 +2064,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_varying_pull_constant_load_gen4(inst, dst, src[0]); break; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - generate_varying_pull_constant_load_gen7(inst, dst, src[0], src[1]); - break; - case FS_OPCODE_REP_FB_WRITE: case FS_OPCODE_FB_WRITE: generate_fb_write(inst, src[0]); diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 46d3111..be57802 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -323,7 +323,6 @@ schedule_node::set_latency_gen7(bool is_haswell) break; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: case VS_OPCODE_PULL_CONSTANT_LOAD: diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 5b1d500..b4c7487 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -407,8 +407,6 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "uniform_pull_const_gen7"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: return "varying_pull_const_gen4"; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - return "varying_pull_const_gen7"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: return "varying_pull_const_logical"; -- 2.7.4