From acd2bccd852f1e4edbac2e57dd47139908e79b5d Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Wed, 17 Apr 2013 18:57:58 -0700 Subject: [PATCH] i965/vs: Add support for bit instructions. v2: Rebase on LRP addition. Use fix_3src_operand() when emitting BFE and BFI2. Add BFE and BFI2 to is_3src_inst check in brw_vec4_copy_propagation.cpp. Subtract result of FBH from 31 (unless an error) to convert MSB counts to LSB counts Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_vec4.h | 7 +++ .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 5 +- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 29 ++++++++++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 66 ++++++++++++++++++++++ 4 files changed, 106 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 06b0f6a..7614ac5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -375,6 +375,13 @@ public: vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index); vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index); vec4_instruction *LRP(dst_reg dst, src_reg a, src_reg y, src_reg x); + vec4_instruction *BFREV(dst_reg dst, src_reg value); + vec4_instruction *BFE(dst_reg dst, src_reg bits, src_reg offset, src_reg value); + vec4_instruction *BFI1(dst_reg dst, src_reg bits, src_reg offset); + vec4_instruction *BFI2(dst_reg dst, src_reg bfi1_dst, src_reg insert, src_reg base); + vec4_instruction *FBH(dst_reg dst, src_reg value); + vec4_instruction *FBL(dst_reg dst, src_reg value); + vec4_instruction *CBIT(dst_reg dst, src_reg value); int implied_mrf_writes(vec4_instruction *inst); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp index f2c6cd6..39eef4b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp @@ -215,7 +215,10 @@ vec4_visitor::try_copy_propagation(struct intel_context *intel, if (has_source_modifiers && !can_do_source_mods(inst)) return false; - if (inst->opcode == BRW_OPCODE_LRP && value.file == UNIFORM) + bool is_3src_inst = (inst->opcode == BRW_OPCODE_LRP || + inst->opcode == BRW_OPCODE_BFE || + inst->opcode == BRW_OPCODE_BFI2); + if (is_3src_inst && value.file == UNIFORM) return false; /* We can't copy-propagate a UD negation into a condmod diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 96b4965..91101f2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -842,6 +842,35 @@ vec4_generator::generate_code(exec_list *instructions) brw_LRP(p, dst, src[0], src[1], src[2]); break; + case BRW_OPCODE_BFREV: + /* BFREV only supports UD type for src and dst. */ + brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD), + retype(src[0], BRW_REGISTER_TYPE_UD)); + break; + case BRW_OPCODE_FBH: + /* FBH only supports UD type for dst. */ + brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); + break; + case BRW_OPCODE_FBL: + /* FBL only supports UD type for dst. */ + brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); + break; + case BRW_OPCODE_CBIT: + /* CBIT only supports UD type for dst. */ + brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); + break; + + case BRW_OPCODE_BFE: + brw_BFE(p, dst, src[0], src[1], src[2]); + break; + + case BRW_OPCODE_BFI1: + brw_BFI1(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_BFI2: + brw_BFI2(p, dst, src[0], src[1], src[2]); + break; + case BRW_OPCODE_IF: if (inst->src[0].file != BAD_FILE) { /* The instruction has an embedded compare (only allowed on gen6) */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 3b0687f..cda425e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -136,6 +136,13 @@ ALU2(SHL) ALU2(SHR) ALU2(ASR) ALU3(LRP) +ALU1(BFREV) +ALU3(BFE) +ALU2(BFI1) +ALU3(BFI2) +ALU1(FBH) +ALU1(FBL) +ALU1(CBIT) /** Gen4 predicated IF. */ vec4_instruction * @@ -1382,6 +1389,39 @@ vec4_visitor::visit(ir_expression *ir) assert(!"derivatives not valid in vertex shader"); break; + case ir_unop_bitfield_reverse: + emit(BFREV(result_dst, op[0])); + break; + case ir_unop_bit_count: + emit(CBIT(result_dst, op[0])); + break; + case ir_unop_find_msb: { + src_reg temp = src_reg(this, glsl_type::uint_type); + + inst = emit(FBH(dst_reg(temp), op[0])); + inst->dst.writemask = WRITEMASK_XYZW; + + /* FBH counts from the MSB side, while GLSL's findMSB() wants the count + * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then + * subtract the result from 31 to convert the MSB count into an LSB count. + */ + + /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */ + temp.swizzle = BRW_SWIZZLE_NOOP; + emit(MOV(result_dst, temp)); + + src_reg src_tmp = src_reg(result_dst); + emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ)); + + src_tmp.negate = true; + inst = emit(ADD(result_dst, src_tmp, src_reg(31))); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + } + case ir_unop_find_lsb: + emit(FBL(result_dst, op[0])); + break; + case ir_unop_noise: assert(!"not reached: should be handled by lower_noise"); break; @@ -1582,6 +1622,10 @@ vec4_visitor::visit(ir_expression *ir) inst = emit(SHR(result_dst, op[0], op[1])); break; + case ir_binop_bfm: + emit(BFI1(result_dst, op[0], op[1])); + break; + case ir_binop_ubo_load: { ir_constant *uniform_block = ir->operands[0]->as_constant(); ir_constant *const_offset_ir = ir->operands[1]->as_constant(); @@ -1637,6 +1681,28 @@ vec4_visitor::visit(ir_expression *ir) emit(LRP(result_dst, op[2], op[1], op[0])); break; + case ir_triop_bfi: + op[0] = fix_3src_operand(op[0]); + op[1] = fix_3src_operand(op[1]); + op[2] = fix_3src_operand(op[2]); + emit(BFI2(result_dst, op[0], op[1], op[2])); + break; + + case ir_triop_bitfield_extract: + op[0] = fix_3src_operand(op[0]); + op[1] = fix_3src_operand(op[1]); + op[2] = fix_3src_operand(op[2]); + /* Note that the instruction's argument order is reversed from GLSL + * and the IR. + */ + emit(BFE(result_dst, op[2], op[1], op[0])); + break; + + case ir_quadop_bitfield_insert: + assert(!"not reached: should be handled by " + "bitfield_insert_to_bfm_bfi\n"); + break; + case ir_quadop_vector: assert(!"not reached: should be handled by lower_quadop_vector"); break; -- 2.7.4