From ff8f272b0d02b41a0ce34ab6af7119b9e06f4961 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 28 Sep 2011 17:37:54 -0700 Subject: [PATCH] i965/fs: Implement integer quotient and remainder math operations. Signed-off-by: Kenneth Graunke Tested-by: Ian Romanick Reviewed-by: Ian Romanick Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_defines.h | 2 ++ src/mesa/drivers/dri/i965/brw_fs.cpp | 16 ++++++++++++++-- src/mesa/drivers/dri/i965/brw_fs.h | 2 ++ src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 2 ++ .../drivers/dri/i965/brw_fs_schedule_instructions.cpp | 2 ++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 8 ++++++-- src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++++ 7 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 05a1337..a111630 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -622,6 +622,8 @@ enum opcode { SHADER_OPCODE_EXP2, SHADER_OPCODE_LOG2, SHADER_OPCODE_POW, + SHADER_OPCODE_INT_QUOTIENT, + SHADER_OPCODE_INT_REMAINDER, SHADER_OPCODE_SIN, SHADER_OPCODE_COS, FS_OPCODE_DDX, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 9a89f88..1d93a51 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -152,6 +152,8 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) case SHADER_OPCODE_COS: return 1 * c->dispatch_width / 8; case SHADER_OPCODE_POW: + case SHADER_OPCODE_INT_QUOTIENT: + case SHADER_OPCODE_INT_REMAINDER: return 2 * c->dispatch_width / 8; case FS_OPCODE_TEX: case FS_OPCODE_TXB: @@ -576,7 +578,15 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) int base_mrf = 2; fs_inst *inst; - assert(opcode == SHADER_OPCODE_POW); + switch (opcode) { + case SHADER_OPCODE_POW: + case SHADER_OPCODE_INT_QUOTIENT: + case SHADER_OPCODE_INT_REMAINDER: + break; + default: + assert(!"not reached: unsupported binary math opcode."); + return NULL; + } if (intel->gen >= 6) { /* Can't do hstride == 0 args to gen6 math, so expand it out. @@ -586,19 +596,21 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) */ if (src0.file == UNIFORM || src0.abs || src0.negate) { fs_reg expanded = fs_reg(this, glsl_type::float_type); + expanded.type = src0.type; emit(BRW_OPCODE_MOV, expanded, src0); src0 = expanded; } if (src1.file == UNIFORM || src1.abs || src1.negate) { fs_reg expanded = fs_reg(this, glsl_type::float_type); + expanded.type = src1.type; emit(BRW_OPCODE_MOV, expanded, src1); src1 = expanded; } inst = emit(opcode, dst, src0, src1); } else { - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1, src1.type), src1); inst = emit(opcode, dst, src0, reg_null_f); inst->base_mrf = base_mrf; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index f6a57ba..56181a3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -305,6 +305,8 @@ public: opcode == SHADER_OPCODE_LOG2 || opcode == SHADER_OPCODE_SIN || opcode == SHADER_OPCODE_COS || + opcode == SHADER_OPCODE_INT_QUOTIENT || + opcode == SHADER_OPCODE_INT_REMAINDER || opcode == SHADER_OPCODE_POW); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 8176a76..4c158fe 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -794,6 +794,8 @@ fs_visitor::generate_code() generate_math_gen4(inst, dst, src[0]); } break; + case SHADER_OPCODE_INT_QUOTIENT: + case SHADER_OPCODE_INT_REMAINDER: case SHADER_OPCODE_POW: if (intel->gen >= 6) { generate_math2_gen6(inst, dst, src[0], src[1]); diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index 1f83ee2..910f329 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -75,11 +75,13 @@ public: case SHADER_OPCODE_RSQ: this->latency = 2 * chans * math_latency; break; + case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_SQRT: case SHADER_OPCODE_LOG2: /* full precision log. partial is 2. */ this->latency = 3 * chans * math_latency; break; + case SHADER_OPCODE_INT_REMAINDER: case SHADER_OPCODE_EXP2: /* full precision. partial is 3, same throughput. */ this->latency = 4 * chans * math_latency; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 3af5780..07ea84f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -294,10 +294,14 @@ fs_visitor::visit(ir_expression *ir) } break; case ir_binop_div: - assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */ + assert(ir->type->is_integer()); + emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); break; case ir_binop_mod: - assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ + assert(ir->type->is_integer()); + emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]); break; case ir_binop_less: diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index a6ed810..c938c75 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -227,6 +227,10 @@ brw_math_function(enum opcode op) return BRW_MATH_FUNCTION_SIN; case SHADER_OPCODE_COS: return BRW_MATH_FUNCTION_COS; + case SHADER_OPCODE_INT_QUOTIENT: + return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT; + case SHADER_OPCODE_INT_REMAINDER: + return BRW_MATH_FUNCTION_INT_DIV_REMAINDER; default: assert(!"not reached: unknown math function"); return 0; -- 2.7.4