From a88ce0c1c4c1f77209b71d5a6858f952642f385a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 10 Sep 2015 11:08:15 -0700 Subject: [PATCH] i965/vec4: Use the replicated fdot instruction in NIR Reviewed-by: Connor Abbott Reviewed-by: Eduardo Lima Mitev --- src/mesa/drivers/dri/i965/brw_shader.cpp | 8 ++++++++ src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 6 +++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index cf9aa23..eed73fb 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -96,6 +96,14 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) */ nir_options->lower_ffma = true; nir_options->lower_sub = true; + /* In the vec4 backend, our dpN instruction replicates its result to all + * the components of a vec4. We would like NIR to give us replicated fdot + * instructions because it can optimize better for us. + * + * For the FS backend, it should be lowered away by the scalarizing pass so + * we should never see fdot anyway. + */ + nir_options->fdot_replicates = true; /* We want the GLSL compiler to emit code that uses condition codes */ for (int i = 0; i < MESA_SHADER_STAGES; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 144f9e5..482fce2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1254,17 +1254,17 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) inst->predicate = BRW_PREDICATE_NORMAL; break; - case nir_op_fdot2: + case nir_op_fdot_replicated2: inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]); inst->saturate = instr->dest.saturate; break; - case nir_op_fdot3: + case nir_op_fdot_replicated3: inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]); inst->saturate = instr->dest.saturate; break; - case nir_op_fdot4: + case nir_op_fdot_replicated4: inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]); inst->saturate = instr->dest.saturate; break; -- 2.7.4