From 35b741c8e74cf7c6a99d513c1fd01477545a172d Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Sat, 28 Jun 2014 13:53:55 -0700
Subject: [PATCH] i965/vec4: Pass const references to instruction functions.

   text     data      bss      dec      hex  filename
4231165   123200    39648  4394013   430c1d  i965_dri.so
4186277   123200    39648  4349125   425cc5  i965_dri.so

Cuts 43k of .text and saves a bunch of useless struct copies.

Reviewed-by: Kenneth Graunke
---
 src/mesa/drivers/dri/i965/brw_vec4.h           | 92 ++++++++++++++++----------
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 16 +++--
 2 files changed, 67 insertions(+), 41 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 5702d06..6ac35d7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -448,45 +448,67 @@ public:
    vec4_instruction *emit_before(vec4_instruction *inst,
                                  vec4_instruction *new_inst);
 
-   vec4_instruction *MOV(dst_reg dst, src_reg src0);
-   vec4_instruction *NOT(dst_reg dst, src_reg src0);
-   vec4_instruction *RNDD(dst_reg dst, src_reg src0);
-   vec4_instruction *RNDE(dst_reg dst, src_reg src0);
-   vec4_instruction *RNDZ(dst_reg dst, src_reg src0);
-   vec4_instruction *FRC(dst_reg dst, src_reg src0);
-   vec4_instruction *F32TO16(dst_reg dst, src_reg src0);
-   vec4_instruction *F16TO32(dst_reg dst, src_reg src0);
-   vec4_instruction *ADD(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *MUL(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *MACH(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *MAC(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *AND(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *OR(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *XOR(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *DP3(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *DP4(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *DPH(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *SHL(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *SHR(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *ASR(dst_reg dst, src_reg src0, src_reg src1);
+   vec4_instruction *MOV(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *NOT(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *RNDD(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *RNDE(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *RNDZ(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *FRC(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *F32TO16(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *F16TO32(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *ADD(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *MUL(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *MACH(const dst_reg &dst, const src_reg &src0,
+                          const src_reg &src1);
+   vec4_instruction *MAC(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *AND(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *OR(const dst_reg &dst, const src_reg &src0,
+                        const src_reg &src1);
+   vec4_instruction *XOR(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *DP3(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *DP4(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *DPH(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *SHL(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *SHR(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *ASR(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
    vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
                          uint32_t condition);
    vec4_instruction *IF(src_reg src0, src_reg src1, uint32_t condition);
    vec4_instruction *IF(uint32_t predicate);
-   vec4_instruction *PULL_CONSTANT_LOAD(dst_reg dst, src_reg index);
-   vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index);
-   vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index);
-   vec4_instruction *LRP(dst_reg dst, src_reg a, src_reg y, src_reg x);
-   vec4_instruction *BFREV(dst_reg dst, src_reg value);
-   vec4_instruction *BFE(dst_reg dst, src_reg bits, src_reg offset, src_reg value);
-   vec4_instruction *BFI1(dst_reg dst, src_reg bits, src_reg offset);
-   vec4_instruction *BFI2(dst_reg dst, src_reg bfi1_dst, src_reg insert, src_reg base);
-   vec4_instruction *FBH(dst_reg dst, src_reg value);
-   vec4_instruction *FBL(dst_reg dst, src_reg value);
-   vec4_instruction *CBIT(dst_reg dst, src_reg value);
-   vec4_instruction *MAD(dst_reg dst, src_reg c, src_reg b, src_reg a);
-   vec4_instruction *ADDC(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *SUBB(dst_reg dst, src_reg src0, src_reg src1);
+   vec4_instruction *PULL_CONSTANT_LOAD(const dst_reg &dst,
+                                        const src_reg &index);
+   vec4_instruction *SCRATCH_READ(const dst_reg &dst, const src_reg &index);
+   vec4_instruction *SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
+                                   const src_reg &index);
+   vec4_instruction *LRP(const dst_reg &dst, const src_reg &a,
+                         const src_reg &y, const src_reg &x);
+   vec4_instruction *BFREV(const dst_reg &dst, const src_reg &value);
+   vec4_instruction *BFE(const dst_reg &dst, const src_reg &bits,
+                         const src_reg &offset, const src_reg &value);
+   vec4_instruction *BFI1(const dst_reg &dst, const src_reg &bits,
+                          const src_reg &offset);
+   vec4_instruction *BFI2(const dst_reg &dst, const src_reg &bfi1_dst,
+                          const src_reg &insert, const src_reg &base);
+   vec4_instruction *FBH(const dst_reg &dst, const src_reg &value);
+   vec4_instruction *FBL(const dst_reg &dst, const src_reg &value);
+   vec4_instruction *CBIT(const dst_reg &dst, const src_reg &value);
+   vec4_instruction *MAD(const dst_reg &dst, const src_reg &c,
+                         const src_reg &b, const src_reg &a);
+   vec4_instruction *ADDC(const dst_reg &dst, const src_reg &src0,
+                          const src_reg &src1);
+   vec4_instruction *SUBB(const dst_reg &dst, const src_reg &src0,
+                          const src_reg &src1);
 
    int implied_mrf_writes(vec4_instruction *inst);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index c732c90..219515a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -112,7 +112,7 @@ vec4_visitor::emit(enum opcode opcode)
 
 #define ALU1(op)                                                        \
    vec4_instruction *                                                   \
-   vec4_visitor::op(dst_reg dst, src_reg src0)                          \
+   vec4_visitor::op(const dst_reg &dst, const src_reg &src0)            \
    {                                                                    \
       return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,  \
                                            src0);                       \
@@ -120,7 +120,8 @@ vec4_visitor::emit(enum opcode opcode)
 
 #define ALU2(op)                                                        \
    vec4_instruction *                                                   \
-   vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1)            \
+   vec4_visitor::op(const dst_reg &dst, const src_reg &src0,            \
+                    const src_reg &src1)                                \
    {                                                                    \
       return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,  \
                                            src0, src1);                 \
@@ -128,7 +129,8 @@ vec4_visitor::emit(enum opcode opcode)
 
 #define ALU2_ACC(op)                                                    \
    vec4_instruction *                                                   \
-   vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1)            \
+   vec4_visitor::op(const dst_reg &dst, const src_reg &src0,            \
+                    const src_reg &src1)                                \
    {                                                                    \
       vec4_instruction *inst = new(mem_ctx) vec4_instruction(this,      \
                          BRW_OPCODE_##op, dst, src0, src1);             \
@@ -138,7 +140,8 @@ vec4_visitor::emit(enum opcode opcode)
 
 #define ALU3(op)                                                        \
    vec4_instruction *                                                   \
-   vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\
+   vec4_visitor::op(const dst_reg &dst, const src_reg &src0,            \
+                    const src_reg &src1, const src_reg &src2)           \
    {                                                                    \
       assert(brw->gen >= 6);                                            \
       return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,  \
@@ -238,7 +241,7 @@ vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
 }
 
 vec4_instruction *
-vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
+vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
 {
    vec4_instruction *inst;
 
@@ -251,7 +254,8 @@ vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
 }
 
 vec4_instruction *
-vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
+vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
+                            const src_reg &index)
 {
    vec4_instruction *inst;
 
-- 
2.7.4