From: Eric Anholt
Date: Tue, 4 May 2010 18:58:03 +0000 (-0700)
Subject: ir_to_mesa: Get temps allocated at the right times.
X-Git-Tag: 062012170305~10660^2~625^2~2^2~43
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b2ed4dd7b0270e469302965269007292117d02e2;p=profile%2Fivi%2Fmesa.git

ir_to_mesa: Get temps allocated at the right times.

The alloced_vec4/vec4 distinction was an experiment to expose the cost of temps to the codegen. But the problem is that the temporary production rule gets called after the emit rule that was using the temp. We could have the args to emit_op be pointers to where the temp would get allocated later, but that seems overly hard while just trying to bring this thing up. Besides, the temps used in expressions bear only the vaguest relation to how many temps will be used after register allocation.
---
diff --git a/ir_to_mesa.cpp b/ir_to_mesa.cpp index f36dea5..eb55f82 100644 --- a/ir_to_mesa.cpp +++ b/ir_to_mesa.cpp @@ -293,6 +293,9 @@ ir_to_mesa_visitor::visit(ir_expression *ir) ir->accept(&v); exit(1); } + + /* Allocate a temporary for the result. */ + this->get_temp(this->result); } @@ -309,6 +312,7 @@ ir_to_mesa_visitor::visit(ir_swizzle *ir) assert(this->result); tree = this->create_tree(MB_TERM_swizzle_vec4, ir, this->result, NULL); + this->get_temp(tree); for (i = 0; i < 4; i++) { if (i < ir->type->vector_elements) { diff --git a/mesa_codegen.brg b/mesa_codegen.brg index 9f2761b..f1f24da 100644 --- a/mesa_codegen.brg +++ b/mesa_codegen.brg @@ -76,25 +76,10 @@ # produced at the cost of "cost". We measure "cost" in approximate # instruction count. The BURG should then more or less minimize the # number of instructions. -# -# A reference of a variable has an allocated register already, so it -# can be used as an argument for pretty much anything. -alloced_vec4: reference_vec4 0 - -# If something produces a vec4 with a location already, then we don't need -# to allocate a temp reg for it.
-vec4: alloced_vec4 0 - -# If something produces a vec4 result that needs a place to live, -# then there's a cost with allocating a temporary for it. We -# approximate that as one instruction's cost, even though sometimes -# that temp might not be a newly-allocated temp due to later -# live-dead analysis. -alloced_vec4: vec4 1 -{ - /* FINISHME */ - tree->v->get_temp(tree); -} + +# A reference of a variable is just a vec4 register location, +# so it can be used as an argument for pretty much anything. +vec4: reference_vec4 0 # Here's the rule everyone will hit: Moving the result of an # expression into a variable-dereference register location. @@ -102,14 +87,14 @@ alloced_vec4: vec4 1 # Note that this is likely a gratuitous move. We could make variants # of each of the following rules, e.g: # -# vec4: add_vec4_vec4(alloced_vec4, alloced_vec4) 1 +# vec4: add_vec4_vec4(vec4, vec4) 1 # { # emit(ADD, tree, tree->left, tree->right); # } # # becoming # -# vec4: assign(alloced_vec4_vec4, add_vec4_vec4(alloced_vec4, alloced_vec4) 1 +# vec4: assign(vec4_vec4, add_vec4_vec4(vec4, vec4) 1 # { # emit(ADD, tree->left, tree->right->left, tree->right->right); # } @@ -117,7 +102,7 @@ alloced_vec4: vec4 1 # But it seems like a lot of extra typing and duped code, when we # probably want copy propagation and dead code after codegen anyway, # which would clean these up. -stmt: assign(alloced_vec4, alloced_vec4) 1 +stmt: assign(vec4, vec4) 1 { ir_to_mesa_emit_op1(tree, OPCODE_MOV, ir_to_mesa_dst_reg_from_src(tree->left->src_reg), @@ -126,7 +111,7 @@ stmt: assign(alloced_vec4, alloced_vec4) 1 # Perform a swizzle by composing our swizzle with the swizzle # required to get at the src reg. 
-vec4: swizzle_vec4(alloced_vec4) 1 +vec4: swizzle_vec4(vec4) 1 { ir_to_mesa_src_reg reg = tree->left->src_reg; int swiz[4]; @@ -145,7 +130,7 @@ vec4: swizzle_vec4(alloced_vec4) 1 reg); } -vec4: add_vec4_vec4(alloced_vec4, alloced_vec4) 1 +vec4: add_vec4_vec4(vec4, vec4) 1 { ir_to_mesa_emit_op2(tree, OPCODE_ADD, ir_to_mesa_dst_reg_from_src(tree->src_reg), @@ -153,7 +138,7 @@ vec4: add_vec4_vec4(alloced_vec4, alloced_vec4) 1 tree->right->src_reg); } -vec4: sub_vec4_vec4(alloced_vec4, alloced_vec4) 1 +vec4: sub_vec4_vec4(vec4, vec4) 1 { ir_to_mesa_emit_op2(tree, OPCODE_SUB, ir_to_mesa_dst_reg_from_src(tree->src_reg), @@ -161,7 +146,7 @@ vec4: sub_vec4_vec4(alloced_vec4, alloced_vec4) 1 tree->right->src_reg); } -vec4: mul_vec4_vec4(alloced_vec4, alloced_vec4) 1 +vec4: mul_vec4_vec4(vec4, vec4) 1 { ir_to_mesa_emit_op2(tree, OPCODE_MUL, ir_to_mesa_dst_reg_from_src(tree->src_reg), @@ -169,7 +154,7 @@ vec4: mul_vec4_vec4(alloced_vec4, alloced_vec4) 1 tree->right->src_reg); } -vec4: dp4_vec4_vec4(alloced_vec4, alloced_vec4) 1 +vec4: dp4_vec4_vec4(vec4, vec4) 1 { ir_to_mesa_emit_op2(tree, OPCODE_DP4, ir_to_mesa_dst_reg_from_src(tree->src_reg), @@ -178,7 +163,7 @@ vec4: dp4_vec4_vec4(alloced_vec4, alloced_vec4) 1 tree->src_reg.swizzle = SWIZZLE_XXXX; } -vec4: dp3_vec4_vec4(alloced_vec4, alloced_vec4) 1 +vec4: dp3_vec4_vec4(vec4, vec4) 1 { ir_to_mesa_emit_op2(tree, OPCODE_DP3, ir_to_mesa_dst_reg_from_src(tree->src_reg), @@ -188,7 +173,7 @@ vec4: dp3_vec4_vec4(alloced_vec4, alloced_vec4) 1 } -vec4: dp2_vec4_vec4(alloced_vec4, alloced_vec4) 1 +vec4: dp2_vec4_vec4(vec4, vec4) 1 { ir_to_mesa_emit_op2(tree, OPCODE_DP2, ir_to_mesa_dst_reg_from_src(tree->src_reg), @@ -197,7 +182,7 @@ vec4: dp2_vec4_vec4(alloced_vec4, alloced_vec4) 1 tree->src_reg.swizzle = SWIZZLE_XXXX; } -vec4: div_vec4_vec4(alloced_vec4, alloced_vec4) 1 +vec4: div_vec4_vec4(vec4, vec4) 1 { /* FINISHME: Mesa RCP only uses the X channel, this node is for vec4. 
*/ ir_to_mesa_emit_op1(tree, OPCODE_RCP, @@ -210,7 +195,7 @@ vec4: div_vec4_vec4(alloced_vec4, alloced_vec4) 1 tree->left->src_reg); } -vec4: sqrt_vec4(alloced_vec4) 1 +vec4: sqrt_vec4(vec4) 1 { /* FINISHME: Mesa RSQ only uses the X channel, this node is for vec4. */ ir_to_mesa_emit_op1(tree, OPCODE_RSQ,